Skip to content

Commit

Permalink
Merge pull request #793 from HRNPH/dev
Browse files Browse the repository at this point in the history
Fix mishandling Karun in Kavee Matra Checker
  • Loading branch information
wannaphong authored Apr 25, 2023
2 parents 462a83e + dd4f0a1 commit 52ff97e
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 7 deletions.
27 changes: 20 additions & 7 deletions pythainlp/khavee/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from pythainlp.tokenize import subword_tokenize
from pythainlp.util import sound_syllable


class KhaveeVerifier:
def __init__(self):
"""
Expand Down Expand Up @@ -215,11 +214,7 @@ def check_marttra(self, word: str) -> str:
"""
if word[-1] == 'ร' and word[-2] in ['ต','ท'] :
word = word[:-1]
if '์' in word[-1]:
if 'ิ' in word[-2] or 'ุ' in word[-2]:
word = word[:-3]
else:
word = word[:-2]
word = self.handle_karun_sound_silence(word)
if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word:
return 'กา'
elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]):
Expand Down Expand Up @@ -417,7 +412,6 @@ def check_klon(self, text: str,k_type: int=8) -> Union[List[str], str]:
def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = False) -> Union[List[bool], List[str], bool, str]:
"""
Thai tonal word checker
:param Union[List[str], str] text: Thai word or list of Thai words
:param bool dead_syllable_as_aek: if True, dead syllable will be considered as aek
:return: the check if the word is aek or too or False(not both) or list of the check if input is list
Expand Down Expand Up @@ -453,3 +447,22 @@ def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool =
return 'aek'
else:
return False

def handle_karun_sound_silence(self, word: str) -> str:
    """
    Strip the characters silenced by a trailing Karun ('์') from a Thai word.

    If the word does not end with Karun it is returned unchanged. Otherwise
    the Karun and the character directly before it are removed. When the
    character two positions before that one is a Thai consonant, one more
    character is removed as well (two silenced characters in total).

    The look-back position is only consulted when it actually lies inside
    the word; for very short inputs it used to wrap around to the end of
    the string (negative index) or raise IndexError.

    :param str word: Thai word
    :return: Thai word with the silenced characters stripped
             (may be an empty string for degenerate inputs such as a lone Karun)
    :rtype: str
    """
    if not word.endswith('์'):
        return word
    thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"
    # Index of the character immediately before the trailing Karun.
    locate_silenced = word.rfind('์') - 1
    # Character inspected to decide whether two characters are silenced.
    lookback = locate_silenced - 2
    # Guard against negative indices so short words never wrap around.
    can_silence_two = lookback >= 0 and word[lookback] in thai_consonants
    cut_off = 2 if can_silence_two else 1
    # Clamp at 0: a negative slice end would count from the end of the word.
    return word[:max(locate_silenced + 1 - cut_off, 0)]
2 changes: 2 additions & 0 deletions pythainlp/khavee/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,5 @@
# -> False, aek, too
print(kv.check_aek_too(['เอง', 'เอ่ง', 'เอ้ง'])) # a List can be passed as well
# -> [False, 'aek', 'too']
print(kv.check_aek_too(['ห๊ะ', 'เอ่ง', 'เอ้ง'], dead_syllable_as_aek=True)) # a List works here too, and dead syllables can be set to count as aek, following the klon prosody check
# -> ['aek', 'aek', 'too']

0 comments on commit 52ff97e

Please sign in to comment.