diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index 73aa896c6..9feccaa53 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -17,7 +17,7 @@ # # URL: # For license information, see LICENSE -__version__ = "4.0.0" +__version__ = "4.0.2" thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py index fc38ee1f1..be007718e 100644 --- a/pythainlp/khavee/core.py +++ b/pythainlp/khavee/core.py @@ -15,8 +15,7 @@ from typing import List, Union from pythainlp.tokenize import subword_tokenize from pythainlp.util import sound_syllable - - +from pythainlp.util import remove_tonemark class KhaveeVerifier: def __init__(self): """ @@ -185,10 +184,15 @@ def check_sara(self, word: str)-> str: sara.append('ออ') # incase บ่ - if 'บ่' in word: + if 'บ่' == word: sara = [] sara.append('ออ') - + if 'ํ' in word: + sara = [] + sara.append('อำ') + if 'เ' in word and 'ื' in word and 'อ' in word: + sara = [] + sara.append('เอือ') if sara == []: return 'Cant find Sara in this word' else: @@ -215,11 +219,8 @@ def check_marttra(self, word: str) -> str: """ if word[-1] == 'ร' and word[-2] in ['ต','ท'] : word = word[:-1] - if '์' in word[-1]: - if 'ิ' in word[-2] or 'ุ' in word[-2]: - word = word[:-3] - else: - word = word[:-2] + word = self.handle_karun_sound_silence(word) + word = remove_tonemark(word) if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word: return 'กา' elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]): @@ -417,7 +418,6 @@ def check_klon(self, text: str,k_type: int=8) -> Union[List[str], str]: def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = False) -> Union[List[bool], List[str], bool, str]: """ Thai tonal word checker - :param Union[List[str], str] text: Thai word or list of Thai words :param bool dead_syllable_as_aek: if True, dead syllable will be 
def handle_karun_sound_silence(self, word: str) -> str:
    """
    Strip the characters silenced by a trailing Karun mark.

    When ``word`` ends with the Karun character (U+0E4C), the mark itself
    and the character(s) it silences are removed from the end of the word.
    Two characters are treated as silenced when the character located two
    positions before the one directly preceding the Karun is a Thai
    consonant; otherwise only one character is silenced.

    Words that do not end with the Karun mark are returned unchanged.
    Words too short to contain the probed position never wrap around to
    the end of the string: they fall back to silencing one character, and
    the result is clamped to the empty string.

    :param str word: Thai word
    :return: Thai word with the silenced trailing characters stripped
    :rtype: str
    """
    karun = "\u0e4c"  # THAI CHARACTER THANTHAKHAT, the Karun mark
    if not word.endswith(karun):
        return word

    thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"

    # The word ends with the Karun mark, so the character directly
    # before it sits at len(word) - 2.
    locate_silenced = len(word) - 2

    # Guard the probe position: a negative index would silently wrap
    # around to the end of the string (or raise on a one-character word).
    probe = locate_silenced - 2
    can_silence_two = probe >= 0 and word[probe] in thai_consonants
    cut_off = 2 if can_silence_two else 1

    # Clamp at zero so very short inputs yield "" instead of a
    # negative-slice artefact.
    return word[:max(locate_silenced + 1 - cut_off, 0)]
(?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 976c513db..c03533bf7 100644 --- a/setup.py +++ b/setup.py @@ -143,7 +143,7 @@ setup( name="pythainlp", - version="4.0.0", + version="4.0.2", description="Thai Natural Language Processing library", long_description=readme, long_description_content_type="text/markdown",