diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py
index 73aa896c6..9feccaa53 100644
--- a/pythainlp/__init__.py
+++ b/pythainlp/__init__.py
@@ -17,7 +17,7 @@
#
# URL:
# For license information, see LICENSE
-__version__ = "4.0.0"
+__version__ = "4.0.2"
thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
diff --git a/pythainlp/khavee/core.py b/pythainlp/khavee/core.py
index fc38ee1f1..be007718e 100644
--- a/pythainlp/khavee/core.py
+++ b/pythainlp/khavee/core.py
@@ -15,8 +15,7 @@
from typing import List, Union
from pythainlp.tokenize import subword_tokenize
from pythainlp.util import sound_syllable
-
-
+from pythainlp.util import remove_tonemark
class KhaveeVerifier:
def __init__(self):
"""
@@ -185,10 +184,15 @@ def check_sara(self, word: str)-> str:
sara.append('ออ')
# incase บ่
- if 'บ่' in word:
+ if 'บ่' == word:
sara = []
sara.append('ออ')
-
+ if 'ํ' in word:
+ sara = []
+ sara.append('อำ')
+ if 'เ' in word and 'ื' in word and 'อ' in word:
+ sara = []
+ sara.append('เอือ')
if sara == []:
return 'Cant find Sara in this word'
else:
@@ -215,11 +219,8 @@ def check_marttra(self, word: str) -> str:
"""
if word[-1] == 'ร' and word[-2] in ['ต','ท'] :
word = word[:-1]
- if '์' in word[-1]:
- if 'ิ' in word[-2] or 'ุ' in word[-2]:
- word = word[:-3]
- else:
- word = word[:-2]
+ word = self.handle_karun_sound_silence(word)
+ word = remove_tonemark(word)
if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word:
return 'กา'
elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]):
@@ -417,7 +418,6 @@ def check_klon(self, text: str,k_type: int=8) -> Union[List[str], str]:
def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = False) -> Union[List[bool], List[str], bool, str]:
"""
Thai tonal word checker
-
:param Union[List[str], str] text: Thai word or list of Thai words
:param bool dead_syllable_as_aek: if True, dead syllable will be considered as aek
:return: the check if the word is aek or too or False(not both) or list of the check if input is list
@@ -453,3 +453,22 @@ def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool =
return 'aek'
else:
return False
+
+    def handle_karun_sound_silence(self, word: str) -> str:
+        """
+        Strip the silenced ending of a Thai word that ends with Karun ('์'):
+        the mark, the character before it and, at most, one more character.
+
+        :param str word: Thai word
+        :return: word without its silenced ending (unchanged if no Karun)
+        :rtype: str
+        """
+        if not word.endswith('์'):
+            return word
+        thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"
+        locate_silenced = len(word) - 2  # character the Karun sits on
+        probe = locate_silenced - 2  # negative for short words: must not wrap
+        can_silence_two = probe >= 0 and word[probe] in thai_consonants
+        cut_off = 2 if can_silence_two else 1
+        # Clamp at 0 so a lone Karun yields '' instead of a negative slice.
+        return word[:max(locate_silenced + 1 - cut_off, 0)]
diff --git a/pythainlp/khavee/example.py b/pythainlp/khavee/example.py
index f8e818538..b6dfba79c 100644
--- a/pythainlp/khavee/example.py
+++ b/pythainlp/khavee/example.py
@@ -16,12 +16,13 @@
# True
# การตรวจสอบคำสำผัสที่ผิด
-print('สรร ขวาน',kv.is_sumpus('สรร','ขวาน'))
+print('เพื่อน ล้วน',kv.is_sumpus('เพื่อน','ล้วน'))
# False
# การตรวจสอบคำ ครุ ลหุ
print('สรร',kv.check_karu_lahu('สรร'))
#karu
+
# การตรวจสอบคำ ครุ ลหุ
print('ชิชะ',kv.check_karu_lahu('ชิชะ'))
# lahu
@@ -66,3 +67,5 @@
# -> False, aek, too
print(kv.check_aek_too(['เอง', 'เอ่ง', 'เอ้ง'])) # ใช้ List ได้เหมือนกัน
# -> [False, 'aek', 'too']
+print(kv.check_aek_too(['ห๊ะ', 'เอ่ง', 'เอ้ง'], dead_syllable_as_aek=True)) # ใช้ List ได้เหมือนกัน และสามารถตั้งค่า ให้นับคำที่เสียงตายเป็นเอกได้ ตามการเช็คคฉันทลักษณ์กลอน
+# -> ['aek', 'aek', 'too']
diff --git a/setup.cfg b/setup.cfg
index 03a441c08..121adc632 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[bumpversion]
-current_version = 4.0.0
+current_version = 4.0.2
commit = True
tag = True
parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\-(?P[a-z]+)(?P\d+))?
diff --git a/setup.py b/setup.py
index 976c513db..c03533bf7 100644
--- a/setup.py
+++ b/setup.py
@@ -143,7 +143,7 @@
setup(
name="pythainlp",
- version="4.0.0",
+ version="4.0.2",
description="Thai Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",