Skip to content

Commit

Permalink
Merge pull request #801 from PyThaiNLP/dev
Browse files Browse the repository at this point in the history
Update 4.0 branch
  • Loading branch information
wannaphong committed May 30, 2023
2 parents 494f5ae + 33c5b5a commit 4e9adf0
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 14 deletions.
2 changes: 1 addition & 1 deletion pythainlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#
# URL: <https://pythainlp.github.io/>
# For license information, see LICENSE
__version__ = "4.0.0"
__version__ = "4.0.2"

thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars

Expand Down
39 changes: 29 additions & 10 deletions pythainlp/khavee/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@
from typing import List, Union
from pythainlp.tokenize import subword_tokenize
from pythainlp.util import sound_syllable


from pythainlp.util import remove_tonemark
class KhaveeVerifier:
def __init__(self):
"""
Expand Down Expand Up @@ -185,10 +184,15 @@ def check_sara(self, word: str)-> str:
sara.append('ออ')

# in case the word is บ่
if 'บ่' in word:
if 'บ่' == word:
sara = []
sara.append('ออ')

if 'ํ' in word:
sara = []
sara.append('อำ')
if 'เ' in word and 'ื' in word and 'อ' in word:
sara = []
sara.append('เอือ')
if sara == []:
return 'Cant find Sara in this word'
else:
Expand All @@ -215,11 +219,8 @@ def check_marttra(self, word: str) -> str:
"""
if word[-1] == 'ร' and word[-2] in ['ต','ท'] :
word = word[:-1]
if '์' in word[-1]:
if 'ิ' in word[-2] or 'ุ' in word[-2]:
word = word[:-3]
else:
word = word[:-2]
word = self.handle_karun_sound_silence(word)
word = remove_tonemark(word)
if 'ำ' in word or ('ํ' in word and 'า' in word) or 'ไ' in word or 'ใ' in word:
return 'กา'
elif word[-1] in ['า','ะ','ิ','ี','ุ','ู','อ'] or ('ี' in word and 'ย' in word[-1]) or ('ื' in word and 'อ' in word[-1]):
Expand Down Expand Up @@ -417,7 +418,6 @@ def check_klon(self, text: str,k_type: int=8) -> Union[List[str], str]:
def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool = False) -> Union[List[bool], List[str], bool, str]:
"""
Thai tonal word checker
:param Union[List[str], str] text: Thai word or list of Thai words
:param bool dead_syllable_as_aek: if True, dead syllable will be considered as aek
:return: the check if the word is aek or too or False(not both) or list of the check if input is list
Expand Down Expand Up @@ -453,3 +453,22 @@ def check_aek_too(self, text: Union[List[str], str], dead_syllable_as_aek:bool =
return 'aek'
else:
return False

def handle_karun_sound_silence(self, word: str) -> str:
    """
    Strip the silenced ending of a Thai word that ends with Karun ('์').

    A word that does not end with '์' is returned unchanged. Otherwise the
    trailing '์' is removed together with the one or two characters in front
    of it: two characters are removed when the character three positions
    before the '์' is a Thai consonant, one character otherwise.

    Very short inputs (a lone '์', or a single character followed by '์')
    are handled without raising and without the look-behind index wrapping
    around to the end of the string.

    :param str word: Thai word
    :return: Thai word with the silenced ending stripped
    :rtype: str
    """
    if not word.endswith('์'):
        return word
    thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"
    # The word ends with Karun, so the character in front of it sits at
    # len(word) - 2.
    locate_silenced = len(word) - 2
    # Look two characters further back to decide how much to drop. A negative
    # position means the word is too short to have such a character; treat it
    # as "not a consonant" instead of letting the index wrap or raise.
    look_behind = locate_silenced - 2
    can_silence_two = look_behind >= 0 and word[look_behind] in thai_consonants
    cut_off = 2 if can_silence_two else 1
    # Clamp at 0 so a lone Karun yields an empty string rather than a
    # negative slice bound.
    return word[:max(locate_silenced + 1 - cut_off, 0)]
5 changes: 4 additions & 1 deletion pythainlp/khavee/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,13 @@
# True

# การตรวจสอบคำสัมผัสที่ผิด
print('สรร ขวาน',kv.is_sumpus('สรร','ขวาน'))
print('เพื่อน ล้วน',kv.is_sumpus('เพื่อน','ล้วน'))
# False

# การตรวจสอบคำ ครุ ลหุ
print('สรร',kv.check_karu_lahu('สรร'))
#karu

# การตรวจสอบคำ ครุ ลหุ
print('ชิชะ',kv.check_karu_lahu('ชิชะ'))
# lahu
Expand Down Expand Up @@ -66,3 +67,5 @@
# -> False, aek, too
print(kv.check_aek_too(['เอง', 'เอ่ง', 'เอ้ง'])) # ใช้ List ได้เหมือนกัน
# -> [False, 'aek', 'too']
print(kv.check_aek_too(['ห๊ะ', 'เอ่ง', 'เอ้ง'], dead_syllable_as_aek=True)) # ใช้ List ได้เหมือนกัน และสามารถตั้งค่า ให้นับคำที่เสียงตายเป็นเอกได้ ตามการเช็คคฉันทลักษณ์กลอน
# -> ['aek', 'aek', 'too']
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 4.0.0
current_version = 4.0.1
commit = True
tag = True
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@

setup(
name="pythainlp",
version="4.0.0",
version="4.0.2",
description="Thai Natural Language Processing library",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down

0 comments on commit 4e9adf0

Please sign in to comment.