Skip to content

Commit

Permalink
Merge pull request #148 from bact/dev
Browse files (browse the repository at this point in the history)
Simplify bahttext() code
  • Loading branch information
bact authored Nov 4, 2018
2 parents 2d04829 + b7ff45e commit 3a0cc37
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 93 deletions.
68 changes: 31 additions & 37 deletions pythainlp/number/thainum.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,11 @@
Adapted from
http://justmindthought.blogspot.com/2012/12/code-php.html
"""
import ast
import math

__all__ = ["bahttext", "num_to_thaiword"]

p = [
_p = [
["ภาษาไทย", "ตัวเลข", "เลขไทย"],
["หนึ่ง", "1", "๑"],
["สอง", "2", "๒"],
Expand All @@ -22,18 +21,15 @@
["แปด", "8", "๘"],
["เก้า", "9", "๙"],
]
thaitonum = dict((x[2], x[1]) for x in p[1:])
p1 = dict((x[0], x[1]) for x in p[1:])
d1 = 0


# เลขไทยสู่เลข
# เลขไทยสู่เลขอารบิก
def thai_num_to_num(text):
"""
:param str text: Thai number characters such as '๑', '๒', '๓'
:return: universal numbers such as '1', '2', '3'
"""
thaitonum = dict((x[2], x[1]) for x in p[1:])
thaitonum = dict((x[2], x[1]) for x in _p[1:])
return thaitonum[text]


Expand All @@ -42,7 +38,7 @@ def thai_num_to_text(text):
:param str text: Thai number characters such as '๑', '๒', '๓'
:return: Thai numbers, spelled out in Thai
"""
thaitonum = dict((x[2], x[0]) for x in p[1:])
thaitonum = dict((x[2], x[0]) for x in _p[1:])
return thaitonum[text]


Expand All @@ -51,7 +47,7 @@ def num_to_thai_num(text):
:param text: universal numbers such as '1', '2', '3'
:return: Thai number characters such as '๑', '๒', '๓'
"""
thaitonum = dict((x[1], x[2]) for x in p[1:])
thaitonum = dict((x[1], x[2]) for x in _p[1:])
return thaitonum[text]


Expand All @@ -60,7 +56,7 @@ def num_to_text(text):
:param text: universal numbers such as '1', '2', '3'
:return: Thai numbers, spelled out in Thai
"""
thaitonum = dict((x[1], x[0]) for x in p[1:])
thaitonum = dict((x[1], x[0]) for x in _p[1:])
return thaitonum[text]


Expand All @@ -69,7 +65,7 @@ def text_to_num(text):
:param text: Thai numbers, spelled out in Thai
:return: universal numbers such as '1', '2', '3'
"""
thaitonum = dict((x[0], x[1]) for x in p[1:])
thaitonum = dict((x[0], x[1]) for x in _p[1:])
return thaitonum[text]


Expand All @@ -78,47 +74,34 @@ def text_to_thai_num(text):
:param text: Thai numbers, spelled out in Thai
:return: Thai numbers such as '๑', '๒', '๓'
"""
thaitonum = dict((x[0], x[2]) for x in p[1:])
thaitonum = dict((x[0], x[2]) for x in _p[1:])
return thaitonum[text]


def number_format(num, places=0):
return "{:20,.2f}".format(num)


def bahttext(amount_number):
def bahttext(number):
"""
Converts a number to Thai text and appends the "Baht" currency suffix.
Precision is fixed at two decimal places (0.00) to fit the "Satang" unit.
Similar to the BAHTTEXT function in Excel.
"""
ret = ""

if amount_number is None:
if number is None:
pass
elif amount_number == 0:
elif number == 0:
ret = "ศูนย์บาทถ้วน"
else:
amount_number = number_format(amount_number, 2).replace(" ", "")
pt = amount_number.find(".")
number, fraction = "", ""
amount_number1 = amount_number.split(".")

if not pt:
number = amount_number
else:
amount_number = amount_number.split(".")
number = amount_number[0]
fraction = int(amount_number1[1])

number = ast.literal_eval(number.replace(",", ""))
num_int, num_dec = "{:.2f}".format(number).split(".")
num_int = int(num_int)
num_dec = int(num_dec)

baht = num_to_thaiword(number)
if baht != "":
baht = num_to_thaiword(num_int)
if baht:
ret = "".join([ret, baht, "บาท"])

satang = num_to_thaiword(fraction)
if satang != "" and satang != "ศูนย์":
satang = num_to_thaiword(num_dec)
if satang and satang != "ศูนย์":
ret = "".join([ret, satang, "สตางค์"])
else:
ret = "".join([ret, "ถ้วน"])
Expand All @@ -139,7 +122,18 @@ def num_to_thaiword(number):
ret = "ศูนย์"
else:
_POS_CALL = ["แสน", "หมื่น", "พัน", "ร้อย", "สิบ", ""]
_NUM_CALL = ["", "หนึ่ง", "สอง", "สาม", "สี่", "ห้า", "หก", "เจ็ด", "แปด", "เก้า"]
_NUM_CALL = [
"",
"หนึ่ง",
"สอง",
"สาม",
"สี่",
"ห้า",
"หก",
"เจ็ด",
"แปด",
"เก้า",
]

if number > 1000000:
ret += num_to_thaiword(int(number / 1000000)) + "ล้าน"
Expand Down
4 changes: 2 additions & 2 deletions pythainlp/rank/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

_STOPWORDS = thai_stopwords()


# เรียงจำนวนคำของประโยค
def rank(data, stopword=False):
"""
Expand All @@ -20,5 +21,4 @@ def rank(data, stopword=False):


if __name__ == "__main__":
text = ["แมว", "ชอบ", "ปลา", "แมว", "ชอบ", "นอน", "คน", "เป็น", "ทาส", "แมว"]
print(rank(text))
print(rank(["แมว", "ชอบ", "ปลา", "แมว", "ชอบ", "นอน", "คน", "เป็น", "ทาส", "แมว"]))
37 changes: 0 additions & 37 deletions pythainlp/romanization/royin.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,41 +180,4 @@ def romanize(word):


if __name__ == "__main__":
print(romanize("แมว") == "maeo")
print(romanize("น้าว") == "nao")
print(romanize("รวม") == "ruam")
print(romanize("ไทย") == "thai")
print(romanize("ผัวะ") == "phua")
print(romanize("ใย") == "yai")
print(romanize("ไล่") == "lai")
print(romanize("เมา") == "mao")
print(romanize("ต้น") == "ton")
print(romanize("ตาล") == "tan")
print(romanize("แสง") == "saeng")
print(romanize("เลียน") == "lian")
print(romanize("เลือก") == "lueak")
print(romanize("เธอ") == "thoe")
print(romanize("หรู") == "ru")
print(romanize("ลอม") == "lom")
print(romanize("และ") == "lae")
print(romanize("เลาะ") == "lo")
print(romanize("ลอม") == "lom")
print(romanize("เล็ง") == "leng")
print(romanize("นึก") == "nuek")
print(romanize("มัว") == "mua")
print(romanize("มีด") == "mit")
print(romanize("โค") == "kho")
print(romanize("ขอ") == "kho")
print(romanize("วรร") == "wan")
print(romanize("สรรพ") == "sap")
print(romanize("วัน") + romanize("นะ") + romanize("พง"))
print(romanize("นัด") + romanize("ชะ") + romanize("โนน"))
print(romanize("สรรพ"))
print(romanize("สรร") + romanize("หา"))
print(romanize("สรร") + romanize("หา"))
print(romanize("แมว"))
print(romanize("กร") == romanize("กอน"))
print(romanize("คฤ") + romanize("หาสน์"))
print(romanize("กฤ") + romanize("ศะ") + romanize("ฎา"))
print(romanize("ฤกษ์"))
print(romanize("ฤ") + romanize("ดู") + romanize("กาล"))
8 changes: 2 additions & 6 deletions pythainlp/sentiment/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,10 @@ def sentiment(text, engine="old"):

return "pos" if tag else "neg"
else: # default, use "old" vocabulary-based engine
with open(
os.path.join(_SENTIMENT_PATH, "vocabulary.data"), "rb"
) as in_strm:
with open(os.path.join(_SENTIMENT_PATH, "vocabulary.data"), "rb") as in_strm:
vocabulary = dill.load(in_strm)

with open(
os.path.join(_SENTIMENT_PATH, "sentiment.data"), "rb"
) as in_strm:
with open(os.path.join(_SENTIMENT_PATH, "sentiment.data"), "rb") as in_strm:
classifier = dill.load(in_strm)

text = set(word_tokenize(text)) - _STOPWORDS
Expand Down
5 changes: 1 addition & 4 deletions pythainlp/tokenize/pyicu.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,4 @@ def segment(text):


if __name__ == "__main__":
print(segment("ทดสอบระบบตัดคำด้วยไอซียู"))
print(segment("ผมชอบพูดไทยคำEnglishคำ"))
print(segment("ไทยEnglish540บาท"))
print(segment("ประหยัด ไฟเบอห้า"))
print(segment("พูดไทย2คำEnglishคำ"))
9 changes: 2 additions & 7 deletions pythainlp/util/keyboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,5 @@ def thai_to_eng(text):


if __name__ == "__main__":
a = "l;ylfu8iy["
a = eng_to_thai(a)
a = eng_to_thai(a)
b = "นามรสนอำันี"
b = thai_to_eng(b)
print(a)
print(b)
print(eng_to_thai("l;ylfu8iy["))
print(thai_to_eng("นามรสนอำันี"))

0 comments on commit 3a0cc37

Please sign in to comment.