-
Notifications
You must be signed in to change notification settings - Fork 270
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #234 from PyThaiNLP/issue224_refactor_tests
Issue #224: Refactor the test file
- Loading branch information
Showing
12 changed files
with
821 additions
and
646 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import datetime | ||
import os | ||
import sys | ||
import unittest | ||
|
||
from nltk.corpus import wordnet as wn | ||
|
||
from pythainlp.corpus import ( | ||
_CORPUS_PATH, | ||
conceptnet, | ||
countries, | ||
download, | ||
provinces, | ||
remove, | ||
thai_negations, | ||
thai_stopwords, | ||
thai_syllables, | ||
thai_words, | ||
tnc, | ||
ttc, | ||
wordnet, | ||
) | ||
from pythainlp.corpus.common import _THAI_WORDS_FILENAME | ||
|
||
|
||
class TestCorpusPackage(unittest.TestCase):
    """Smoke and regression checks for the ``pythainlp.corpus`` package."""

    def test_conceptnet(self):
        # An edge lookup for a Thai word must give back something.
        edges = conceptnet.edges("รัก")
        self.assertIsNotNone(edges)

    def test_corpus(self):
        # Every word-list loader is expected to return a non-None value.
        loaders = (
            countries,
            provinces,
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
        )
        for load in loaders:
            self.assertIsNotNone(load())

        # The download comes before the removals (presumably so that the
        # "test" corpus exists when it is removed -- confirm against
        # pythainlp.corpus).
        download("test")
        for corpus_name in ("test", "tnc_freq"):
            self.assertIsNotNone(remove(corpus_name))

    def test_tnc(self):
        # Whole frequency table, then a single-word lookup.
        all_freqs = tnc.word_freqs()
        self.assertIsNotNone(all_freqs)
        bird_freq = tnc.word_freq("นก")
        self.assertIsNotNone(bird_freq)

    def test_ttc(self):
        all_freqs = ttc.word_freqs()
        self.assertIsNotNone(all_freqs)

    def test_wordnet(self):
        self.assertIsNotNone(wordnet.langs())

        # Synset access: Thai lemma names of a known English synset.
        thai_spy_names = wordnet.synset("spy.n.01").lemma_names("tha")
        self.assertEqual(thai_spy_names, ["สปาย", "สายลับ"])
        self.assertIsNotNone(wordnet.synsets("นก"))
        self.assertIsNotNone(wordnet.all_synsets(pos=wn.ADJ))

        # Lemma access.
        self.assertIsNotNone(wordnet.lemmas("นก"))
        self.assertIsNotNone(wordnet.all_lemma_names(pos=wn.ADV))
        self.assertIsNotNone(wordnet.lemma("cat.n.01.cat"))

        # Morphological reduction of an English plural.
        self.assertEqual(wordnet.morphy("dogs"), "dog")

        # The module-level similarity helpers must agree with the methods
        # on the synset objects themselves.
        bird = wordnet.synset("bird.n.01")
        mouse = wordnet.synset("mouse.n.01")
        via_module = wordnet.path_similarity(bird, mouse)
        via_synset = bird.path_similarity(mouse)
        self.assertEqual(via_module, via_synset)
        via_module = wordnet.wup_similarity(bird, mouse)
        via_synset = bird.wup_similarity(mouse)
        self.assertEqual(via_module, via_synset)

        # A key taken from the first lemma of the first "แมว" synset must
        # resolve back to a lemma.
        first_synset = wordnet.synsets("แมว")[0]
        cat_key = first_synset.lemmas()[0].key()
        self.assertIsNotNone(wordnet.lemma_from_key(cat_key))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import datetime | ||
import os | ||
import sys | ||
import unittest | ||
|
||
from pythainlp.soundex import lk82, metasound, soundex, udom83 | ||
|
||
|
||
class TestSoundexPackage(unittest.TestCase):
    """Checks for the ``pythainlp.soundex`` entry point and its engines."""

    def test_soundex(self):
        # The dispatcher must return something for each engine name used
        # here, including the unrecognised "XXX".
        for engine_name in ("lk82", "udom83", "metasound", "XXX"):
            self.assertIsNotNone(soundex("a", engine=engine_name))

        # --- lk82 ---
        for blank in (None, ""):
            self.assertEqual(lk82(blank), "")
        self.assertEqual(lk82("เหตุ"), lk82("เหด"))
        self.assertEqual(lk82("รถ"), "ร3000")
        for word in ("เกาะ", "อุยกูร์", "หยากไย่", "หอ"):
            self.assertIsNotNone(lk82(word))
        self.assertEqual(lk82("น์"), "")

        # --- udom83 ---
        for blank in (None, ""):
            self.assertEqual(udom83(blank), "")
        self.assertEqual(udom83("เหตุ"), udom83("เหด"))
        self.assertEqual(udom83("รถ"), "ร800000")

        # --- metasound ---
        for blank in (None, ""):
            self.assertEqual(metasound(blank), "")

        # Pairs of spellings that must map to the same code.
        same_sound_pairs = (
            ("เหตุ", "เหด"),
            ("รักษ์", "รัก"),
        )
        for left, right in same_sound_pairs:
            self.assertEqual(metasound(left), metasound(right))

        # Words with a pinned code.
        pinned_codes = (
            ("บูรณะ", "บ550"),
            ("คน", "ค500"),
            ("คนA", "ค500"),
            ("ดา", "ด000"),
        )
        for word, code in pinned_codes:
            self.assertEqual(metasound(word), code)

        # Remaining words only need to produce a non-None result.
        smoke_words = (
            "จะ",
            "ปา",
            "งง",
            "ลา",
            "มา",
            "ยา",
            "วา",
            "บูชา",
            "กมลา",
            "กาโวกาโว",
            "สุวรรณา",
            "ดอยบอย",
        )
        for word in smoke_words:
            self.assertIsNotNone(metasound(word))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import datetime | ||
import os | ||
import sys | ||
import unittest | ||
|
||
from pythainlp.spell import NorvigSpellChecker, correct, spell | ||
|
||
|
||
class TestSpellPackage(unittest.TestCase):
    """Checks for ``spell``, ``correct`` and ``NorvigSpellChecker``."""

    def test_spell(self):
        # spell(): blank input gives "", misspellings give a non-None result.
        for blank in (None, ""):
            self.assertEqual(spell(blank), "")
        for misspelled in ("เน้ร", "เกสมร์"):
            self.assertIsNotNone(spell(misspelled))

        # correct(): same contract for blank input.
        for blank in (None, ""):
            self.assertEqual(correct(blank), "")
        self.assertIsNotNone(correct("ทดสอง"))

        # A checker built with an empty dict_filter must expose a dictionary
        # and report a non-negative probability for a word.
        norvig = NorvigSpellChecker(dict_filter="")
        self.assertIsNotNone(norvig.dictionary())
        probability = norvig.prob("มี")
        self.assertGreaterEqual(probability, 0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import datetime | ||
import os | ||
import sys | ||
import unittest | ||
|
||
from pythainlp.summarize import summarize | ||
|
||
|
||
class TestSummarizePackage(unittest.TestCase):
    """Checks for ``pythainlp.summarize.summarize``."""

    def test_summarize(self):
        # Space-separated Thai passage; adjacent literals are joined by the
        # compiler into one string.
        passage = (
            "อาหาร หมายถึง ของแข็งหรือของเหลว "
            "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว "
            "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย "
            "ทำให้ร่างกายเจริญเติบโต "
            "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย "
            "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ "
            "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"
        )

        # The frequency engine, asked for one sentence, must pick the last.
        expected = ["อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"]
        picked = summarize(text=passage, n=1, engine="frequency")
        self.assertEqual(picked, expected)

        # The engine name "XX" must still yield a non-None result.
        self.assertIsNotNone(summarize(passage, 1, engine="XX"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
# -*- coding: utf-8 -*- | ||
|
||
import datetime | ||
import os | ||
import sys | ||
import unittest | ||
|
||
from pythainlp.tag import perceptron, pos_tag, pos_tag_sents, unigram | ||
from pythainlp.tag.locations import tag_provinces | ||
from pythainlp.tag.named_entity import ThaiNameTagger | ||
from pythainlp.tokenize import ( | ||
word_tokenize, | ||
) | ||
|
||
|
||
class TestTagPackage(unittest.TestCase):
    """Checks for ``pythainlp.tag``: POS tagging, province tagging, NER."""

    def test_pos_tag(self):
        tokens = ["ผม", "รัก", "คุณ"]
        # Expected tags for the tokenised sentence "คุณกำลังประชุม".
        meeting_tags = [
            ("คุณ", "PPRS"),
            ("กำลัง", "XVBM"),
            ("ประชุม", "VACT"),
        ]

        # Missing or empty input gives an empty list.
        for nothing in (None, []):
            self.assertEqual(pos_tag(nothing), [])

        # --- unigram engine ---
        for corpus_name in ("pud", "orchid"):
            for nothing in (None, []):
                self.assertEqual(unigram.tag(nothing, corpus=corpus_name), [])

        for corpus_name in ("orchid", "pud"):
            self.assertIsNotNone(
                pos_tag(tokens, engine="unigram", corpus=corpus_name)
            )
        self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud"))
        unigram_result = pos_tag(
            word_tokenize("คุณกำลังประชุม"), engine="unigram"
        )
        self.assertEqual(unigram_result, meeting_tags)

        # --- perceptron engine ---
        for corpus_name in ("orchid", "pud"):
            self.assertIsNotNone(
                pos_tag(tokens, engine="perceptron", corpus=corpus_name)
            )
        for corpus_name in ("pud", "orchid"):
            for nothing in (None, []):
                self.assertEqual(
                    perceptron.tag(nothing, corpus=corpus_name), []
                )

        # --- artagger engine ---
        for sample in (None, [], tokens):
            self.assertIsNotNone(pos_tag(sample, engine="artagger"))
        artagger_result = pos_tag(
            word_tokenize("คุณกำลังประชุม"), engine="artagger"
        )
        self.assertEqual(artagger_result, meeting_tags)

        # --- sentence-level tagging ---
        for nothing in (None, []):
            self.assertEqual(pos_tag_sents(nothing), [])
        sentences = [["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]]
        expected_sentences = [
            [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
            [("แมว", "NCMN"), ("วิ่ง", "VACT")],
        ]
        self.assertEqual(pos_tag_sents(sentences), expected_sentences)

    # ### pythainlp.tag.locations

    def test_ner_locations(self):
        # The province name is tagged B-LOCATION, the other token "O".
        tagged = tag_provinces(["หนองคาย", "น่าอยู่"])
        expected = [("หนองคาย", "B-LOCATION"), ("น่าอยู่", "O")]
        self.assertEqual(tagged, expected)

    # ### pythainlp.tag.named_entity

    def test_ner(self):
        tagger = ThaiNameTagger()
        question = "แมวทำอะไรตอนห้าโมงเช้า"
        # NOTE(review): the leading whitespace of the continuation lines
        # could not be recovered from the rendered diff -- confirm the exact
        # literal against the repository.
        address = """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น
วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง
จังหวัดหนองคาย 43000"""

        self.assertEqual(tagger.get_ner(""), [])
        self.assertIsNotNone(tagger.get_ner(question))
        self.assertIsNotNone(tagger.get_ner(question, pos=False))
        self.assertIsNotNone(tagger.get_ner(address))

        # NOTE(review): the exact-output assertions below are disabled in
        # the original; the reason is not visible here. They are kept as a
        # record of the expected tags.
        # self.assertEqual(
        #     ner.get_ner("แมวทำอะไรตอนห้าโมงเช้า"),
        #     [
        #         ("แมว", "NCMN", "O"),
        #         ("ทำ", "VACT", "O"),
        #         ("อะไร", "PNTR", "O"),
        #         ("ตอน", "NCMN", "O"),
        #         ("ห้า", "VSTA", "B-TIME"),
        #         ("โมง", "NCMN", "I-TIME"),
        #         ("เช้า", "ADVN", "I-TIME"),
        #     ],
        # )
        # self.assertEqual(
        #     ner.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False),
        #     [
        #         ("แมว", "O"),
        #         ("ทำ", "O"),
        #         ("อะไร", "O"),
        #         ("ตอน", "O"),
        #         ("ห้า", "B-TIME"),
        #         ("โมง", "I-TIME"),
        #         ("เช้า", "I-TIME"),
        #     ],
        # )
Oops, something went wrong.