Skip to content

Commit

Permalink
Merge pull request #234 from PyThaiNLP/issue224_refactor_tests
Browse files Browse the repository at this point in the history
Issue #224: Refactor the test file
  • Loading branch information
wannaphong committed Jun 27, 2019
2 parents 60d7cfd + 15fafd4 commit eb594f9
Show file tree
Hide file tree
Showing 12 changed files with 821 additions and 646 deletions.
652 changes: 6 additions & 646 deletions tests/__init__.py

Large diffs are not rendered by default.

76 changes: 76 additions & 0 deletions tests/test_corpus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# -*- coding: utf-8 -*-

import datetime
import os
import sys
import unittest

from nltk.corpus import wordnet as wn

from pythainlp.corpus import (
_CORPUS_PATH,
conceptnet,
countries,
download,
provinces,
remove,
thai_negations,
thai_stopwords,
thai_syllables,
thai_words,
tnc,
ttc,
wordnet,
)
from pythainlp.corpus.common import _THAI_WORDS_FILENAME


class TestCorpusPackage(unittest.TestCase):
    """Smoke tests for the ``pythainlp.corpus`` package."""

    def test_conceptnet(self):
        """ConceptNet edge lookup for a Thai word returns a non-None value."""
        edges = conceptnet.edges("รัก")
        self.assertIsNotNone(edges)

    def test_corpus(self):
        """Each corpus accessor, and ``remove``, returns a non-None value."""
        accessors = (
            countries,
            provinces,
            thai_negations,
            thai_stopwords,
            thai_syllables,
            thai_words,
        )
        for accessor in accessors:
            self.assertIsNotNone(accessor())

        # Download the "test" corpus, then remove it and "tnc_freq".
        download("test")
        for corpus_name in ("test", "tnc_freq"):
            self.assertIsNotNone(remove(corpus_name))

    def test_tnc(self):
        """TNC frequency table and single-word lookup return non-None."""
        all_freqs = tnc.word_freqs()
        self.assertIsNotNone(all_freqs)
        single_freq = tnc.word_freq("นก")
        self.assertIsNotNone(single_freq)

    def test_ttc(self):
        """TTC frequency table returns a non-None value."""
        all_freqs = ttc.word_freqs()
        self.assertIsNotNone(all_freqs)

    def test_wordnet(self):
        """Exercise the wordnet wrapper: lookups, morphy, similarity, keys."""
        self.assertIsNotNone(wordnet.langs())

        # Synset-level queries.
        spy_names = wordnet.synset("spy.n.01").lemma_names("tha")
        self.assertEqual(spy_names, ["สปาย", "สายลับ"])
        self.assertIsNotNone(wordnet.synsets("นก"))
        self.assertIsNotNone(wordnet.all_synsets(pos=wn.ADJ))

        # Lemma-level queries.
        self.assertIsNotNone(wordnet.lemmas("นก"))
        self.assertIsNotNone(wordnet.all_lemma_names(pos=wn.ADV))
        self.assertIsNotNone(wordnet.lemma("cat.n.01.cat"))

        base_form = wordnet.morphy("dogs")
        self.assertEqual(base_form, "dog")

        # The module-level similarity helpers must agree with the
        # corresponding methods on the synset objects themselves.
        bird_synset = wordnet.synset("bird.n.01")
        mouse_synset = wordnet.synset("mouse.n.01")

        wrapped_path = wordnet.path_similarity(bird_synset, mouse_synset)
        direct_path = bird_synset.path_similarity(mouse_synset)
        self.assertEqual(wrapped_path, direct_path)

        wrapped_wup = wordnet.wup_similarity(bird_synset, mouse_synset)
        direct_wup = bird_synset.wup_similarity(mouse_synset)
        self.assertEqual(wrapped_wup, direct_wup)

        # Round-trip: take the key of the first lemma of the first synset
        # for "แมว" and look the lemma up again by that key.
        first_synset = wordnet.synsets("แมว")[0]
        first_lemma = first_synset.lemmas()[0]
        lemma_key = first_lemma.key()
        self.assertIsNotNone(wordnet.lemma_from_key(lemma_key))
53 changes: 53 additions & 0 deletions tests/test_soundex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-

import datetime
import os
import sys
import unittest

from pythainlp.soundex import lk82, metasound, soundex, udom83


class TestSoundexPackage(unittest.TestCase):
    """Tests for the ``pythainlp.soundex`` package."""

    def test_soundex(self):
        """Check the ``soundex`` dispatcher and each individual engine."""
        # Dispatcher: the three named engines and an unrecognised engine
        # name ("XXX") are all expected to return a non-None value.
        for engine_name in ("lk82", "udom83", "metasound", "XXX"):
            self.assertIsNotNone(soundex("a", engine=engine_name))

        # --- lk82 ---
        for blank in (None, ""):
            self.assertEqual(lk82(blank), "")
        self.assertEqual(lk82("เหตุ"), lk82("เหด"))
        self.assertEqual(lk82("รถ"), "ร3000")
        for word in ("เกาะ", "อุยกูร์", "หยากไย่", "หอ"):
            self.assertIsNotNone(lk82(word))
        self.assertEqual(lk82("น์"), "")

        # --- udom83 ---
        for blank in (None, ""):
            self.assertEqual(udom83(blank), "")
        self.assertEqual(udom83("เหตุ"), udom83("เหด"))
        self.assertEqual(udom83("รถ"), "ร800000")

        # --- metasound ---
        for blank in (None, ""):
            self.assertEqual(metasound(blank), "")

        # Pairs of spellings that must map to the same code.
        same_code_pairs = (
            ("เหตุ", "เหด"),
            ("รักษ์", "รัก"),
        )
        for left, right in same_code_pairs:
            self.assertEqual(metasound(left), metasound(right))

        # Words with an exact expected code.
        exact_codes = (
            ("บูรณะ", "บ550"),
            ("คน", "ค500"),
            ("คนA", "ค500"),
            ("ดา", "ด000"),
        )
        for word, code in exact_codes:
            self.assertEqual(metasound(word), code)

        # Words that only need to produce some non-None result.
        smoke_words = (
            "จะ",
            "ปา",
            "งง",
            "ลา",
            "มา",
            "ยา",
            "วา",
            "บูชา",
            "กมลา",
            "กาโวกาโว",
            "สุวรรณา",
            "ดอยบอย",
        )
        for word in smoke_words:
            self.assertIsNotNone(metasound(word))
25 changes: 25 additions & 0 deletions tests/test_spell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-

import datetime
import os
import sys
import unittest

from pythainlp.spell import NorvigSpellChecker, correct, spell


class TestSpellPackage(unittest.TestCase):
    """Tests for the ``pythainlp.spell`` package."""

    def test_spell(self):
        """Check ``spell``, ``correct`` and ``NorvigSpellChecker``."""
        # spell(): None / empty input yields "", misspellings yield non-None.
        for blank in (None, ""):
            self.assertEqual(spell(blank), "")
        for misspelled in ("เน้ร", "เกสมร์"):
            self.assertIsNotNone(spell(misspelled))

        # correct(): same blank-input contract, plus one misspelling.
        for blank in (None, ""):
            self.assertEqual(correct(blank), "")
        self.assertIsNotNone(correct("ทดสอง"))

        # A checker built with an empty dict_filter exposes a dictionary
        # and reports a non-negative probability for a word.
        norvig = NorvigSpellChecker(dict_filter="")
        self.assertIsNotNone(norvig.dictionary())
        word_prob = norvig.prob("มี")
        self.assertGreaterEqual(word_prob, 0)
25 changes: 25 additions & 0 deletions tests/test_summarize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-

import datetime
import os
import sys
import unittest

from pythainlp.summarize import summarize


class TestSummarizePackage(unittest.TestCase):
    """Tests for the ``pythainlp.summarize`` package."""

    def test_summarize(self):
        """Frequency engine picks the expected sentence; unknown engine runs."""
        # The fragments carry their own trailing spaces, so joining with an
        # empty separator rebuilds the full passage.
        fragments = (
            "อาหาร หมายถึง ของแข็งหรือของเหลว ",
            "ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว ",
            "จะทำให้เกิดพลังงานและความร้อนแก่ร่างกาย ",
            "ทำให้ร่างกายเจริญเติบโต ",
            "ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย ",
            "ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ ",
            "อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย",
        )
        text = "".join(fragments)

        top_sentence = summarize(text=text, n=1, engine="frequency")
        self.assertEqual(
            top_sentence,
            ["อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย"],
        )

        # An unrecognised engine name is still expected to return non-None.
        fallback = summarize(text, 1, engine="XX")
        self.assertIsNotNone(fallback)
113 changes: 113 additions & 0 deletions tests/test_tag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-

import datetime
import os
import sys
import unittest

from pythainlp.tag import perceptron, pos_tag, pos_tag_sents, unigram
from pythainlp.tag.locations import tag_provinces
from pythainlp.tag.named_entity import ThaiNameTagger
from pythainlp.tokenize import (
word_tokenize,
)


class TestTagPackage(unittest.TestCase):
    """Tests for ``pythainlp.tag``: POS tagging, province tagging and NER."""

    def test_pos_tag(self):
        """Check ``pos_tag``, the per-engine ``tag`` helpers, ``pos_tag_sents``."""
        tokens = ["ผม", "รัก", "คุณ"]
        corpora = ("pud", "orchid")
        empty_inputs = (None, [])

        for empty in empty_inputs:
            self.assertEqual(pos_tag(empty), [])

        # --- unigram engine ---
        for corpus_name in corpora:
            for empty in empty_inputs:
                self.assertEqual(unigram.tag(empty, corpus=corpus_name), [])

        unigram_cases = (
            (tokens, "orchid"),
            (tokens, "pud"),
            ([""], "pud"),
        )
        for words, corpus_name in unigram_cases:
            self.assertIsNotNone(
                pos_tag(words, engine="unigram", corpus=corpus_name)
            )

        unigram_tagged = pos_tag(
            word_tokenize("คุณกำลังประชุม"), engine="unigram"
        )
        self.assertEqual(
            unigram_tagged,
            [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")],
        )

        # --- perceptron engine ---
        for corpus_name in ("orchid", "pud"):
            self.assertIsNotNone(
                pos_tag(tokens, engine="perceptron", corpus=corpus_name)
            )
        for corpus_name in corpora:
            for empty in empty_inputs:
                self.assertEqual(
                    perceptron.tag(empty, corpus=corpus_name), []
                )

        # --- artagger engine ---
        for words in (None, [], tokens):
            self.assertIsNotNone(pos_tag(words, engine="artagger"))

        artagger_tagged = pos_tag(
            word_tokenize("คุณกำลังประชุม"), engine="artagger"
        )
        self.assertEqual(
            artagger_tagged,
            [("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")],
        )

        # --- sentence-level tagging ---
        for empty in empty_inputs:
            self.assertEqual(pos_tag_sents(empty), [])

        tagged_sents = pos_tag_sents([["ผม", "กิน", "ข้าว"], ["แมว", "วิ่ง"]])
        expected_sents = [
            [("ผม", "PPRS"), ("กิน", "VACT"), ("ข้าว", "NCMN")],
            [("แมว", "NCMN"), ("วิ่ง", "VACT")],
        ]
        self.assertEqual(tagged_sents, expected_sents)

    # ### pythainlp.tag.locations

    def test_ner_locations(self):
        """A province name is tagged B-LOCATION; other tokens are tagged O."""
        tagged = tag_provinces(["หนองคาย", "น่าอยู่"])
        self.assertEqual(
            tagged,
            [("หนองคาย", "B-LOCATION"), ("น่าอยู่", "O")],
        )

    # ### pythainlp.tag.named_entity

    def test_ner(self):
        """``ThaiNameTagger.get_ner`` handles empty, short and long input."""
        # Multi-line input; continuation lines start at column 0 because
        # they are part of the string literal itself.
        long_address = """คณะวิทยาศาสตร์ประยุกต์และวิศวกรรมศาสตร์ มหาวิทยาลัยขอนแก่น
วิทยาเขตหนองคาย 112 หมู่ 7 บ้านหนองเดิ่น ตำบลหนองกอมเกาะ อำเภอเมือง
จังหวัดหนองคาย 43000"""

        tagger = ThaiNameTagger()
        self.assertEqual(tagger.get_ner(""), [])
        self.assertIsNotNone(tagger.get_ner("แมวทำอะไรตอนห้าโมงเช้า"))
        self.assertIsNotNone(
            tagger.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False)
        )
        self.assertIsNotNone(tagger.get_ner(long_address))

        # NOTE(review): the exact-output assertions below are disabled; the
        # reason is not recorded in this file -- confirm before re-enabling.
        # self.assertEqual(
        #     tagger.get_ner("แมวทำอะไรตอนห้าโมงเช้า"),
        #     [
        #         ("แมว", "NCMN", "O"),
        #         ("ทำ", "VACT", "O"),
        #         ("อะไร", "PNTR", "O"),
        #         ("ตอน", "NCMN", "O"),
        #         ("ห้า", "VSTA", "B-TIME"),
        #         ("โมง", "NCMN", "I-TIME"),
        #         ("เช้า", "ADVN", "I-TIME"),
        #     ],
        # )
        # self.assertEqual(
        #     tagger.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False),
        #     [
        #         ("แมว", "O"),
        #         ("ทำ", "O"),
        #         ("อะไร", "O"),
        #         ("ตอน", "O"),
        #         ("ห้า", "B-TIME"),
        #         ("โมง", "I-TIME"),
        #         ("เช้า", "I-TIME"),
        #     ],
        # )
Loading

0 comments on commit eb594f9

Please sign in to comment.