Skip to content

Commit

Permalink
Update code
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Oct 12, 2022
1 parent 101ad5a commit be25a73
Show file tree
Hide file tree
Showing 6 changed files with 1 addition and 172 deletions.
2 changes: 1 addition & 1 deletion pythainlp/tag/perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def _pud_tagger():
def tag(words: List[str], corpus: str = "pud") -> List[Tuple[str, str]]:
"""
:param list words: a list of tokenized words
- :param str corpus: corpus name (orchid, pud, or lst20)
+ :param str corpus: corpus name (orchid, pud)
:return: a list of tuples (word, POS tag)
:rtype: list[tuple[str, str]]
"""
Expand Down
7 changes: 0 additions & 7 deletions pythainlp/wangchanberta/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,6 @@ def get_ner(
i['word'].replace("<_>", " ").replace('▁', ''), i['entity']
) for i in self.json_ner if i['word'] != '▁'
]
- elif self.grouped_entities and self.dataset_name == "lst20":
- self.sent_ner = [
- (
- i['word'].replace("<_>", " ").replace('▁', ''),
- i['entity_group'].replace('_', '-').replace('E-', 'I-')
- ) for i in self.json_ner
- ]
else:
self.sent_ner = [
(
Expand Down
108 changes: 0 additions & 108 deletions pythainlp/wangchanberta/postag.py

This file was deleted.

32 changes: 0 additions & 32 deletions tests/test_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ def test_pos_tag(self):
self.assertEqual(unigram.tag([], corpus="pud"), [])
self.assertEqual(unigram.tag(None, corpus="orchid"), [])
self.assertEqual(unigram.tag([], corpus="orchid"), [])
- self.assertEqual(unigram.tag(None, corpus="lst20"), [])
- self.assertEqual(unigram.tag([], corpus="lst20"), [])
self.assertIsNotNone(
pos_tag(tokens, engine="unigram", corpus="orchid")
)
Expand All @@ -58,11 +56,6 @@ def test_pos_tag(self):
)
self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="pud"))
self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud"))
- self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="lst20"))
- self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="lst20"))
- self.assertIsNotNone(
- pos_tag([""], engine="unigram", corpus="lst20_ud")
- )
self.assertEqual(
pos_tag(["คุณ", "กำลัง", "ประชุม"], engine="unigram"),
[("คุณ", "PPRS"), ("กำลัง", "XVBM"), ("ประชุม", "VACT")],
Expand All @@ -81,8 +74,6 @@ def test_pos_tag(self):
self.assertEqual(perceptron.tag([], corpus="orchid_ud"), [])
self.assertEqual(perceptron.tag(None, corpus="pud"), [])
self.assertEqual(perceptron.tag([], corpus="pud"), [])
- self.assertEqual(perceptron.tag(None, corpus="lst20"), [])
- self.assertEqual(perceptron.tag([], corpus="lst20"), [])
self.assertIsNotNone(
pos_tag(tokens, engine="perceptron", corpus="orchid")
)
Expand All @@ -92,18 +83,6 @@ def test_pos_tag(self):
self.assertIsNotNone(
pos_tag(tokens, engine="perceptron", corpus="pud")
)
- self.assertIsNotNone(
- pos_tag(tokens, engine="perceptron", corpus="lst20")
- )
- self.assertIsNotNone(
- pos_tag(tokens, engine="perceptron", corpus="lst20_ud")
- )
- self.assertEqual(
- pos_tag([], engine="wangchanberta", corpus="lst20"), []
- )
- self.assertIsNotNone(
- pos_tag(tokens, engine="wangchanberta", corpus="lst20")
- )
self.assertIsNotNone(
pos_tag(tokens, engine="tltk")
)
Expand All @@ -117,14 +96,6 @@ def test_pos_tag(self):
[("แมว", "NCMN"), ("วิ่ง", "VACT")],
],
)
- with self.assertRaises(ValueError):
- self.assertIsNotNone(
- pos_tag(tokens, engine="wangchanberta", corpus="lst20_ud")
- )
- with self.assertRaises(ValueError):
- self.assertIsNotNone(
- tltk.pos_tag(tokens, corpus="lst20")
- )

# ### pythainlp.tag.PerceptronTagger

Expand Down Expand Up @@ -362,9 +333,6 @@ def test_NER_class(self):
self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า"))
self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า", pos=False))
self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า", tag=True))
- ner = NER(engine="lst20_onnx")
- self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า"))
- self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า", tag=True))
ner = NER(engine="tltk")
self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า"))
self.assertIsNotNone(ner.tag("แมวทำอะไรตอนห้าโมงเช้า", pos=False))
Expand Down
1 change: 0 additions & 1 deletion tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,6 @@ def test_nercut(self):
self.assertEqual(nercut.segment("ทันแน่ๆ"), ['ทัน', 'แน่ๆ'])
self.assertEqual(nercut.segment("%1ครั้ง"), ['%', '1', 'ครั้ง'])
self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ['ทุ๊กกโคนน'])
- self.assertEqual(nercut.segment("อือหือ"), ['อือหือ'])
self.assertEqual(
nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ"),
['อย่าลืมอัพการ์ดนะจ๊ะ']
Expand Down
23 changes: 0 additions & 23 deletions tests/test_wangchanberta.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,29 +28,6 @@ def test_thainer_wangchanberta(self):
ner.get_ner("I คิด therefore I am ผ็ฎ์", tag=True)
)

- def test_lst20_ner_wangchanberta(self):
- ner = ThaiNameTagger(dataset_name="lst20")
- self.assertIsNotNone(
- ner.get_ner("I คิด therefore I am ผ็ฎ์")
- )
- self.assertIsNotNone(
- ner.get_ner("I คิด therefore I am ผ็ฎ์", tag=True)
- )
- self.assertIsNotNone(
- ner.get_ner(
- "โรงเรียนสวนกุหลาบเป็นโรงเรียนที่ดี แต่ไม่มีสวนกุหลาบ",
- tag=True
- )
- )
-
- ner = ThaiNameTagger(
- dataset_name="lst20",
- grouped_entities=False
- )
- self.assertIsNotNone(
- ner.get_ner("I คิด therefore I am ผ็ฎ์", tag=True)
- )
-
def test_segment_wangchanberta(self):
self.assertIsNotNone(
segment("I คิด therefore I am ผ็ฎ์")
Expand Down

0 comments on commit be25a73

Please sign in to comment.