Skip to content

Commit

Permalink
Add test case for train(save_loc=)
Browse files (browse the repository at this point in the history)
  • Loading branch information
bact committed Aug 23, 2020
1 parent ee0f5e5 commit 83eee2e
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 5 deletions.
7 changes: 4 additions & 3 deletions pythainlp/tag/_tag_perceptron.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def train(
"""
self._make_tagdict(sentences)
self.model.classes = self.classes
for iter_ in range(nr_iter):
for _ in range(nr_iter):
c = 0
n = 0
for sentence in sentences:
Expand Down Expand Up @@ -192,5 +192,6 @@ def _make_tagdict(self, sentences: List[List[Tuple[str, str]]]) -> None:
self.tagdict[word] = tag


def _pc(n, d) -> float:
return (float(n) / d) * 100
# for logging
# def _pc(n, d) -> float:
# return (float(n) / d) * 100
31 changes: 29 additions & 2 deletions tests/test_tag.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

import unittest
from os import path

from pythainlp.tag import (
PerceptronTagger,
Expand Down Expand Up @@ -99,11 +100,12 @@ def test_pos_tag(self):

def test_perceptron_tagger(self):
tagger = PerceptronTagger()
# Training data: "กิน" appears more than 20 times to trigger the conditions in _make_tagdict()
data = [
[("คน", "N"), ("เดิน", "V")],
[("ฉัน", "N"), ("เดิน", "V")],
[("แมว", "N"), ("เดิน", "V")],
[("คน", "N"), ("วิ่ง", "V")],
[("แมว", "N"), ("วิ่ง", "V")],
[("ปลา", "N"), ("ว่าย", "V")],
[("นก", "N"), ("บิน", "V")],
[("คน", "N"), ("พูด", "V")],
Expand All @@ -118,8 +120,33 @@ def test_perceptron_tagger(self):
[("นก", "N"), ("กิน", "V"), ("ปลา", "N")],
[("คน", "N"), ("กิน", "V"), ("กาแฟ", "N")],
[("คน", "N"), ("คน", "V"), ("กาแฟ", "N")],
[("พระ", "N"), ("ฉัน", "V"), ("กาแฟ", "N")],
[("พระ", "N"), ("คน", "V"), ("กาแฟ", "N")],
[("พระ", "N"), ("ฉัน", "V"), ("ข้าว", "N")],
[("ฉัน", "N"), ("กิน", "V"), ("ข้าว", "N")],
[("เธอ", "N"), ("กิน", "V"), ("ปลา", "N")],
[("ปลา", "N"), ("กิน", "V"), ("แมลง", "N")],
[("แมวน้ำ", "N"), ("กิน", "V"), ("ปลา", "N")],
[("หนู", "N"), ("กิน", "V")],
[("เสือ", "N"), ("กิน", "V")],
[("ยีราฟ", "N"), ("กิน", "V")],
[("แรด", "N"), ("กิน", "V")],
[("หมู", "N"), ("กิน", "V")],
[("แมลง", "N"), ("กิน", "V")],
[("สิงโต", "N"), ("กิน", "V")],
[("เห็บ", "N"), ("กิน", "V")],
[("เหา", "N"), ("กิน", "V")],
[("เต่า", "N"), ("กิน", "V")],
[("กระต่าย", "N"), ("กิน", "V")],
[("จิ้งจก", "N"), ("กิน", "V")],
[("หมี", "N"), ("กิน", "V")],
[("หมา", "N"), ("กิน", "V")],
[("ตะพาบ", "N"), ("กิน", "V")],
[("เม่น", "N"), ("กิน", "V")],
[("หนอน", "N"), ("กิน", "V")],
]
tagger.train(data)
tagger.train(data, save_loc="temp.pkl")
self.assertTrue(path.exists("temp.pkl"))
self.assertEqual(len(tagger.tag(["นก", "เดิน"])), 2)

# ### pythainlp.tag.locations
Expand Down

0 comments on commit 83eee2e

Please sign in to comment.