Fix PEP8, add corpus details

PyThaiNLP · Aug 23, 2020 · commit 89d4282
1 parent: 9cc4dad

Showing 2 changed files with 7 additions and 5 deletions.
diff --git a/pythainlp/corpus/corpus_license.md b/pythainlp/corpus/corpus_license.md
@@ -18,7 +18,6 @@ negations_th.txt | Negation word list
 stopwords_th.txt | Stop word list
 syllables_th.txt | List of Thai syllables
 thailand_provinces_th.csv | List of Thailand provinces in Thai
-thailand_provinces_th.txt | List of Thailand provinces in Thai
 tnc_freq.txt | Words and their frequencies, from Thai National Corpus
 ttc_freq.txt | Words and their frequencies, from Thai Textbook Corpus
 words_th.txt | List of Thai words
@@ -46,9 +45,11 @@ https://creativecommons.org/licenses/by/4.0/
 
 Filename | Description
 ---------|------------
-sentenceseg_crfcut.model | Sentence segmentation model
-pos_ud_perceptron.pkl | Part-of-speech tagging model
-pos_ud_unigram.json | Part-of-speech tagging model
+pos_orchid_perceptron.pkl | Part-of-speech tagging model, trained from ORCHID data, using perceptron
+pos_orchid_unigram.json | Part-of-speech tagging model, trained from ORCHID data, using unigram
+pos_ud_perceptron.pkl | Part-of-speech tagging model, trained from Parallel Universal Dependencies treebank, using perceptron
+pos_ud_unigram.json | Part-of-speech tagging model, trained from Parallel Universal Dependencies treebank, using unigram
+sentenceseg_crfcut.model | Sentence segmentation model, trained from TED subtitles, using CRF
 
 
 ## Thai WordNet

diff --git a/tests/test_tag.py b/tests/test_tag.py
@@ -100,7 +100,8 @@ def test_pos_tag(self):
 
     def test_perceptron_tagger(self):
         tagger = PerceptronTagger()
-        # train data, with "กิน" > 20 instances to trigger conditions in _make_tagdict()
+        # train data, with "กิน" > 20 instances to trigger conditions
+        # in _make_tagdict()
         data = [
             [("คน", "N"), ("เดิน", "V")],
             [("ฉัน", "N"), ("เดิน", "V")],