Skip to content

Commit

Permalink
Update NER
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Sep 17, 2022
1 parent e2a3404 commit 4c69dd4
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 11 deletions.
12 changes: 9 additions & 3 deletions pythainlp/corpus/default_db.json
Original file line number Diff line number Diff line change
@@ -1,21 +1,27 @@
{
"thainer": {
"name": "thainer",
"latest_version": "1.5.1",
"latest_version": "1.5.2",
"description": "Thai Named Entity Recognition",
"long_description": "Thai Named Entity Recognition",
"url": "https://github.com/wannaphong/thai-ner/",
"authors": [
"Wannaphong Phatthiyaphaibun"
],
"author_email": "wannaphong@kkumail.com",
"author_email": "wannaphong@yahoo.com",
"license": "cc-by-4.0",
"versions": {
"1.5.1": {
"filename": "thainer_crf_1_5_1.model",
"filename": "thainer_crf_1_5_2.model",
"download_url": "https://github.com/wannaphong/thai-ner/releases/download/1.5/thai-ner-1-5-newmm-lst20.crfsuite",
"md5": "-",
"pythainlp_version": ">=2.2.7"
},
"1.5.2": {
"filename": "thainer_crf_1_5_2.model",
"download_url": "https://github.com/wannaphong/thai-ner/releases/download/1.5.2/thainer_crf_1_5_2.model",
"md5": "-",
"pythainlp_version": ">=3.1.0"
}
}
}
Expand Down
Binary file added pythainlp/corpus/thainer_crf_1_5_2.model
Binary file not shown.
2 changes: 1 addition & 1 deletion pythainlp/tag/named_entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class NER:
* *tltk* - wrapper for `TLTK <https://pypi.org/project/tltk/>`_.
**Options for corpus**
* *thaimer* - Thai NER corpus
* *thainer* - Thai NER corpus
* *lst20* - lst20 corpus (wangchanberta only). \
`LST20 <https://aiforthai.in.th/corpus.php>`_ corpus \
by National Electronics and Computer Technology Center, Thailand \
Expand Down
17 changes: 10 additions & 7 deletions pythainlp/tag/thainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,27 @@ class ThaiNameTagger:
"""
Thai named-entity recognizer.
:param str version: Thai NER version.
It supports Thai NER 1.4 & 1.5.
The default value is `1.5`
It supports Thai NER 1.4, 1.5, 1.5.1 and 1.5.2.
The default value is `1.5.2`
:Example:
::
from pythainlp.tag.named_entity import ThaiNameTagger
thainer15 = ThaiNameTagger(version="1.5")
thainer15 = ThaiNameTagger(version="1.5.2")
thainer15.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.")
thainer14 = ThaiNameTagger(version="1.4")
thainer14.get_ner("วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.")
"""
def __init__(self, version: str = "1.5") -> None:
def __init__(self, version: str = "1.5.2") -> None:
"""
Thai named-entity recognizer.
:param str version: Thai NER version.
It supports Thai NER 1.4 & 1.5.
The default value is `1.5`
It supports Thai NER 1.4, 1.5, 1.5.1 and 1.5.2.
The default value is `1.5.2`
"""
self.crf = CRFTagger()

Expand All @@ -105,7 +105,10 @@ def __init__(self, version: str = "1.5") -> None:
self.pos_tag_name = "orchid_ud"
else:
self.crf.open(get_corpus_path(_CORPUS_NAME, version="1.5"))
self.pos_tag_name = "lst20"
if version=="1.5" or version=="1.5.1":
self.pos_tag_name = "lst20"
else:
self.pos_tag_name = "orchid"

def get_ner(
self, text: str, pos: bool = True, tag: bool = False
Expand Down

0 comments on commit 4c69dd4

Please sign in to comment.