Skip to content

Commit

Permalink
Add family names
Browse files Browse the repository at this point in the history
  • Loading branch information
bact committed Sep 12, 2020
1 parent 5b8ad72 commit 04ff603
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/api/corpus.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Modules
.. autofunction:: thai_words
.. autofunction:: thai_syllables
.. autofunction:: thai_negations
.. autofunction:: thai_family_names
.. autofunction:: thai_female_names
.. autofunction:: thai_male_names
.. autofunction:: pythainlp.corpus.conceptnet.edges
Expand Down
2 changes: 2 additions & 0 deletions pythainlp/corpus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"get_corpus_path",
"provinces",
"remove",
"thai_family_names",
"thai_female_names",
"thai_male_names",
"thai_negations",
Expand Down Expand Up @@ -86,6 +87,7 @@ def corpus_db_path() -> str:
from pythainlp.corpus.common import (
countries,
provinces,
thai_family_names,
thai_female_names,
thai_male_names,
thai_negations,
Expand Down
22 changes: 21 additions & 1 deletion pythainlp/corpus/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
__all__ = [
"countries",
"provinces",
"thai_family_names",
"thai_female_names",
"thai_male_names",
"thai_negations",
Expand All @@ -14,9 +15,10 @@
"thai_words",
]

from pythainlp.corpus import get_corpus
from typing import Union

from pythainlp.corpus import get_corpus

_THAI_COUNTRIES = set()
_THAI_COUNTRIES_FILENAME = "countries_th.txt"

Expand All @@ -36,6 +38,8 @@
# Module-level holder returned by thai_negations(), plus the corpus file
# name associated with it.
_THAI_NEGATIONS = set()
_THAI_NEGATIONS_FILENAME = "negations_th.txt"

# NOTE(review): "FAMLIY" is a misspelling of "FAMILY". thai_family_names()
# refers to these exact identifiers, so any rename must change both places
# together.
# Starts as an empty set; thai_family_names() replaces it with the result of
# get_corpus(_THAI_FAMLIY_NAMES_FILENAME) the first time it is called.
_THAI_FAMLIY_NAMES = set()
_THAI_FAMLIY_NAMES_FILENAME = "family_names_th.txt"
# Presumably filled lazily in the same way by thai_female_names() and
# thai_male_names(); their bodies are not visible here -- confirm.
_THAI_FEMALE_NAMES = set()
_THAI_FEMALE_NAMES_FILENAME = "person_names_female_th.txt"
_THAI_MALE_NAMES = set()
Expand Down Expand Up @@ -167,6 +171,22 @@ def thai_negations() -> frozenset:
return _THAI_NEGATIONS


def thai_family_names() -> frozenset:
    """
    Get the collection of Thai family names, loading it on first use.

    The names are obtained through ``get_corpus`` using the file name
    ``family_names_th.txt``
    (See: `dev/pythainlp/corpus/family_names_th.txt
    <https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/family_names_th.txt>`_)
    and stored in a module-level variable. Once a non-empty result has
    been stored, later calls return it directly without calling
    ``get_corpus`` again.

    :return: :class:`frozenset` containing Thai family names.
    :rtype: :class:`frozenset`
    """
    # The spelling "FAMLIY" (sic) matches the module-level variable name.
    global _THAI_FAMLIY_NAMES

    # Fast path: something has already been loaded and cached.
    if _THAI_FAMLIY_NAMES:
        return _THAI_FAMLIY_NAMES

    # Still empty (initial state, or the corpus produced nothing): load it.
    _THAI_FAMLIY_NAMES = get_corpus(_THAI_FAMLIY_NAMES_FILENAME)
    return _THAI_FAMLIY_NAMES


def thai_female_names() -> frozenset:
"""
Return a frozenset of Thai female names
Expand Down
3 changes: 3 additions & 0 deletions tests/test_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
get_corpus_path,
provinces,
remove,
thai_family_names,
thai_female_names,
thai_male_names,
thai_negations,
Expand Down Expand Up @@ -41,6 +42,8 @@ def test_corpus(self):
self.assertEqual(
len(provinces(details=False)), len(provinces(details=True))
)
self.assertIsInstance(thai_family_names(), frozenset)
self.assertIsInstance(list(thai_family_names())[0], str)
self.assertIsInstance(thai_female_names(), frozenset)
self.assertIsInstance(thai_male_names(), frozenset)

Expand Down

0 comments on commit 04ff603

Please sign in to comment.