diff --git a/docs/api/corpus.rst b/docs/api/corpus.rst index 7846bb91c..81c8295e9 100644 --- a/docs/api/corpus.rst +++ b/docs/api/corpus.rst @@ -22,7 +22,6 @@ Modules TNC --- -.. autofunction:: pythainlp.corpus.tnc.word_freq .. autofunction:: pythainlp.corpus.tnc.word_freqs TTC @@ -51,4 +50,4 @@ Definition ++++++++++ Synset - a set of synonyms that share a common meaning. \ No newline at end of file + a set of synonyms that share a common meaning. diff --git a/pythainlp/corpus/tnc.py b/pythainlp/corpus/tnc.py index 79aa2462b..a08cfd369 100644 --- a/pythainlp/corpus/tnc.py +++ b/pythainlp/corpus/tnc.py @@ -5,64 +5,14 @@ Credit: Korakot Chaovavanich‎ https://www.facebook.com/photo.php?fbid=363640477387469&set=gm.434330506948445&type=3&permPage=1 """ -import re from typing import List, Tuple -import requests from pythainlp.corpus import get_corpus -__all__ = ["word_freq", "word_freqs"] +__all__ = ["word_freqs"] _FILENAME = "tnc_freq.txt" - -def word_freq(word: str, domain: str = "all") -> int: - """ - - .. note:: - **Not officially supported.** - Get word frequency of a word by domain. - This function will make a query to the server of - Thai National Corpus. - Internet connection is required. - - .. warning:: - Currently (as of 29 April 2019) it is likely to return 0, - regardless of the word, as the service URL has been changed - and the code is not updated yet. - New URL is http://www.arts.chula.ac.th/~ling/tnc3/ - - :param string word: word - :param string domain: domain - """ - listdomain = { - "all": "", - "imaginative": "1", - "natural-pure-science": "2", - "applied-science": "3", - "social-science": "4", - "world-affairs-history": "5", - "commerce-finance": "6", - "arts": "7", - "belief-thought": "8", - "leisure": "9", - "others": "0", - } - url = "http://www.arts.chula.ac.th/~ling/tnc3/" - data = {"genre[]": "", "domain[]": listdomain[domain], "sortby": "perc", "p": word} - - r = requests.post(url, data=data) - - pat = re.compile(r'TOTAL.*?#ffffff">(.*?)', flags=re.DOTALL) - match = pat.search(r.text) - - n = 0 - if match: - n = int(match.group(1).strip()) - - return n - - def word_freqs() -> List[Tuple[str, int]]: """ Get word frequency from Thai National Corpus (TNC) diff --git a/tests/test_corpus.py b/tests/test_corpus.py index a340c78ac..fe9cebb96 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -42,7 +42,6 @@ def test_corpus(self): def test_tnc(self): self.assertIsNotNone(tnc.word_freqs()) - self.assertIsNotNone(tnc.word_freq("นก")) def test_ttc(self): self.assertIsNotNone(ttc.word_freqs())