From c220e5371a5aa9d8aeac5c5c142fc18b0122ba6d Mon Sep 17 00:00:00 2001 From: Arthit Suriyawongkul Date: Sun, 10 Dec 2023 12:02:15 +0000 Subject: [PATCH] Add license info Add license info to /tests and README_TH --- README_TH.md | 46 +++++++++-- pythainlp/tag/pos_tag.py | 33 ++++---- tests/__init__.py | 2 + tests/test_ancient.py | 28 +++---- tests/test_augment.py | 2 + tests/test_benchmarks.py | 8 +- tests/test_classify.py | 3 + tests/test_cli.py | 8 +- tests/test_coref.py | 2 + tests/test_corpus.py | 29 ++++--- tests/test_el.py | 3 + tests/test_generate.py | 2 + tests/test_khavee.py | 46 ++++++----- tests/test_misspell.py | 10 +-- tests/test_parse.py | 6 +- tests/test_soundex.py | 14 +++- tests/test_spell.py | 8 +- tests/test_summarize.py | 14 +++- tests/test_tag.py | 79 +++++++++++-------- tests/test_tokenize.py | 163 ++++++++++++++++++--------------------- tests/test_tools.py | 2 + tests/test_util.py | 3 + tests/test_wsd.py | 3 + 23 files changed, 305 insertions(+), 209 deletions(-) diff --git a/README_TH.md b/README_TH.md index aec1a407b..838027f2b 100644 --- a/README_TH.md +++ b/README_TH.md @@ -123,13 +123,11 @@ thainlp help ## การอ้างอิง -ถ้าคุณใช้ `PyThaiNLP` ในโปรเจคหรืองานวิจัยของคุณ คุณสามารถอ้างอิงได้ตามนี้ +หากคุณใช้ซอฟต์แวร์ `PyThaiNLP` ในโครงงานหรืองานวิจัยของคุณ คุณสามารถอ้างอิงได้ตามนี้ -``` Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Suriyawongkul, Lalita Lowphansirikul, & Pattarawat Chormai. (2016, Jun 27). PyThaiNLP: Thai Natural Language Processing in Python. Zenodo. http://doi.org/10.5281/zenodo.3519354 -``` -หรือ BibTeX entry: +โดยสามารถใช้ BibTeX นี้: ``` bib @misc{pythainlp, @@ -143,6 +141,40 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur } ``` +บทความของเราในงานประชุมวิชาการ [NLP-OSS 2023](https://nlposs.github.io/2023/): + +Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Suriyawongkul, Lalita Lowphansirikul, Pattarawat Chormai, Peerat Limkonchotiwat, Thanathip Suntorntip, and Can Udomcharoenchaikit. 2023. [PyThaiNLP: Thai Natural Language Processing in Python.](https://aclanthology.org/2023.nlposs-1.4) In Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023), pages 25–36, Singapore, Singapore. Empirical Methods in Natural Language Processing. + +โดยสามารถใช้ BibTeX นี้: + +```bib +@inproceedings{phatthiyaphaibun-etal-2023-pythainlp, + title = "{P}y{T}hai{NLP}: {T}hai Natural Language Processing in Python", + author = "Phatthiyaphaibun, Wannaphong and + Chaovavanich, Korakot and + Polpanumas, Charin and + Suriyawongkul, Arthit and + Lowphansirikul, Lalita and + Chormai, Pattarawat and + Limkonchotiwat, Peerat and + Suntorntip, Thanathip and + Udomcharoenchaikit, Can", + editor = "Tan, Liling and + Milajevs, Dmitrijs and + Chauhan, Geeticka and + Gwinnup, Jeremy and + Rippeth, Elijah", + booktitle = "Proceedings of the 3rd Workshop for Natural Language Processing Open Source Software (NLP-OSS 2023)", + month = dec, + year = "2023", + address = "Singapore, Singapore", + publisher = "Empirical Methods in Natural Language Processing", + url = "https://aclanthology.org/2023.nlposs-1.4", + pages = "25--36", + abstract = "We present PyThaiNLP, a free and open-source natural language processing (NLP) library for Thai language implemented in Python. It provides a wide range of software, models, and datasets for Thai language. 
We first provide a brief historical context of tools for Thai language prior to the development of PyThaiNLP. We then outline the functionalities it provided as well as datasets and pre-trained language models. We later summarize its development milestones and discuss our experience during its development. We conclude by demonstrating how industrial and research communities utilize PyThaiNLP in their work. The library is freely available at https://github.com/pythainlp/pythainlp.",
}
```

## ร่วมสนับสนุน PyThaiNLP

- กรุณา fork แล้วพัฒนาต่อ จากนั้นสร้าง pull request กลับมา :)

@@ -157,10 +189,10 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur

| | สัญญาอนุญาต |
|:---|:----|
-| PyThaiNLP Source Code and Notebooks | [Apache Software License 2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE) |
-| Corpora, datasets, and documentations created by PyThaiNLP | [Creative Commons Zero 1.0 Universal Public Domain Dedication License (CC0)](https://creativecommons.org/publicdomain/zero/1.0/)|
+| ซอร์สโค้ดและโน้ตบุ๊กของ PyThaiNLP | [Apache Software License 2.0](https://github.com/PyThaiNLP/pythainlp/blob/dev/LICENSE) |
+| ฐานข้อมูลภาษา ชุดข้อมูล และเอกสารที่สร้างโดยโครงการ PyThaiNLP | [Creative Commons Zero 1.0 Universal Public Domain Dedication License (CC0)](https://creativecommons.org/publicdomain/zero/1.0/)|
| Language models created by PyThaiNLP | [Creative Commons Attribution 4.0 International Public License (CC-by)](https://creativecommons.org/licenses/by/4.0/) |
-| Other corpora and models that may included with PyThaiNLP | See [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) |
+| สำหรับฐานข้อมูลภาษาและโมเดลอื่นที่อาจมาพร้อมกับซอฟต์แวร์ PyThaiNLP | ดู [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) |

## บัตรโมเดล

diff --git a/pythainlp/tag/pos_tag.py b/pythainlp/tag/pos_tag.py
index 369161c7d..7d345198f 100644
--- a/pythainlp/tag/pos_tag.py
+++ b/pythainlp/tag/pos_tag.py
@@ -4,7 +4,6 @@

 from typing import List, Tuple

-
 def pos_tag(
     words: List[str], engine: str = "perceptron", corpus: str = "orchid"
 ) -> List[Tuple[str, str]]:
@@ -169,10 +168,10 @@ def pos_tag_sents(


 def pos_tag_transformers(
-    sentence: str,
+    sentence: str,
     engine: str = "bert",
     corpus: str = "blackboard",
-)->List[List[Tuple[str, str]]]:
+) -> List[List[Tuple[str, str]]]:
     """
     Marks sentences with part-of-speech (POS) tags.

@@ -202,29 +201,33 @@

     """
     try:
-        from transformers import AutoModelForTokenClassification, \
-            AutoTokenizer, TokenClassificationPipeline
+        from transformers import (
+            AutoModelForTokenClassification,
+            AutoTokenizer,
+            TokenClassificationPipeline,
+        )
     except ImportError:
         raise ImportError(
-            "Not found transformers! Please install transformers by pip install transformers")
+            "transformers package is not installed. "
+            "Please install it with: pip install transformers"
+        )

     if not sentence:
         return []

     _blackboard_support_engine = {
-        "bert" : "lunarlist/pos_thai",
+        "bert": "lunarlist/pos_thai",
     }

     _pud_support_engine = {
-        "wangchanberta" : "Pavarissy/wangchanberta-ud-thai-pud-upos",
-        "mdeberta" : "Pavarissy/mdeberta-v3-ud-thai-pud-upos",
+        "wangchanberta": "Pavarissy/wangchanberta-ud-thai-pud-upos",
+        "mdeberta": "Pavarissy/mdeberta-v3-ud-thai-pud-upos",
     }

-    if corpus == 'blackboard' and engine in _blackboard_support_engine.keys():
+    if corpus == "blackboard" and engine in _blackboard_support_engine.keys():
         base_model = _blackboard_support_engine.get(engine)
         model = AutoModelForTokenClassification.from_pretrained(base_model)
         tokenizer = AutoTokenizer.from_pretrained(base_model)
-    elif corpus == 'pud' and engine in _pud_support_engine.keys():
+    elif corpus == "pud" and engine in _pud_support_engine.keys():
         base_model = _pud_support_engine.get(engine)
         model = AutoModelForTokenClassification.from_pretrained(base_model)
         tokenizer = AutoTokenizer.from_pretrained(base_model)
@@ -235,8 +238,10 @@
     )

-    pipeline = TokenClassificationPipeline(model=model, tokenizer=tokenizer, aggregation_strategy="simple")
+    pipeline = TokenClassificationPipeline(
+        model=model, tokenizer=tokenizer, aggregation_strategy="simple"
+    )

     outputs = pipeline(sentence)
-    word_tags = [[(tag['word'], tag['entity_group']) for tag in outputs]]
-    return word_tags
\ No newline at end of file
+    word_tags = [[(tag["word"], tag["entity_group"]) for tag in outputs]]
+    return word_tags
diff --git a/tests/__init__.py b/tests/__init__.py
index 3b60f0c32..c6b0dd672 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,4 +1,6 @@
 # -*- coding: utf-8 -*-
+# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
+# SPDX-License-Identifier: Apache-2.0
 """
 Unit test.
 """
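For context, a minimal usage sketch of the `pos_tag_transformers` API reformatted above — a sketch, not part of the patch. It assumes the optional `transformers` dependency is installed and that the Hugging Face models listed in the engine tables can be downloaded; the call signature, engine/corpus pairings, and return shape follow the hunks above:

```python
# Usage sketch for pos_tag_transformers, based on the signature and the
# engine/corpus mappings in pythainlp/tag/pos_tag.py above.
# Assumes: pip install pythainlp transformers, plus network access to
# fetch the Hugging Face models on first use.
from pythainlp.tag import pos_tag_transformers

sentence = "แมวทำอะไรตอนห้าโมงเช้า"

# "bert" pairs with the "blackboard" corpus (model: lunarlist/pos_thai).
print(pos_tag_transformers(sentence, engine="bert", corpus="blackboard"))

# "wangchanberta" and "mdeberta" pair with the "pud" corpus.
print(pos_tag_transformers(sentence, engine="wangchanberta", corpus="pud"))
print(pos_tag_transformers(sentence, engine="mdeberta", corpus="pud"))

# Each call returns a list holding one list of (word, tag) pairs,
# e.g. [[("แมว", "NOUN"), ...]], built from word_tags above.
# An unsupported engine/corpus pair raises ValueError.
```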
diff --git a/tests/test_ancient.py b/tests/test_ancient.py index d63218442..10acb6bc9 100644 --- a/tests/test_ancient.py +++ b/tests/test_ancient.py @@ -1,20 +1,22 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.ancient import aksonhan_to_current class TestAncientPackage(unittest.TestCase): def test_aksonhan_to_current(self): - self.assertEqual(aksonhan_to_current("ก"), 'ก') - self.assertEqual(aksonhan_to_current("กก"), 'กก') - self.assertEqual(aksonhan_to_current("ถนน"), 'ถนน') - self.assertEqual(aksonhan_to_current("จกก"), 'จัก') - self.assertEqual(aksonhan_to_current("ดง่ง"), 'ดั่ง') - self.assertEqual(aksonhan_to_current("นน้น"), 'นั้น') - self.assertEqual(aksonhan_to_current("ขดด"), 'ขัด') - self.assertEqual(aksonhan_to_current("ตรสส"), 'ตรัส') - self.assertEqual(aksonhan_to_current("ขบบ"), 'ขับ') - self.assertEqual(aksonhan_to_current("วนน"), 'วัน') - self.assertEqual(aksonhan_to_current("หลงง"), 'หลัง') - self.assertEqual(aksonhan_to_current("บงงคบบ"), 'บังคับ') - self.assertEqual(aksonhan_to_current("สรรเพชญ"), 'สรรเพชญ') + self.assertEqual(aksonhan_to_current("ก"), "ก") + self.assertEqual(aksonhan_to_current("กก"), "กก") + self.assertEqual(aksonhan_to_current("ถนน"), "ถนน") + self.assertEqual(aksonhan_to_current("จกก"), "จัก") + self.assertEqual(aksonhan_to_current("ดง่ง"), "ดั่ง") + self.assertEqual(aksonhan_to_current("นน้น"), "นั้น") + self.assertEqual(aksonhan_to_current("ขดด"), "ขัด") + self.assertEqual(aksonhan_to_current("ตรสส"), "ตรัส") + self.assertEqual(aksonhan_to_current("ขบบ"), "ขับ") + self.assertEqual(aksonhan_to_current("วนน"), "วัน") + self.assertEqual(aksonhan_to_current("หลงง"), "หลัง") + self.assertEqual(aksonhan_to_current("บงงคบบ"), "บังคับ") + self.assertEqual(aksonhan_to_current("สรรเพชญ"), "สรรเพชญ") diff --git a/tests/test_augment.py b/tests/test_augment.py index 51dc89082..82cb2e244 100644 --- a/tests/test_augment.py +++ b/tests/test_augment.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest import nltk diff --git a/tests/test_benchmarks.py b/tests/test_benchmarks.py index 85a67ebd1..b2bbcdba1 100644 --- a/tests/test_benchmarks.py +++ b/tests/test_benchmarks.py @@ -1,3 +1,7 @@ +# -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + import unittest import numpy as np @@ -63,8 +67,8 @@ def test_count_correctly_tokenised_words(self): rb = list(word_tokenization._find_word_boundaries(ref_sample)) # in binary [{0, 1}, ...] 
- correctly_tokenized_words = word_tokenization._find_words_correctly_tokenised( - rb, sb + correctly_tokenized_words = ( + word_tokenization._find_words_correctly_tokenised(rb, sb) ) self.assertEqual( diff --git a/tests/test_classify.py b/tests/test_classify.py index c45049f7a..7029e9d73 100644 --- a/tests/test_classify.py +++ b/tests/test_classify.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + import unittest from pythainlp.classify import GzipModel diff --git a/tests/test_cli.py b/tests/test_cli.py index c67e33e53..dbf2368f8 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from argparse import ArgumentError @@ -41,7 +43,7 @@ def test_cli_benchmark(self): "./tests/data/input.txt", "--test-file", "./tests/data/test.txt", - "--save-details" + "--save-details", ] ) ) @@ -117,9 +119,7 @@ def test_cli_tokenize(self): self.assertEqual(ex.exception.code, 2) self.assertIsNotNone( - cli.tokenize.App( - ["thainlp", "tokenize", "NOT_EXIST", "ไม่มีอยู่ จริง"] - ) + cli.tokenize.App(["thainlp", "tokenize", "NOT_EXIST", "ไม่มีอยู่ จริง"]) ) self.assertIsNotNone( cli.tokenize.App( diff --git a/tests/test_coref.py b/tests/test_coref.py index 7cf641131..0f8844976 100644 --- a/tests/test_coref.py +++ b/tests/test_coref.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.coref import coreference_resolution diff --git a/tests/test_corpus.py b/tests/test_corpus.py index 9938b1ce4..01c0c77ef 100644 --- a/tests/test_corpus.py +++ b/tests/test_corpus.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import os import unittest @@ -23,14 +25,15 @@ thai_icu_words, thai_male_names, thai_negations, + thai_orst_words, thai_stopwords, thai_syllables, - thai_synonym, + thai_synonyms, + thai_volubilis_words, + thai_wikipedia_titles, thai_words, tnc, ttc, - volubilis, - wikipedia_titles, wordnet, ) from pythainlp.corpus.util import revise_newmm_default_wordset @@ -41,24 +44,26 @@ def test_conceptnet(self): self.assertIsNotNone(conceptnet.edges("รัก")) def test_corpus(self): - self.assertIsInstance(thai_icu_words(), frozenset) - self.assertGreater(len(thai_icu_words()), 0) self.assertIsInstance(thai_negations(), frozenset) self.assertGreater(len(thai_negations()), 0) self.assertIsInstance(thai_stopwords(), frozenset) self.assertGreater(len(thai_stopwords()), 0) self.assertIsInstance(thai_syllables(), frozenset) self.assertGreater(len(thai_syllables()), 0) - self.assertIsInstance(thai_synonym(), dict) - self.assertGreater(len(thai_synonym()), 0) + self.assertIsInstance(thai_synonyms(), dict) + self.assertGreater(len(thai_synonyms()), 0) + + self.assertIsInstance(thai_icu_words(), frozenset) + self.assertGreater(len(thai_icu_words()), 0) + self.assertIsInstance(thai_orst_words(), frozenset) + self.assertGreater(len(thai_orst_words()), 0) + self.assertIsInstance(thai_volubilis_words(), frozenset) + self.assertGreater(len(thai_volubilis_words()), 0) + self.assertIsInstance(thai_wikipedia_titles(), frozenset) + self.assertGreater(len(thai_wikipedia_titles()), 0) self.assertIsInstance(thai_words(), frozenset) self.assertGreater(len(thai_words()), 
0) - self.assertIsInstance(volubilis(), frozenset) - self.assertGreater(len(volubilis()), 0) - self.assertIsInstance(wikipedia_titles(), frozenset) - self.assertGreater(len(wikipedia_titles()), 0) - self.assertIsInstance(countries(), frozenset) self.assertGreater(len(countries()), 0) self.assertIsInstance(provinces(), frozenset) diff --git a/tests/test_el.py b/tests/test_el.py index 88168c0ca..52d5845ee 100644 --- a/tests/test_el.py +++ b/tests/test_el.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + import unittest from pythainlp.el import EntityLinker diff --git a/tests/test_generate.py b/tests/test_generate.py index 6f6646bcd..219b271a2 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_khavee.py b/tests/test_khavee.py index 2de1b09f2..d51f6b376 100644 --- a/tests/test_khavee.py +++ b/tests/test_khavee.py @@ -1,43 +1,51 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.khavee import KhaveeVerifier kv = KhaveeVerifier() + class TestKhaveePackage(unittest.TestCase): def test_check_sara(self): - self.assertEqual(kv.check_sara('เริง'), 'เออ') + self.assertEqual(kv.check_sara("เริง"), "เออ") def test_check_marttra(self): - self.assertEqual(kv.check_marttra('สาว'), 'เกอว') + self.assertEqual(kv.check_marttra("สาว"), "เกอว") def test_is_sumpus(self): - self.assertTrue(kv.is_sumpus('สรร', 'อัน')) - self.assertFalse(kv.is_sumpus('สรร', 'แมว')) + self.assertTrue(kv.is_sumpus("สรร", "อัน")) + self.assertFalse(kv.is_sumpus("สรร", "แมว")) def test_check_klon(self): self.assertEqual( kv.check_klon( - 'ฉันชื่อหมูกรอบ ฉันชอบกินไก่ แล้วก็วิ่งไล่ หมาชื่อนํ้าทอง ลคคนเก่ง เอ๋งเอ๋งคะนอง \ - มีคนจับจอง เขาชื่อน้องเธียร', - k_type=4 + "ฉันชื่อหมูกรอบ ฉันชอบกินไก่ แล้วก็วิ่งไล่ หมาชื่อนํ้าทอง \ + ลคคนเก่ง เอ๋งเอ๋งคะนอง มีคนจับจอง เขาชื่อน้องเธียร", + k_type=4, ), - 'The poem is correct according to the principle.' 
+ "The poem is correct according to the principle.", ) self.assertEqual( kv.check_klon( - 'ฉันชื่อหมูกรอบ ฉันชอบกินไก่ แล้วก็วิ่งไล่ หมาชื่อนํ้าทอง ลคคนเก่ง \ - เอ๋งเอ๋งเสียงหมา มีคนจับจอง เขาชื่อน้องเธียร', - k_type=4 - ), [ - "Can't find rhyme between paragraphs ('หมา', 'จอง') in paragraph 2", - "Can't find rhyme between paragraphs ('หมา', 'ทอง') in paragraph 2" - ] + "ฉันชื่อหมูกรอบ ฉันชอบกินไก่ แล้วก็วิ่งไล่ หมาชื่อนํ้าทอง \ + ลคคนเก่ง เอ๋งเอ๋งเสียงหมา มีคนจับจอง เขาชื่อน้องเธียร", + k_type=4, + ), + [ + "Can't find rhyme between paragraphs \ + ('หมา', 'จอง') in paragraph 2", + "Can't find rhyme between paragraphs \ + ('หมา', 'ทอง') in paragraph 2", + ], ) def test_check_aek_too(self): - self.assertEqual(kv.check_aek_too('ไกด์'), False) - self.assertEqual(kv.check_aek_too('ไก่'), 'aek') - self.assertEqual(kv.check_aek_too('ไก้'), 'too') - self.assertTrue(kv.check_aek_too(['หนม', 'หน่ม', 'หน้ม']), [False, 'aek', 'too']) + self.assertEqual(kv.check_aek_too("ไกด์"), False) + self.assertEqual(kv.check_aek_too("ไก่"), "aek") + self.assertEqual(kv.check_aek_too("ไก้"), "too") + self.assertTrue( + kv.check_aek_too(["หนม", "หน่ม", "หน้ม"]), [False, "aek", "too"] + ) diff --git a/tests/test_misspell.py b/tests/test_misspell.py index f7889113b..0e60add8d 100644 --- a/tests/test_misspell.py +++ b/tests/test_misspell.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest import numpy as np @@ -18,10 +20,7 @@ def _count_difference(st1, st2): class TestTextMisspellPackage(unittest.TestCase): def setUp(self): - self.texts = [ - "เรารักคุณมากที่สุดในโลก", - "เราอยู่ที่มหาวิทยาลัยขอนแก่น" - ] + self.texts = ["เรารักคุณมากที่สุดในโลก", "เราอยู่ที่มหาวิทยาลัยขอนแก่น"] def test_misspell_naive(self): for text in self.texts: @@ -42,8 +41,7 @@ def test_misspell_with_ratio_0_percent(self): diff = _count_difference(text, misspelled) self.assertEqual( - diff, 0, - "we shouldn't have any misspell with ratio=0." + diff, 0, "we shouldn't have any misspell with ratio=0." 
) def test_misspell_with_ratio_50_percent(self): diff --git a/tests/test_parse.py b/tests/test_parse.py index 41e8a166b..7f6264419 100644 --- a/tests/test_parse.py +++ b/tests/test_parse.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.parse import dependency_parsing @@ -7,7 +9,9 @@ class TestParsePackage(unittest.TestCase): def test_dependency_parsing(self): self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="esupar")) - self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="esupar", tag="list")) + self.assertIsNotNone( + dependency_parsing("ผมเป็นคนดี", engine="esupar", tag="list") + ) # self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="transformers_ud")) # self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="transformers_ud", tag="list")) # self.assertIsNotNone(dependency_parsing("ผมเป็นคนดี", engine="spacy_thai")) diff --git a/tests/test_soundex.py b/tests/test_soundex.py index 7bf00af7d..bc01fcf2f 100644 --- a/tests/test_soundex.py +++ b/tests/test_soundex.py @@ -1,8 +1,16 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest -from pythainlp.soundex import lk82, metasound, soundex, udom83, prayut_and_somchaip +from pythainlp.soundex import ( + lk82, + metasound, + prayut_and_somchaip, + soundex, + udom83, +) from pythainlp.soundex.sound import word_approximation, audio_vector @@ -13,7 +21,7 @@ def test_soundex(self): self.assertIsNotNone(soundex("a", engine="metasound")) self.assertEqual( soundex("vp", engine="prayut_and_somchaip"), - soundex("วีพี", engine="prayut_and_somchaip") + soundex("วีพี", engine="prayut_and_somchaip"), ) self.assertIsNotNone(soundex("a", engine="XXX")) @@ -76,7 +84,7 @@ def test_soundex(self): self.assertIsNotNone(prayut_and_somchaip("ว้าว")) def test_word_approximation(self): - self.assertIsNotNone(word_approximation("รถ", ["รส","รด","คน"])) + self.assertIsNotNone(word_approximation("รถ", ["รส", "รด", "คน"])) def test_audio_vector(self): self.assertIsNotNone(audio_vector("คน")) diff --git a/tests/test_spell.py b/tests/test_spell.py index 55043930b..099ab6bd3 100644 --- a/tests/test_spell.py +++ b/tests/test_spell.py @@ -1,13 +1,15 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from pythainlp.spell import ( NorvigSpellChecker, correct, + correct_sent, spell, spell_sent, - correct_sent, symspellpy, ) @@ -139,6 +141,4 @@ def test_correct_sent(self): self.assertIsNotNone( correct_sent(self.spell_sent, engine="wanchanberta_thai_grammarly") ) - self.assertIsNotNone( - symspellpy.correct_sent(self.spell_sent) - ) + self.assertIsNotNone(symspellpy.correct_sent(self.spell_sent)) diff --git a/tests/test_summarize.py b/tests/test_summarize.py index a1b919953..c64f06321 100644 --- a/tests/test_summarize.py +++ b/tests/test_summarize.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest @@ -23,7 +25,9 @@ def test_summarize(self): # self.assertIsNotNone(summarize(text, engine="mt5-small")) # self.assertIsNotNone(summarize([])) # self.assertIsNotNone(summarize(text, 1, engine="mt5-small")) - self.assertIsNotNone(summarize(text, 1, engine="mt5-cpe-kmutt-thai-sentence-sum")) + self.assertIsNotNone( + 
summarize(text, 1, engine="mt5-cpe-kmutt-thai-sentence-sum") + ) self.assertIsNotNone(summarize(text, 1, engine="XX")) with self.assertRaises(ValueError): self.assertIsNotNone(summarize(text, 1, engine="mt5-cat")) @@ -49,7 +53,9 @@ def test_keyword_extraction(self): # test another engine for max_kw in (5, 10): - keywords = extract_keywords(text, engine="frequency", max_keywords=max_kw) + keywords = extract_keywords( + text, engine="frequency", max_keywords=max_kw + ) self.assertEqual(len(keywords), max_kw) # test invalid engine @@ -86,7 +92,9 @@ def test_keybert(self): # test ngram range ng_ranges = [(1, 1), (1, 2), (2, 2), (3, 3)] for ng_min, ng_max in ng_ranges: - keywords = keybert.extract_keywords(text, keyphrase_ngram_range=(ng_min, ng_max)) + keywords = keybert.extract_keywords( + text, keyphrase_ngram_range=(ng_min, ng_max) + ) for kw in keywords: self.assertTrue(ng_min <= len(word_tokenize(kw)) <= ng_max) diff --git a/tests/test_tag.py b/tests/test_tag.py index b5529ec5b..5ea859667 100644 --- a/tests/test_tag.py +++ b/tests/test_tag.py @@ -1,22 +1,23 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest from os import path from pythainlp.tag import ( - chunk_parse, + NER, + NNER, PerceptronTagger, + chunk_parse, perceptron, pos_tag, pos_tag_sents, pos_tag_transformers, - unigram, + tag_provinces, tltk, - NER, - NNER, - + unigram, ) -from pythainlp.tag.locations import tag_provinces from pythainlp.tag.thainer import ThaiNameTagger @@ -58,8 +59,12 @@ def test_pos_tag(self): ) self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="pud")) self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="pud")) - self.assertIsNotNone(pos_tag(tokens, engine="unigram", corpus="blackboard")) - self.assertIsNotNone(pos_tag([""], engine="unigram", corpus="blackboard")) + self.assertIsNotNone( + pos_tag(tokens, engine="unigram", corpus="blackboard") + ) + self.assertIsNotNone( + pos_tag([""], engine="unigram", corpus="blackboard") + ) self.assertIsNotNone( pos_tag([""], engine="unigram", corpus="blackboard_ud") ) @@ -98,9 +103,7 @@ def test_pos_tag(self): self.assertIsNotNone( pos_tag(tokens, engine="perceptron", corpus="blackboard_ud") ) - self.assertIsNotNone( - pos_tag(tokens, engine="tltk") - ) + self.assertIsNotNone(pos_tag(tokens, engine="tltk")) self.assertEqual(pos_tag_sents(None), []) self.assertEqual(pos_tag_sents([]), []) @@ -112,9 +115,7 @@ def test_pos_tag(self): ], ) with self.assertRaises(ValueError): - self.assertIsNotNone( - tltk.pos_tag(tokens, corpus="blackboard") - ) + self.assertIsNotNone(tltk.pos_tag(tokens, corpus="blackboard")) # ### pythainlp.tag.PerceptronTagger @@ -269,7 +270,7 @@ def test_ner(self): "คณะวิศวกรรมศาสตร์ ", ) - '''self.assertEqual( + """self.assertEqual( ner.get_ner( "มาตรา 80 ปพพ ให้ใช้อัตราภาษีร้อยละ 10.0" " ในการคำนวณภาษีมูลค่าเพิ่ม", @@ -278,7 +279,7 @@ def test_ner(self): "มาตรา 80 ปพพ " "ให้ใช้อัตราภาษีร้อยละ 10.0" " ในการคำนวณภาษีมูลค่าเพิ่ม", - )''' + )""" self.assertEqual( ner.get_ner("ยาว 20 เซนติเมตร", tag=True), @@ -293,14 +294,12 @@ def test_ner(self): ner.get_ner("ไทย", pos=False, tag=True), "ไทย" ) - self.assertIsNotNone( - ner.get_ner("บางแสนกรุงเทพ", pos=False, tag=True) - ) + self.assertIsNotNone(ner.get_ner("บางแสนกรุงเทพ", pos=False, tag=True)) # argument `tag` is False and `pos` is True self.assertEqual( ner.get_ner("ไทย", pos=True, tag=False), - [('ไทย', 'PROPN', 'B-LOCATION')], + [("ไทย", "PROPN", "B-LOCATION")], ) # arguement `tag` 
is False and `pos` is False @@ -317,9 +316,7 @@ def test_tltk_ner(self): self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า")) self.assertIsNotNone(tltk.get_ner("แมวทำอะไรตอนห้าโมงเช้า", pos=False)) self.assertIsNotNone( - tltk.get_ner( - "พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า" - ) + tltk.get_ner("พลเอกประยุกธ์ จันทร์โอชา ประกาศในฐานะหัวหน้า") ) self.assertIsNotNone( tltk.get_ner( @@ -366,14 +363,32 @@ def test_NNER_class(self): self.assertIsNotNone(nner.tag("แมวทำอะไรตอนห้าโมงเช้า")) def test_pos_tag_transformers(self): - self.assertIsNotNone(pos_tag_transformers( - words="แมวทำอะไรตอนห้าโมงเช้า", engine="bert", corpus="blackboard")) - self.assertIsNotNone(pos_tag_transformers( - words="แมวทำอะไรตอนห้าโมงเช้า", engine="mdeberta", corpus="pud")) - self.assertIsNotNone(pos_tag_transformers( - words="แมวทำอะไรตอนห้าโมงเช้า", engine="wangchanberta", corpus="pud")) + self.assertIsNotNone( + pos_tag_transformers( + words="แมวทำอะไรตอนห้าโมงเช้า", + engine="bert", + corpus="blackboard", + ) + ) + self.assertIsNotNone( + pos_tag_transformers( + words="แมวทำอะไรตอนห้าโมงเช้า", engine="mdeberta", corpus="pud" + ) + ) + self.assertIsNotNone( + pos_tag_transformers( + words="แมวทำอะไรตอนห้าโมงเช้า", + engine="wangchanberta", + corpus="pud", + ) + ) with self.assertRaises(ValueError): - pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine="non-existing-engine") + pos_tag_transformers( + words="แมวทำอะไรตอนห้าโมงเช้า", engine="non-existing-engine" + ) with self.assertRaises(ValueError): - pos_tag_transformers(words="แมวทำอะไรตอนห้าโมงเช้า", engine="bert", - corpus="non-existing corpus") \ No newline at end of file + pos_tag_transformers( + words="แมวทำอะไรตอนห้าโมงเช้า", + engine="bert", + corpus="non-existing corpus", + ) diff --git a/tests/test_tokenize.py b/tests/test_tokenize.py index 1537b62c9..431bfc0e0 100644 --- a/tests/test_tokenize.py +++ b/tests/test_tokenize.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest @@ -12,19 +14,19 @@ multi_cut, nercut, newmm, + oskut, + paragraph_tokenize, pyicu, + sefr_cut, sent_tokenize, ssg, subword_tokenize, syllable_tokenize, tcc, tcc_p, - word_tokenize, - sefr_cut, tltk, - oskut, word_detokenize, - paragraph_tokenize, + word_tokenize, ) from pythainlp.tokenize import clause_tokenize as sent_clause_tokenize from pythainlp.util import dict_trie @@ -235,8 +237,7 @@ def test_sent_tokenize(self): + " จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้" ) sent_3_toks = [ - "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา" - + "จากผลงานวิจัยที่เคยทำมาในอดีต ", + "(1) บทความนี้ผู้เขียนสังเคราะห์ขึ้นมา" + "จากผลงานวิจัยที่เคยทำมาในอดีต ", "มิได้ทำการศึกษาค้นคว้าใหม่อย่างกว้างขวางแต่อย่างใด ", "จึงใคร่ขออภัยในความบกพร่องทั้งปวงมา ณ ที่นี้", ] @@ -360,9 +361,7 @@ def test_subword_tokenize(self): self.assertIsInstance( subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"), list ) - self.assertFalse( - "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="tcc") - ) + self.assertFalse("า" in subword_tokenize("สวัสดีดาวอังคาร", engine="tcc")) self.assertIsInstance( subword_tokenize("สวัสดีดาวอังคาร", engine="tcc_p"), list ) @@ -396,9 +395,7 @@ def test_subword_tokenize(self): subword_tokenize("สวัสดีชาวโลก", engine="dict"), ["สวัส", "ดี", "ชาว", "โลก"], ) - self.assertFalse( - "า" in subword_tokenize("สวัสดีชาวโลก", engine="dict") - ) + self.assertFalse("า" in subword_tokenize("สวัสดีชาวโลก", engine="dict")) self.assertEqual(subword_tokenize(None, engine="ssg"), 
[]) self.assertEqual(subword_tokenize(None, engine="han_solo"), []) self.assertEqual( @@ -407,11 +404,10 @@ def test_subword_tokenize(self): self.assertTrue( "ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg") ) - self.assertFalse( - "า" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg") - ) + self.assertFalse("า" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")) self.assertEqual( - subword_tokenize("แมวกินปลา", engine="han_solo"), ["แมว", "กิน", "ปลา"] + subword_tokenize("แมวกินปลา", engine="han_solo"), + ["แมว", "กิน", "ปลา"], ) self.assertTrue( "ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="han_solo") @@ -472,15 +468,11 @@ def test_attacut(self): ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"], ) self.assertEqual( - attacut.segment( - "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-sc" - ), + attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-sc"), ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"], ) self.assertIsNotNone( - attacut.segment( - "ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-c" - ) + attacut.segment("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", model="attacut-c") ) def test_deepcut(self): @@ -637,13 +629,9 @@ def test_newmm(self): word_tokenize("จุ๋มง่วงนอนยัง", engine="newmm"), ["จุ๋ม", "ง่วงนอน", "ยัง"], ) + self.assertEqual(word_tokenize("จุ๋มง่วง", engine="newmm"), ["จุ๋ม", "ง่วง"]) self.assertEqual( - word_tokenize("จุ๋มง่วง", engine="newmm"), ["จุ๋ม", "ง่วง"] - ) - self.assertEqual( - word_tokenize( - "จุ๋ม ง่วง", engine="newmm", keep_whitespace=False - ), + word_tokenize("จุ๋ม ง่วง", engine="newmm", keep_whitespace=False), ["จุ๋ม", "ง่วง"], ) self.assertFalse( @@ -654,13 +642,14 @@ def test_newmm(self): ) ) self.assertEqual( - word_tokenize("(คนไม่เอา)", engine="newmm"), ['(', 'คน', 'ไม่', 'เอา', ')'] + word_tokenize("(คนไม่เอา)", engine="newmm"), + ["(", "คน", "ไม่", "เอา", ")"], ) self.assertEqual( - word_tokenize("กม/ชม", engine="newmm"), ['กม', '/', 'ชม'] + word_tokenize("กม/ชม", engine="newmm"), ["กม", "/", "ชม"] ) self.assertEqual( - word_tokenize("สีหน้า(รถ)", engine="newmm"), ['สีหน้า', '(', 'รถ', ')'] + word_tokenize("สีหน้า(รถ)", engine="newmm"), ["สีหน้า", "(", "รถ", ")"] ) def test_newmm_longtext(self): @@ -695,12 +684,10 @@ def test_nercut(self): self.assertEqual(nercut.segment(None), []) self.assertEqual(nercut.segment(""), []) self.assertIsNotNone(nercut.segment("ทดสอบ")) - self.assertEqual(nercut.segment("ทันแน่ๆ"), ['ทัน', 'แน่ๆ']) - self.assertEqual(nercut.segment("%1ครั้ง"), ['%', '1', 'ครั้ง']) - self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ['ทุ๊กกโคนน']) - self.assertIsNotNone( - nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ") - ) + self.assertEqual(nercut.segment("ทันแน่ๆ"), ["ทัน", "แน่ๆ"]) + self.assertEqual(nercut.segment("%1ครั้ง"), ["%", "1", "ครั้ง"]) + self.assertEqual(nercut.segment("ทุ๊กกโคนน"), ["ทุ๊กกโคนน"]) + self.assertIsNotNone(nercut.segment("อย่าลืมอัพการ์ดนะจ๊ะ")) self.assertIsNotNone(word_tokenize("ทดสอบ", engine="nercut")) def test_ssg(self): @@ -716,55 +703,57 @@ def test_tcc(self): self.assertEqual( tcc.segment("ประเทศไทย"), ["ป", "ระ", "เท", "ศ", "ไท", "ย"] ) + self.assertEqual(tcc.segment("พิสูจน์ได้ค่ะ"), ["พิ", "สูจน์", "ได้", "ค่ะ"]) self.assertEqual( - tcc.segment("พิสูจน์ได้ค่ะ"), ['พิ', 'สูจน์', 'ได้', 'ค่ะ'] - ) - self.assertEqual( - tcc.segment("หอมรดกไทย"), ['ห', 'อ', 'ม', 'ร', 'ด', 'ก', 'ไท', 'ย'] - ) - self.assertEqual( - tcc.segment("เรือน้อยลอยอยู่"), ['เรื', 'อ', 'น้', 'อ', 'ย', 'ล', 'อ', 'ย', 'อ', 'ยู่'] - ) - self.assertEqual( - 
tcc.segment("ประสานงานกับลูกค้า"), ['ป', 'ระ', 'สา', 'น', 'งา', 'น', 'กั', 'บ', 'ลู', 'ก', 'ค้า'] - ) - self.assertEqual( - tcc.segment("ประกันภัยสัมพันธ์"), ['ป', 'ระ', 'กั', 'น', 'ภั', 'ย', 'สั', 'ม', 'พั','นธ์'] # It don't look like TCC in ETCC paper - ) - self.assertEqual( - tcc.segment("ตากลม"), ['ตา', 'ก', 'ล', 'ม'] + tcc.segment("หอมรดกไทย"), ["ห", "อ", "ม", "ร", "ด", "ก", "ไท", "ย"] ) self.assertEqual( - tcc.segment("เครื่องมือสื่อสารมีหลายชนิด"), - [ - 'เค', - 'รื่อ', - 'ง', - 'มือ', - 'สื่อ', - 'สา', - 'ร', - 'มี', - 'ห', - 'ลา', - 'ย', - 'ช', - 'นิ', - 'ด' - ] + tcc.segment("เรือน้อยลอยอยู่"), + ["เรื", "อ", "น้", "อ", "ย", "ล", "อ", "ย", "อ", "ยู่"], ) self.assertEqual( - tcc.segment("ประชาชน"), ['ป', 'ระ', 'ชา', 'ช', 'น'] + tcc.segment("ประสานงานกับลูกค้า"), + ["ป", "ระ", "สา", "น", "งา", "น", "กั", "บ", "ลู", "ก", "ค้า"], ) self.assertEqual( - tcc.segment("ไหมไทย"), ['ไห', 'ม', 'ไท', 'ย'] - ) - self.assertEqual( - tcc.segment("ยินดี"), ['ยิ', 'น', 'ดี'] - ) - self.assertEqual( - tcc.segment("ขุดหลุม"), ['ขุ', 'ด', 'ห', 'ลุ', 'ม'] + tcc.segment("ประกันภัยสัมพันธ์"), + [ + "ป", + "ระ", + "กั", + "น", + "ภั", + "ย", + "สั", + "ม", + "พั", + "นธ์", + ], # It don't look like TCC in ETCC paper + ) + self.assertEqual(tcc.segment("ตากลม"), ["ตา", "ก", "ล", "ม"]) + self.assertEqual( + tcc.segment("เครื่องมือสื่อสารมีหลายชนิด"), + [ + "เค", + "รื่อ", + "ง", + "มือ", + "สื่อ", + "สา", + "ร", + "มี", + "ห", + "ลา", + "ย", + "ช", + "นิ", + "ด", + ], ) + self.assertEqual(tcc.segment("ประชาชน"), ["ป", "ระ", "ชา", "ช", "น"]) + self.assertEqual(tcc.segment("ไหมไทย"), ["ไห", "ม", "ไท", "ย"]) + self.assertEqual(tcc.segment("ยินดี"), ["ยิ", "น", "ดี"]) + self.assertEqual(tcc.segment("ขุดหลุม"), ["ขุ", "ด", "ห", "ลุ", "ม"]) self.assertEqual(list(tcc.tcc("")), []) self.assertEqual(tcc.tcc_pos(""), set()) @@ -774,14 +763,14 @@ def test_tcc_p(self): self.assertEqual( tcc_p.segment("ประเทศไทย"), ["ป", "ระ", "เท", "ศ", "ไท", "ย"] ) + self.assertEqual(tcc_p.segment("พิสูจน์ได้ค่ะ"), ["พิ", "สูจน์", "ได้", "ค่ะ"]) self.assertEqual( - tcc_p.segment("พิสูจน์ได้ค่ะ"), ['พิ', 'สูจน์', 'ได้', 'ค่ะ'] - ) - self.assertEqual( - tcc_p.segment("หอมรดกไทย"), ['ห', 'อ', 'ม', 'ร', 'ด', 'ก', 'ไท', 'ย'] + tcc_p.segment("หอมรดกไทย"), + ["ห", "อ", "ม", "ร", "ด", "ก", "ไท", "ย"], ) self.assertEqual( - tcc_p.segment("เรือน้อยลอยอยู่"), ['เรือ', 'น้', 'อ', 'ย', 'ล', 'อ', 'ย', 'อ', 'ยู่'] + tcc_p.segment("เรือน้อยลอยอยู่"), + ["เรือ", "น้", "อ", "ย", "ล", "อ", "ย", "อ", "ยู่"], ) # Not implemented # self.assertEqual( @@ -825,15 +814,11 @@ def test_word_detokenize(self): [["ผม", "เลี้ยง", " ", "5", " ", "ตัว"]], ) self.assertEqual( - word_detokenize( - ["ผม", "เลี้ยง", "5", "10", "ตัว", "ๆ", "คน", "ดี"] - ), + word_detokenize(["ผม", "เลี้ยง", "5", "10", "ตัว", "ๆ", "คน", "ดี"]), "ผมเลี้ยง 5 10 ตัว ๆ คนดี", ) self.assertEqual( - word_detokenize( - ["ผม", "เลี้ยง", "5", "ตัว", " ", "ๆ", "คน", "ดี"] - ), + word_detokenize(["ผม", "เลี้ยง", "5", "ตัว", " ", "ๆ", "คน", "ดี"]), "ผมเลี้ยง 5 ตัว ๆ คนดี", ) self.assertTrue( diff --git a/tests/test_tools.py b/tests/test_tools.py index a61842c37..8fec480f0 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 import unittest diff --git a/tests/test_util.py b/tests/test_util.py index 85a03ddc3..9d821ff01 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 
2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + """ Unit tests for pythainlp.util module. """ diff --git a/tests/test_wsd.py b/tests/test_wsd.py index e6666a7dc..47f7b7c59 100644 --- a/tests/test_wsd.py +++ b/tests/test_wsd.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- +# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project +# SPDX-License-Identifier: Apache-2.0 + import unittest from pythainlp.wsd import get_sense
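Every test module above receives the same two-line REUSE-style license header. For reference, a sketch of the header as applied throughout this patch, for any new test module that should stay consistent with the licensing table in README_TH.md:

```python
# -*- coding: utf-8 -*-
# SPDX-FileCopyrightText: Copyright 2016-2023 PyThaiNLP Project
# SPDX-License-Identifier: Apache-2.0
```

Machine-readable SPDX identifiers like these let tooling (for example, REUSE-compliance checkers or license scanners) verify per-file licensing without parsing free-form comments.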