Skip to content

Commit

Permalink
Update test_tokenize.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
wannaphong committed Apr 22, 2021
1 parent 2f39603 commit 9bf1842
Showing 1 changed file with 18 additions and 0 deletions.
18 changes: 18 additions & 0 deletions tests/test_tokenize.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -300,6 +300,24 @@ def test_subword_tokenize(self):
self.assertFalse(
" " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
)
self.assertEqual(
subword_tokenize("สวัสดีชาวโลก", engine="dict"), ["สวัส", "ดี", "ชาว", "โลก"]
)
self.assertFalse("า" in subword_tokenize("สวัสดีชาวโลก", engine="dict"))
self.assertEqual(subword_tokenize(None, engine="ssg"), [])
self.assertEqual(syllable_tokenize("", engine="ssg"), [])
self.assertEqual(
subword_tokenize("แมวกินปลา", engine="ssg"), ["แมว", "กิน", "ปลา"]
)
self.assertTrue(
"ดาว" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
)
self.assertFalse(
"า" in subword_tokenize("สวัสดีดาวอังคาร", engine="ssg")
)
self.assertFalse(
" " in subword_tokenize("พันธมิตร ชา นม", keep_whitespace=False)
)
with self.assertRaises(ValueError):
subword_tokenize("นกแก้ว", engine="XX") # engine does not exist

Expand Down

0 comments on commit 9bf1842

Please sign in to comment.