diff --git a/.travis.yml b/.travis.yml index 6588db3e7..75179d4e5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,9 +18,11 @@ install: os: - linux + # command to run tests, e.g. python setup.py test script: coverage run --source=pythainlp setup.py test + after_success: coveralls diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 62ecbcbb3..a10d62615 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,15 +19,14 @@ We use the famous [gitflow](http://nvie.com/posts/a-successful-git-branching-mod ## Code Guidelines -- Use [PEP8](http://www.python.org/dev/peps/pep-0008/); +- Follow [PEP8](http://www.python.org/dev/peps/pep-0008/) and use [black](https://github.com/ambv/black); - Write tests for your new features (please see "Tests" topic below); - Always remember that [commented code is dead code](http://www.codinghorror.com/blog/2008/07/coding-without-comments.html); - Name identifiers (variables, classes, functions, module names) with meaningful and pronounceable names (`x` is always wrong); -- When manipulating strings, use [Python's new-style - formatting](http://docs.python.org/library/string.html#format-string-syntax) - (`'{} = {}'.format(a, b)` instead of `'%s = %s' % (a, b)`); +- When manipulating strings, use [f-strings](https://www.python.org/dev/peps/pep-0498/) + (use `f"{a} = {b}"` instead of `"{} = {}".format(a, b)` and `"%s = %s" % (a, b)`); - All `#TODO` comments should be turned into issues (use our [GitHub issue system](https://github.com/PyThaiNLP/pythainlp/)); - Run all tests before pushing (just execute `tox`) so you will know if your diff --git a/Makefile b/Makefile index d5c977215..0f103632c 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,6 @@ help: clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts - clean-build: ## remove build artifacts rm -fr build/ rm -fr dist/ @@ -51,19 +50,16 @@ lint: ## check style with flake8 flake8 pythainlp tests test: ## run tests quickly with the default Python - - python setup.py test + python setup.py test test-all: ## run tests on every Python version with tox tox coverage: ## check code coverage quickly with the default Python - - coverage run --source pythainlp setup.py test - - coverage report -m - coverage html - $(BROWSER) htmlcov/index.html + coverage run --source pythainlp setup.py test + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html release: clean ## package and upload a release python setup.py sdist upload diff --git a/README-pypi.md b/README-pypi.md index 886a37edb..b779f157e 100644 --- a/README-pypi.md +++ b/README-pypi.md @@ -1,6 +1,6 @@ ![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4) -# PyThaiNLP 2.0 +# PyThaiNLP 2.0.2 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/cb946260c87a4cc5905ca608704406f7)](https://www.codacy.com/app/pythainlp/pythainlp_2?utm_source=github.com&utm_medium=referral&utm_content=PyThaiNLP/pythainlp&utm_campaign=Badge_Grade)[![pypi](https://img.shields.io/pypi/v/pythainlp.svg)](https://pypi.python.org/pypi/pythainlp) [![Build Status](https://travis-ci.org/PyThaiNLP/pythainlp.svg?branch=develop)](https://travis-ci.org/PyThaiNLP/pythainlp) @@ -12,9 +12,9 @@ PyThaiNLP is a Python library for natural language processing (NLP) of Thai lang PyThaiNLP includes Thai word tokenizers, transliterators, soundex converters, part-of-speech taggers, and spell checkers. 
-📖 For details on upgrading from PyThaiNLP 1.7 to PyThaiNLP 2.0, see [From PyThaiNLP 1.7 to PyThaiNLP 2.0](https://thainlp.org/pythainlp/docs/2.0/notes/pythainlp-1_7-2_0.html) +📖 [Upgrading from PyThaiNLP 1.7 to 2.0](https://thainlp.org/pythainlp/docs/2.0/notes/pythainlp-1_7-2_0.html) -📖 For ThaiNER user after upgrading from PyThaiNLP 1.7 to PyThaiNLP 2.0, see [Upgrade ThaiNER from PyThaiNLP 1.7 to PyThaiNLP 2.0](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0) +📖 [Upgrade ThaiNER from PyThaiNLP 1.7 to 2.0](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0) 📫 follow us on Facebook [Pythainlp](https://www.facebook.com/pythainlp/) diff --git a/README.md b/README.md index 880a579ca..e25c27d97 100644 --- a/README.md +++ b/README.md @@ -15,12 +15,12 @@ Thai Natural Language Processing in Python. PyThaiNLP is a Python package for text processing and linguistic analysis, similar to `nltk` but with focus on Thai language. - [Current PyThaiNLP stable release is 2.0](https://github.com/PyThaiNLP/pythainlp/tree/master) -- PyThaiNLP 2.0 will support only Python 3.6+. Some functions may work with older version of Python 3, but it is not well-tested and will not be supported. See [PyThaiNLP 2.0 change log](https://github.com/PyThaiNLP/pythainlp/issues/118). -- Python 2 users can use PyThaiNLP 1.6, our latest released that tested with Python 2.7. +- PyThaiNLP 2.0 supports Python 3.6+. Some functions may work with older versions of Python 3, but they are not well-tested and will not be supported. See [PyThaiNLP 2.0 change log](https://github.com/PyThaiNLP/pythainlp/issues/118). +- Python 2.7+ users can use PyThaiNLP 1.6. -**This is a document for development branch (post 1.7.x). Things will break. For a stable branch document, see [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** +**This is a document for development branch (post 2.0). Things will break. For a stable branch document, see [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** -📫 follow us on Facebook [Pythainlp](https://www.facebook.com/pythainlp/) +📫 follow us on Facebook [PyThaiNLP](https://www.facebook.com/pythainlp/) ## Capabilities @@ -34,7 +34,7 @@ PyThaiNLP is a Python package for text processing and linguistic analysis, simil - Thai misspellings detection and spelling correction (```spell```) - Thai soundex (```lk82```, ```udom83```, ```metasound```) - Thai WordNet wrapper -- and much more - see examples in [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb). 
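A minimal usage sketch of the capability list above, assuming a PyThaiNLP 2.0 install; it mirrors the calls demonstrated in the Get Started notebook added later in this changeset, and the Thai example strings are arbitrary illustrations:

```python
# Minimal sketch of a few capabilities listed above (PyThaiNLP 2.0 top-level API,
# as exercised in notebooks/pythainlp-get-started.ipynb).
from pythainlp import word_tokenize
from pythainlp.transliterate import romanize
from pythainlp.soundex import lk82

text = "ฉันรักภาษาไทย"  # "I love the Thai language"; any Thai text works here

print(word_tokenize(text))                     # dictionary-based word segmentation (default "newmm" engine)
print(word_tokenize(text, engine="longest"))   # longest-matching segmentation engine
print(romanize("แมว"))                         # Thai-to-Latin romanization, e.g. "แมว" (cat) -> 'maeo'
print(lk82("แมว"))                             # LK82 soundex code for the same word
```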
## Installation @@ -102,13 +102,10 @@ PyThaiNLP āđ€āļ›āđ‡āļ™āđ„āļĨāļšāļēāļĢāļĩāļ āļēāļĐāļēāđ„āļžāļ—āļ­āļ™āđ€āļžāļ·āđˆ > āđ€āļžāļĢāļēāļ°āđ‚āļĨāļāļ‚āļąāļšāđ€āļ„āļĨāļ·āđˆāļ­āļ™āļ•āđˆāļ­āđ„āļ›āļ”āđ‰āļ§āļĒāļāļēāļĢāđāļšāđˆāļ‡āļ›āļąāļ™ -āļĢāļ­āļ‡āļĢāļąāļš Python 3.6 āļ‚āļķāđ‰āļ™āđ„āļ› +- PyThaiNLP 2.0 āļĢāļ­āļ‡āļĢāļąāļš Python 3.6 āļ‚āļķāđ‰āļ™āđ„āļ› +- āļœāļđāđ‰āđƒāļŠāđ‰ Python 2.7+ āļĒāļąāļ‡āļŠāļēāļĄāļēāļĢāļ–āđƒāļŠāđ‰ PyThaiNLP 1.6 āđ„āļ”āđ‰ -- āļ•āļąāđ‰āļ‡āđāļ•āđˆāļĢāļļāđˆāļ™ 1.7 PyThaiNLP āļˆāļ°āđ€āļĨāļīāļāļŠāļ™āļąāļšāļŠāļ™āļļāļ™ Python 2 (āļšāļēāļ‡āļŸāļąāļ‡āļāđŒāļŠāļąāļ™āļ­āļēāļˆāļĒāļąāļ‡āļ—āļģāļ‡āļēāļ™āđ„āļ”āđ‰ āđāļ•āđˆāļˆāļ°āđ„āļĄāđˆāđ„āļ”āđ‰āļĢāļąāļšāļāļēāļĢāļŠāļ™āļąāļšāļŠāļ™āļļāļ™) -- āļ•āļąāđ‰āļ‡āđāļ•āđˆāļĢāļļāđˆāļ™ 2.0 āļˆāļ°āļĒāļļāļ•āļīāļāļēāļĢāļĢāļ­āļ‡āļĢāļąāļš Python 2 āļ—āļąāđ‰āļ‡āļŦāļĄāļ” -- āļœāļđāđ‰āđƒāļŠāđ‰ Python 2 āļĒāļąāļ‡āļŠāļēāļĄāļēāļĢāļ–āđƒāļŠāđ‰ PyThaiNLP 1.6 āđ„āļ”āđ‰ - -**āđ€āļ­āļāļŠāļēāļĢāļ™āļĩāđ‰āļŠāļģāļŦāļĢāļąāļšāļĢāļļāđˆāļ™āļžāļąāļ’āļ™āļē (āļŦāļĨāļąāļ‡ 1.7.x) āļ­āļēāļˆāļĄāļĩāļāļēāļĢāđ€āļ›āļĨāļĩāđˆāļĒāļ™āđāļ›āļĨāļ‡āđ„āļ”āđ‰āļ•āļĨāļ­āļ” āļŠāļģāļŦāļĢāļąāļšāđ€āļ­āļāļŠāļēāļĢāļĢāļļāđˆāļ™āđ€āļŠāļ–āļĩāļĒāļĢ āļ”āļđāļ—āļĩāđˆ [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** +**āđ€āļ­āļāļŠāļēāļĢāļ™āļĩāđ‰āļŠāļģāļŦāļĢāļąāļšāļĢāļļāđˆāļ™āļžāļąāļ’āļ™āļē (āļŦāļĨāļąāļ‡ 2.0) āļ­āļēāļˆāļĄāļĩāļāļēāļĢāđ€āļ›āļĨāļĩāđˆāļĒāļ™āđāļ›āļĨāļ‡āđ„āļ”āđ‰āļ•āļĨāļ­āļ” āļŠāļģāļŦāļĢāļąāļšāđ€āļ­āļāļŠāļēāļĢāļĢāļļāđˆāļ™āđ€āļŠāļ–āļĩāļĒāļĢ āļ”āļđāļ—āļĩāđˆ [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** ðŸ“Ŧ āļ•āļīāļ”āļ•āļēāļĄāļ‚āđˆāļēāļ§āļŠāļēāļĢāđ„āļ”āđ‰āļ—āļĩāđˆ Facebook [Pythainlp](https://www.facebook.com/pythainlp/) @@ -125,7 +122,7 @@ PyThaiNLP āđ€āļ›āđ‡āļ™āđ„āļĨāļšāļēāļĢāļĩāļ āļēāļĐāļēāđ„āļžāļ—āļ­āļ™āđ€āļžāļ·āđˆ - āļ•āļĢāļ§āļˆāļ„āļģāļŠāļ°āļāļ”āļœāļīāļ”āđƒāļ™āļ āļēāļĐāļēāđ„āļ—āļĒ (```spell```) - soundex āļ āļēāļĐāļēāđ„āļ—āļĒ (```lk82```, ```udom83```, ```metasound```) - Thai WordNet wrapper -- āđāļĨāļ°āļ­āļ·āđˆāļ™ āđ† [āļ”āļđāļ•āļąāļ§āļ­āļĒāđˆāļēāļ‡](https://github.com/PyThaiNLP/pythainlp/tree/dev/examples) +- āđāļĨāļ°āļ­āļ·āđˆāļ™ āđ† āļ”āļđāļ•āļąāļ§āļ­āļĒāđˆāļēāļ‡āđ„āļ”āđ‰āđƒāļ™ [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb) ## āļ•āļīāļ”āļ•āļąāđ‰āļ‡ diff --git a/bin/pythainlp b/bin/pythainlp index 3582b89ad..1e3a68691 100644 --- a/bin/pythainlp +++ b/bin/pythainlp @@ -45,4 +45,4 @@ elif args.soundex!=None: args.engine="lk82" print(soundex(args.soundex, engine=args.engine)) else: - print("PyThaiNLP 2.0") + print("PyThaiNLP 2.0.2") diff --git a/conda.recipe/meta-old.yaml b/conda.recipe/meta-old.yaml deleted file mode 100644 index 632fb2109..000000000 --- a/conda.recipe/meta-old.yaml +++ /dev/null @@ -1,49 +0,0 @@ -{% set version = "1.7.2" %} - -package: - name: pythainlp - version: {{ version }} - -build: - noarch: python - number: 0 - script: python -m pip install --no-deps --ignore-installed . - -requirements: - host: - - pip - - python - - setuptools - - nltk - - future - - six - - marisa_trie - - dill - - pytz - - tinydb - - tqdm - - - run: - - python - - nltk - - future - - six - - marisa_trie - - dill - - pytz - - tinydb - - tqdm - -test: - imports: - - pvlib - -about: - home: https://github.com/PyThaiNLP/pythainlp - license: Apache License 2.0 - summary: 'Thai Natural Language Processing in Python.' 
- -extra: - recipe-maintainers: - - pythainlp diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 8e36acad6..f25188849 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "1.7.2" %} +{% set version = "2.0.2" %} package: name: pythainlp diff --git a/docs/api/spell.rst b/docs/api/spell.rst index 7544a58d5..b2c77736b 100644 --- a/docs/api/spell.rst +++ b/docs/api/spell.rst @@ -8,8 +8,4 @@ Modules ------- .. autofunction:: spell -.. autofunction:: pythainlp.spell.pn.spell -.. autofunction:: pythainlp.spell.pn.prob -.. autofunction:: pythainlp.spell.pn.correct -.. autofunction:: pythainlp.spell.pn.known -.. autofunction:: pythainlp.spell.pn.dictionary +.. autofunction:: correct diff --git a/docs/api/util.rst b/docs/api/util.rst index 1906fe48d..166f52375 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -14,9 +14,9 @@ Modules .. autofunction:: digit_to_text .. autofunction:: eng_to_thai .. autofunction:: find_keyword -.. autofunction:: is_thai -.. autofunction:: is_thaichar -.. autofunction:: is_thaiword +.. autofunction:: countthai +.. autofunction:: isthai +.. autofunction:: isthaichar .. autofunction:: normalize .. autofunction:: now_reign_year .. autofunction:: num_to_thaiword diff --git a/docs/pythainlp-1-3-thai.md b/docs/archive/pythainlp-1-3-thai.md similarity index 100% rename from docs/pythainlp-1-3-thai.md rename to docs/archive/pythainlp-1-3-thai.md diff --git a/docs/pythainlp-1-4-eng.md b/docs/archive/pythainlp-1-4-eng.md similarity index 100% rename from docs/pythainlp-1-4-eng.md rename to docs/archive/pythainlp-1-4-eng.md diff --git a/docs/pythainlp-1-4-eng.pdf b/docs/archive/pythainlp-1-4-eng.pdf similarity index 100% rename from docs/pythainlp-1-4-eng.pdf rename to docs/archive/pythainlp-1-4-eng.pdf diff --git a/docs/pythainlp-1-4-thai.md b/docs/archive/pythainlp-1-4-thai.md similarity index 100% rename from docs/pythainlp-1-4-thai.md rename to docs/archive/pythainlp-1-4-thai.md diff --git a/docs/pythainlp-1-4-thai.pdf b/docs/archive/pythainlp-1-4-thai.pdf similarity index 100% rename from docs/pythainlp-1-4-thai.pdf rename to docs/archive/pythainlp-1-4-thai.pdf diff --git a/docs/pythainlp-1-5-eng.md b/docs/archive/pythainlp-1-5-eng.md similarity index 100% rename from docs/pythainlp-1-5-eng.md rename to docs/archive/pythainlp-1-5-eng.md diff --git a/docs/pythainlp-1-5-thai.md b/docs/archive/pythainlp-1-5-thai.md similarity index 100% rename from docs/pythainlp-1-5-thai.md rename to docs/archive/pythainlp-1-5-thai.md diff --git a/docs/pythainlp-1-6-eng.md b/docs/archive/pythainlp-1-6-eng.md similarity index 100% rename from docs/pythainlp-1-6-eng.md rename to docs/archive/pythainlp-1-6-eng.md diff --git a/docs/pythainlp-1-6-thai.md b/docs/archive/pythainlp-1-6-thai.md similarity index 100% rename from docs/pythainlp-1-6-thai.md rename to docs/archive/pythainlp-1-6-thai.md diff --git a/docs/pythainlp-1-7.md b/docs/archive/pythainlp-1-7.md similarity index 100% rename from docs/pythainlp-1-7.md rename to docs/archive/pythainlp-1-7.md diff --git a/docs/pythainlp-dev-thai.md b/docs/archive/pythainlp-dev-thai.md similarity index 100% rename from docs/pythainlp-dev-thai.md rename to docs/archive/pythainlp-dev-thai.md diff --git a/docs/whatsnew-1.7.md b/docs/whatsnew-1.7.md deleted file mode 100644 index 768b6f450..000000000 --- a/docs/whatsnew-1.7.md +++ /dev/null @@ -1,12 +0,0 @@ -# āļĄāļĩāļ­āļ°āđ„āļĢāđƒāļŦāļĄāđˆāđƒāļ™ PyThaiNLP 1.7 - -## āļŠāļĢāļļāļ›āļ›āļĢāļ°āđ€āļ”āđ‡āļ™āļŠāļģāļ„āļąāļ - -- 
āđ€āļĨāļīāļāļŠāļ™āļąāļšāļŠāļ™āļļāļ™ Python 2.7 āļ­āļĒāđˆāļēāļ‡āđ€āļ›āđ‡āļ™āļ—āļēāļ‡āļāļēāļĢ -- āđ€āļžāļīāđˆāļĄ ULMFit utility -- āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āļĢāļ°āļšāļšāļ•āļąāļ”āļ„āļģāđƒāļŦāļĄāđˆ āļ—āļąāđ‰āļ‡ newmm āđāļĨāļ° mm -- thai2vec 0.2 -- sentiment analysis āļ•āļąāļ§āđƒāļŦāļĄāđˆāļ—āļģāļ‡āļēāļ™āļ”āđ‰āļ§āļĒ deep learning -- āđ€āļžāļīāđˆāļĄ thai2rom āđ€āļ›āđ‡āļ™ Thai romanization āļ—āļģāļ”āđ‰āļ§āļĒ deep learning āđƒāļ™āļĢāļ°āļ”āļąāļšāļ•āļąāļ§āļ­āļąāļāļĐāļĢ - -āļāļģāļĨāļąāļ‡āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡... diff --git a/examples/collate.py b/examples/collate.py deleted file mode 100644 index d4e30525e..000000000 --- a/examples/collate.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.util import collate - -print(collate(["āđ„āļāđˆ", "āđ„āļ‚āđˆ", "āļ", "āļŪāļē"])) # ['āļ', 'āđ„āļāđˆ', 'āđ„āļ‚āđˆ', 'āļŪāļē'] diff --git a/examples/date.py b/examples/date.py deleted file mode 100644 index 888d9c178..000000000 --- a/examples/date.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -import datetime -from pythainlp.util import thai_strftime - -fmt = "%Aāļ—āļĩāđˆ %-d %B āļž.āļĻ. %Y āđ€āļ§āļĨāļē %H:%Māļ™. (%a %d-%b-%y)" -date = datetime.datetime(1976, 10, 6, 1, 40) - -# āļ§āļąāļ™āļžāļļāļ˜āļ—āļĩāđˆ 6 āļ•āļļāļĨāļēāļ„āļĄ āļž.āļĻ. 2519 āđ€āļ§āļĨāļē 01:40āļ™. (āļž 06-āļ•.āļ„.-19) -print(thai_strftime(date, fmt)) diff --git a/examples/etcc.py b/examples/etcc.py deleted file mode 100644 index f732fdf11..000000000 --- a/examples/etcc.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import etcc - -print(etcc.etcc("āļ„āļ·āļ™āļ„āļ§āļēāļĄāļŠāļļāļ‚")) # /āļ„āļ·āļ™/āļ„āļ§āļēāļĄāļŠāļļāļ‚ diff --git a/examples/ner.py b/examples/ner.py deleted file mode 100644 index 773859e84..000000000 --- a/examples/ner.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tag.named_entity import ThaiNameTagger -ner = ThaiNameTagger() -print(ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 
61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.")) diff --git a/examples/normalize.py b/examples/normalize.py deleted file mode 100644 index cac000306..000000000 --- a/examples/normalize.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.util import normalize - -print(normalize("āđ€āđ€āļ›āļĨāļ") == "āđāļ›āļĨāļ") # āđ€ āđ€ āļ› āļĨ āļ āļāļąāļš āđāļ›āļĨāļ diff --git a/examples/soundex.py b/examples/soundex.py deleted file mode 100644 index 9864ac747..000000000 --- a/examples/soundex.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.soundex import lk82, metasound, udom83 - -texts = ["āļšāļđāļĢāļ“āļ°", "āļšāļđāļĢāļ“āļāļēāļĢ", "āļĄāļąāļ", "āļĄāļąāļ„", "āļĄāļĢāļĢāļ„", "āļĨāļąāļ", "āļĢāļąāļ", "āļĢāļąāļāļĐāđŒ", ""] -for text in texts: - print( - "{} - lk82: {} - udom83: {} - metasound: {}".format( - text, lk82(text), udom83(text), metasound(text) - ) - ) - -# check equivalence -print(lk82("āļĢāļ–") == lk82("āļĢāļ”")) -print(udom83("āļ§āļĢāļĢ") == udom83("āļ§āļąāļ™")) -print(metasound("āļ™āļž") == metasound("āļ™āļ ")) diff --git a/examples/spell.py b/examples/spell.py deleted file mode 100644 index 92dbc49f3..000000000 --- a/examples/spell.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.corpus import ttc -from pythainlp.spell import spell -from pythainlp.spell.pn import NorvigSpellChecker -from pythainlp.spell.pn import correct as pn_tnc_correct -from pythainlp.spell.pn import spell as pn_tnc_spell - -# spell checker from pythainlp.spell module (generic) -print(spell("āļŠāļĩāđˆāđ€āļŦāļĨāļĩāļĒāļĄ")) # ['āļŠāļĩāđˆāđ€āļŦāļĨāļĩāđˆāļĒāļĄ'] - -# spell checker from pythainlp.spell.pn module (specified algorithm - Peter Norvig's) -print(pn_tnc_spell("āđ€āļŦāļĨāļ·āļĒāļĄ")) -print(pn_tnc_correct("āđ€āļŦāļĨāļ·āļĒāļĄ")) - - -# spell checker from pythainlp.spell.pn module (specified algorithm, custom dictionary) -ttc_word_freqs = ttc.word_freqs() -pn_ttc_checker = NorvigSpellChecker(custom_dict=ttc_word_freqs) -print(pn_ttc_checker.spell("āđ€āļŦāļĨāļ·āļĒāļĄ")) -print(pn_ttc_checker.correct("āđ€āļŦāļĨāļ·āļĒāļĄ")) - -# apply different dictionary filter when creating spell checker -pn_tnc_checker = NorvigSpellChecker() -print(len(pn_tnc_checker.dictionary())) -pn_tnc_checker_no_filter = NorvigSpellChecker(dict_filter=None) -print(len(pn_tnc_checker_no_filter.dictionary())) diff --git a/examples/tcc.py b/examples/tcc.py deleted file mode 100644 index 4d95aed43..000000000 --- a/examples/tcc.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import tcc - -print(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ")) # āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ - -print(tcc.tcc_pos("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ")) # {1, 3, 5, 6, 8, 9} - -for ch in tcc.tcc_gen("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"): # āļ›-āļĢāļ°-āđ€āļ—-āļĻ-āđ„āļ—-āļĒ- - print(ch, end='-') diff --git a/examples/tokenize.py b/examples/tokenize.py deleted file mode 100644 index 0b8a0d00b..000000000 --- a/examples/tokenize.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import sent_tokenize, word_tokenize - -text = "āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ " -print(text) - -print(sent_tokenize(text)) -# ['āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ', ''] - -print(word_tokenize(text)) -# ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' '] 
- -print(word_tokenize(text, whitespaces=False)) -# ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ'] - -text2 = "āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™" -print(text2) - -print(word_tokenize(text2)) -# ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™'] - -print(word_tokenize(text2, engine="longest")) -# ['āļāļŽāļŦāļĄāļēāļĒ', 'āđāļĢāļ‡āļ‡āļēāļ™'] diff --git a/examples/transliterate.py b/examples/transliterate.py deleted file mode 100644 index 97fb4e7f1..000000000 --- a/examples/transliterate.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.transliterate import romanize, transliterate - -print(romanize("āđāļĄāļ§")) -print(transliterate("āđāļĄāļ§")) diff --git a/meta.yaml b/meta.yaml index 0bc914207..714ecb262 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "1.7.2" %} +{% set version = "2.0.2" %} package: name: pythainlp diff --git a/notebooks/pythainlp-get-started.ipynb b/notebooks/pythainlp-get-started.ipynb new file mode 100644 index 000000000..806b9e47d --- /dev/null +++ b/notebooks/pythainlp-get-started.ipynb @@ -0,0 +1,1077 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PyThaiNLP Get Started\n", + "\n", + "Code examples for basic functions in PyThaiNLP https://github.com/PyThaiNLP/pythainlp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Thai Characters\n", + "\n", + "PyThaiNLP provides some ready-to-use Thai character set (e.g. Thai consonants, vowels, tonemarks, symbols) as a string for convenience. There are also few utility functions to test if a string is in Thai or not." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪāļĪāļĶāļ°āļąāļēāļģāļīāļĩāļķāļ·āļļāļđāđ€āđāđ‚āđƒāđ„āđ…āđ‡āđˆāđ‰āđŠāđ‹āļŊāđ†āļšāđŒāđāđŽāđāđšāđ›āđāđ‘āđ’āđ“āđ”āđ•āđ–āđ—āđ˜āđ™āļŋ'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp\n", + "\n", + "pythainlp.thai_characters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪ'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.thai_consonants" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"āđ”\" in pythainlp.thai_digits" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp.util\n", + "\n", + "pythainlp.util.isthai(\"āļ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.isthai(\"(āļ.āļž.)\")" + ] + }, 
+ { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.isthai(\"(āļ.āļž.)\", ignore_chars=\".()\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.countthai(\"āļ§āļąāļ™āļ­āļēāļ—āļīāļ•āļĒāđŒāļ—āļĩāđˆ 24 āļĄāļĩāļ™āļēāļ„āļĄ 2562\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67.85714285714286" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.countthai(\"āļ§āļąāļ™āļ­āļēāļ—āļīāļ•āļĒāđŒāļ—āļĩāđˆ 24 āļĄāļĩāļ™āļēāļ„āļĄ 2562\", ignore_chars=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Collation\n", + "\n", + "Sorting according to Thai dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļāļĢāļĢāđ„āļāļĢ', 'āļāļĢāļ°āļ”āļēāļĐ', 'āđ„āļ‚āđˆ', 'āļ„āđ‰āļ­āļ™', 'āļœāđ‰āļēāđ„āļŦāļĄ']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import collate\n", + "\n", + "thai_words = [\"āļ„āđ‰āļ­āļ™\", \"āļāļĢāļ°āļ”āļēāļĐ\", \"āļāļĢāļĢāđ„āļāļĢ\", \"āđ„āļ‚āđˆ\", \"āļœāđ‰āļēāđ„āļŦāļĄ\"]\n", + "collate(thai_words)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļœāđ‰āļēāđ„āļŦāļĄ', 'āļ„āđ‰āļ­āļ™', 'āđ„āļ‚āđˆ', 'āļāļĢāļ°āļ”āļēāļĐ', 'āļāļĢāļĢāđ„āļāļĢ']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collate(thai_words, reverse=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Date and Time Format\n", + "\n", + "Get Thai day and month names with Thai Buddhist Era (B.E.)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļ§āļąāļ™āļžāļļāļ˜āļ—āļĩāđˆ 6 āļ•āļļāļĨāļēāļ„āļĄ āļž.āļĻ. 2519 āđ€āļ§āļĨāļē 01:40 āļ™. (āļž 06-āļ•.āļ„.-19)'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import datetime\n", + "from pythainlp.util import thai_strftime\n", + "\n", + "fmt = \"%Aāļ—āļĩāđˆ %-d %B āļž.āļĻ. %Y āđ€āļ§āļĨāļē %H:%M āļ™. (%a %d-%b-%y)\"\n", + "date = datetime.datetime(1976, 10, 6, 1, 40)\n", + "\n", + "thai_strftime(date, fmt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tokenization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Thai Character Cluster (TCC) and Extended TCC\n", + "\n", + "According to [Character Cluster Based Thai Information Retrieval](https://www.researchgate.net/publication/2853284_Character_Cluster_Based_Thai_Information_Retrieval) (Theeramunkong et al. 2004)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļ›', 'āļĢāļ°', 'āđ€āļ—', 'āļĻ', 'āđ„āļ—', 'āļĒ']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import tcc\n", + "\n", + "tcc.tcc(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{1, 3, 5, 6, 8, 9}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tcc.tcc_pos(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ/" + ] + } + ], + "source": [ + "for ch in tcc.tcc_gen(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\"):\n", + " print(ch, end='/')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sentence and Word\n", + "\n", + "Default word tokenizer (\"newmm\") use maximum matching algorithm." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sent_tokenize: ['āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ']\n", + "word_tokenize: ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ']\n", + "word_tokenize, without whitespace: ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ']\n" + ] + } + ], + "source": [ + "from pythainlp import sent_tokenize, word_tokenize\n", + "\n", + "text = \"āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ \"\n", + "\n", + "print(\"sent_tokenize:\", sent_tokenize(text))\n", + "print(\"word_tokenize:\", word_tokenize(text))\n", + "print(\"word_tokenize, without whitespace:\", word_tokenize(text, whitespaces=False))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other algorithm can be chosen. We can also create a tokenizer with custom dictionary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "newmm: ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™', 'āļ‰āļšāļąāļš', 'āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡', 'āđƒāļŦāļĄāđˆ', 'āļ›āļĢāļ°āļāļēāļĻ', 'āđƒāļŠāđ‰āđāļĨāđ‰āļ§']\n", + "longest: ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™', 'āļ‰āļšāļąāļš', 'āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡', 'āđƒāļŦāļĄāđˆ', 'āļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰', 'āđāļĨāđ‰āļ§']\n", + "custom: ['āļāļŽ', 'āļŦāļĄāļēāļĒāđāļĢāļ‡', 'āļ‡āļēāļ™', 'āļ‰āļšāļąāļšāļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āđƒāļŦāļĄāđˆāļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰āđāļĨāđ‰āļ§']\n" + ] + } + ], + "source": [ + "from pythainlp import word_tokenize, Tokenizer\n", + "\n", + "text = \"āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™āļ‰āļšāļąāļšāļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āđƒāļŦāļĄāđˆāļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰āđāļĨāđ‰āļ§\"\n", + "\n", + "print(\"newmm:\", word_tokenize(text)) # default engine is \"newmm\"\n", + "print(\"longest:\", word_tokenize(text, engine=\"longest\"))\n", + "\n", + "words = [\"āļāļŽ\", \"āļ‡āļēāļ™\"]\n", + "custom_tokenizer = Tokenizer(words)\n", + "print(\"custom:\", custom_tokenizer.word_tokenize(text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Default word tokenizer use a word list from pythainlp.corpus.common.thai_words().\n", + "We can get that list, add/remove words, and create new tokenizer from the modified list." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "newmm: ['āđ„āļ­āđāļ‹āļ„', ' ', 'āļ­āļŠāļī', 'āļĄāļ­', 'āļŸ']\n", + "custom: ['āđ„āļ­āđāļ‹āļ„', ' ', 'āļ­āļŠāļīāļĄāļ­āļŸ']\n" + ] + } + ], + "source": [ + "from pythainlp.corpus.common import thai_words\n", + "from pythainlp import word_tokenize, Tokenizer\n", + "\n", + "text = \"āđ„āļ­āđāļ‹āļ„ āļ­āļŠāļīāļĄāļ­āļŸ\"\n", + "\n", + "print(\"newmm:\", word_tokenize(text))\n", + "\n", + "words = set(thai_words()) # thai_words() returns frozenset\n", + "words.add(\"āļ­āļŠāļīāļĄāļ­āļŸ\")\n", + "custom_tokenizer = Tokenizer(words)\n", + "print(\"custom:\", custom_tokenizer.word_tokenize(text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Transliteration" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'maeo'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.transliterate import romanize\n", + "\n", + "romanize(\"āđāļĄāļ§\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mɛːw\n" + ] + } + ], + "source": [ + "from pythainlp.transliterate import transliterate\n", + "\n", + "print(transliterate(\"āđāļĄāļ§\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import normalize\n", + "\n", + "normalize(\"āđ€āđ€āļ›āļĨāļ\") == \"āđāļ›āļĨāļ\" # āđ€ āđ€ āļ› āļĨ āļ vs āđāļ›āļĨāļ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Soundex\n", + "\n", + "\"Soundex is a phonetic algorithm for indexing names 
by sound.\" ([Wikipedia](https://en.wikipedia.org/wiki/Soundex)). PyThaiNLP provides three kinds of Thai soundex." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "from pythainlp.soundex import lk82, metasound, udom83\n", + "\n", + "# check equivalence\n", + "print(lk82(\"āļĢāļ–\") == lk82(\"āļĢāļ”\"))\n", + "print(udom83(\"āļ§āļĢāļĢ\") == udom83(\"āļ§āļąāļ™\"))\n", + "print(metasound(\"āļ™āļž\") == metasound(\"āļ™āļ \"))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āļšāļđāļĢāļ“āļ° - lk82: āļšE400 - udom83: āļš930000 - metasound: āļš550\n", + "āļšāļđāļĢāļ“āļāļēāļĢ - lk82: āļšE419 - udom83: āļš931900 - metasound: āļš551\n", + "āļĄāļąāļ - lk82: āļĄ1000 - udom83: āļĄ100000 - metasound: āļĄ100\n", + "āļĄāļąāļ„ - lk82: āļĄ1000 - udom83: āļĄ100000 - metasound: āļĄ100\n", + "āļĄāļĢāļĢāļ„ - lk82: āļĄ1000 - udom83: āļĄ310000 - metasound: āļĄ551\n", + "āļĨāļąāļ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĨ100\n", + "āļĢāļąāļ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĢ100\n", + "āļĢāļąāļāļĐāđŒ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĢ100\n", + " - lk82: - udom83: - metasound: \n" + ] + } + ], + "source": [ + "texts = [\"āļšāļđāļĢāļ“āļ°\", \"āļšāļđāļĢāļ“āļāļēāļĢ\", \"āļĄāļąāļ\", \"āļĄāļąāļ„\", \"āļĄāļĢāļĢāļ„\", \"āļĨāļąāļ\", \"āļĢāļąāļ\", \"āļĢāļąāļāļĐāđŒ\", \"\"]\n", + "for text in texts:\n", + " print(\n", + " \"{} - lk82: {} - udom83: {} - metasound: {}\".format(\n", + " text, lk82(text), udom83(text), metasound(text)\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spellchecking\n", + "\n", + "Default spellchecker uses [Peter Norvig's algorithm](http://www.norvig.com/spell-correct.html) together with word frequency from Thai National Corpus (TNC)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āđ€āļŦāļĨāļĩāļĒāļĄ', 'āđ€āļŦāļĨāļ·āļ­āļĄ']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import spell\n", + "\n", + "# list possible spellings\n", + "spell(\"āđ€āļŦāļĨāļ·āļĒāļĄ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āđ€āļŦāļĨāļĩāļĒāļĄ'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import correct\n", + "\n", + "# choose the most likely spelling\n", + "correct(\"āđ€āļŦāļĨāļ·āļĒāļĄ\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spellchecking - Custom dictionary and word frequency\n", + "\n", + "Custom dictionary can be provided when creating spellchecker." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['āđ€āļŦāļĨāļ·āļ­āļĄ']\n", + "āđ€āļŦāļĨāļ·āļ­āļĄ\n" + ] + } + ], + "source": [ + "from pythainlp.corpus import ttc # Thai Textbook Corpus\n", + "from pythainlp.spell import NorvigSpellChecker\n", + "\n", + "checker = NorvigSpellChecker(custom_dict=ttc.word_freqs())\n", + "print(checker.spell(\"āđ€āļŦāļĨāļ·āļĒāļĄ\"))\n", + "print(checker.correct(\"āđ€āļŦāļĨāļ·āļĒāļĄ\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļˆāļ°', 51681),\n", + " ('āđ€āļ›āđ‡āļ™', 51273),\n", + " ('āđ„āļ›', 46567),\n", + " ('āļāđ‡', 46409),\n", + " ('āđ„āļĄāđˆ', 45895),\n", + " ('āļĄāļĩ', 44899),\n", + " ('āđ„āļ”āđ‰', 44513),\n", + " ('āļ§āđˆāļē', 40290),\n", + " ('āđƒāļŦāđ‰', 38715)]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(checker.dictionary())[1:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also apply conditions and filter function to dictionary when creating spellchecker." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "39977" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker = NorvigSpellChecker() # use default filter (remove any word with number or non-Thai character)\n", + "len(checker.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30379" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker = NorvigSpellChecker(min_freq=5, min_len=2, max_len=15)\n", + "len(checker.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76706" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker_no_filter = NorvigSpellChecker(dict_filter=None) # use no filter\n", + "len(checker_no_filter.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76700" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def remove_yamok(word):\n", + " return False if \"āđ†\" in word else True\n", + "\n", + "checker_custom_filter = NorvigSpellChecker(dict_filter=remove_yamok) # use custom filter\n", + "len(checker_custom_filter.dictionary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part-of-Speech Tagging" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļāļēāļĢ', 'FIXN'), ('āđ€āļ”āļīāļ™āļ—āļēāļ‡', 'VACT')]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.tag import pos_tag, pos_tag_sents\n", + "\n", + "pos_tag([\"āļāļēāļĢ\",\"āđ€āļ”āļīāļ™āļ—āļēāļ‡\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[('āļĢāļēāļŠāļāļīāļˆāļˆāļēāļ™āļļāđ€āļšāļāļĐāļē', 'NCMN'),\n", + " 
('āđ€āļœāļĒāđāļžāļĢāđˆ', 'VACT'),\n", + " ('āļ›āļĢāļ°āļāļēāļĻāļŠāļģāļ™āļąāļāļ™āļēāļĒāļāļŊ', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āđƒāļŦāđ‰', 'JSBR'),\n", + " (' ', 'PUNC'),\n", + " (\"'āļžāļĨ.āļ—.āļŠāļĢāļĢāđ€āļŠāļĢāļīāļ āđāļāđ‰āļ§āļāļģāđ€āļ™āļīāļ”'\", 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļžāđ‰āļ™āļˆāļēāļāļ•āļģāđāļŦāļ™āđˆāļ‡', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļœāļđāđ‰āļ—āļĢāļ‡āļ„āļļāļ“āļ§āļļāļ’āļīāļžāļīāđ€āļĻāļĐ', 'NCMN'),\n", + " ('āļāļ­āļ‡āļ—āļąāļžāļšāļ', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļāļĢāļ°āļ—āļĢāļ§āļ‡āļāļĨāļēāđ‚āļŦāļĄ', 'NCMN')],\n", + " [('āđāļĨāļ°', 'JCRG'),\n", + " ('āđāļ•āđˆāļ‡āļ•āļąāđ‰āļ‡', 'VACT'),\n", + " ('āđƒāļŦāđ‰', 'JSBR'),\n", + " ('āđ€āļ›āđ‡āļ™', 'VSTA'),\n", + " ('āļ‚āđ‰āļēāļĢāļēāļŠāļāļēāļĢ', 'NCMN'),\n", + " ('āļžāļĨāđ€āļĢāļ·āļ­āļ™', 'NCMN'),\n", + " ('āļŠāļēāļĄāļąāļ', 'NCMN'),\n", + " ('āļ•āļģāđāļŦāļ™āđˆāļ‡', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " (\"'āļ­āļ˜āļīāļšāļ”āļĩāļāļĢāļĄāļ›āļĢāļ°āļŠāļēāļŠāļąāļĄāļžāļąāļ™āļ˜āđŒ'\", 'NCMN')]]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sents = [[\"āļĢāļēāļŠāļāļīāļˆāļˆāļēāļ™āļļāđ€āļšāļāļĐāļē\", \"āđ€āļœāļĒāđāļžāļĢāđˆ\", \"āļ›āļĢāļ°āļāļēāļĻāļŠāļģāļ™āļąāļāļ™āļēāļĒāļāļŊ\", \" \", \"āđƒāļŦāđ‰\",\n", + " \" \", \"'āļžāļĨ.āļ—.āļŠāļĢāļĢāđ€āļŠāļĢāļīāļ āđāļāđ‰āļ§āļāļģāđ€āļ™āļīāļ”'\", \" \", \"āļžāđ‰āļ™āļˆāļēāļāļ•āļģāđāļŦāļ™āđˆāļ‡\",\n", + " \" \", \"āļœāļđāđ‰āļ—āļĢāļ‡āļ„āļļāļ“āļ§āļļāļ’āļīāļžāļīāđ€āļĻāļĐ\", \"āļāļ­āļ‡āļ—āļąāļžāļšāļ\", \" \", \"āļāļĢāļ°āļ—āļĢāļ§āļ‡āļāļĨāļēāđ‚āļŦāļĄ\"],\n", + " [\"āđāļĨāļ°\",\"āđāļ•āđˆāļ‡āļ•āļąāđ‰āļ‡\",\"āđƒāļŦāđ‰\", \"āđ€āļ›āđ‡āļ™\", \"āļ‚āđ‰āļēāļĢāļēāļŠāļāļēāļĢ\", \"āļžāļĨāđ€āļĢāļ·āļ­āļ™\", \"āļŠāļēāļĄāļąāļ\",\n", + " \"āļ•āļģāđāļŦāļ™āđˆāļ‡\", \" \", \"'āļ­āļ˜āļīāļšāļ”āļĩāļāļĢāļĄāļ›āļĢāļ°āļŠāļēāļŠāļąāļĄāļžāļąāļ™āļ˜āđŒ'\"]]\n", + "\n", + "pos_tag_sents(sents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Named-Entity Tagging\n", + "\n", + "The tagger use BIO scheme:\n", + "- B - beginning of entity\n", + "- I - inside entity\n", + "- O - outside entity" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļ§āļąāļ™āļ—āļĩāđˆ', 'NOUN', 'O'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('15', 'NUM', 'B-DATE'),\n", + " (' ', 'PUNCT', 'I-DATE'),\n", + " ('āļ.āļĒ.', 'NOUN', 'I-DATE'),\n", + " (' ', 'PUNCT', 'I-DATE'),\n", + " ('61', 'NUM', 'I-DATE'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'),\n", + " ('āļĢāļ°āļšāļš', 'NOUN', 'O'),\n", + " ('āđ€āļ§āļĨāļē', 'NOUN', 'O'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('14', 'NOUN', 'B-TIME'),\n", + " (':', 'PUNCT', 'I-TIME'),\n", + " ('49', 'NUM', 'I-TIME'),\n", + " (' ', 'PUNCT', 'I-TIME'),\n", + " ('āļ™.', 'NOUN', 'I-TIME')]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.tag.named_entity import ThaiNameTagger\n", + "\n", + "ner = ThaiNameTagger()\n", + "ner.get_ner(\"āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 
61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Word Vector" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n", + "INFO:gensim.models.utils_any2vec:loading projection weights from /Users/arthit/pythainlp-data/thai2vec.bin\n", + "INFO:gensim.models.utils_any2vec:loaded (60001, 400) matrix from /Users/arthit/pythainlp-data/thai2vec.bin\n", + "/usr/local/lib/python3.7/site-packages/gensim/matutils.py:737: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + " if np.issubdtype(vec.dtype, np.int):\n" + ] + }, + { + "data": { + "text/plain": [ + "0.99259853" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp.word_vector\n", + "\n", + "pythainlp.word_vector.similarity(\"āļ„āļ™\", \"āļĄāļ™āļļāļĐāļĒāđŒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:gensim.models.keyedvectors:precomputing L2-norms of word weight vectors\n" + ] + }, + { + "data": { + "text/plain": [ + "'āđāļĄāļ§'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.word_vector.doesnt_match([\"āļ„āļ™\", \"āļĄāļ™āļļāļĐāļĒāđŒ\", \"āļšāļļāļ„āļ„āļĨ\", \"āđ€āļˆāđ‰āļēāļŦāļ™āđ‰āļēāļ—āļĩāđˆ\", \"āđāļĄāļ§\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Number Spell Out" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļŦāļ™āļķāđˆāļ‡āļĨāđ‰āļēāļ™āļŠāļ­āļ‡āđāļŠāļ™āļŠāļēāļĄāļŦāļĄāļ·āđˆāļ™āļŠāļĩāđˆāļžāļąāļ™āļŦāđ‰āļēāļĢāđ‰āļ­āļĒāļŦāļāļŠāļīāļšāđ€āļˆāđ‡āļ”āļĨāđ‰āļēāļ™āđāļ›āļ”āđāļŠāļ™āđ€āļāđ‰āļēāļŦāļĄāļ·āđˆāļ™āļŦāļ™āļķāđˆāļ‡āļĢāđ‰āļ­āļĒāļĒāļĩāđˆāļŠāļīāļšāļŠāļēāļĄāļšāļēāļ—āļŠāļĩāđˆāļŠāļīāļšāļŦāđ‰āļēāļŠāļ•āļēāļ‡āļ„āđŒ'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import bahttext\n", + "\n", + "bahttext(1234567890123.45)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļŦāļ™āļķāđˆāļ‡āļšāļēāļ—āđ€āļāđ‰āļēāļŠāļīāļšāđ€āļ­āđ‡āļ”āļŠāļ•āļēāļ‡āļ„āđŒ'" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bahttext(1.909)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/sentiment_analysis.ipynb b/notebooks/sentiment_analysis.ipynb index 58b659687..a1ab56694 100644 --- a/notebooks/sentiment_analysis.ipynb +++ 
b/notebooks/sentiment_analysis.ipynb @@ -47,12 +47,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", + "import re\n", + "\n", + "import emoji\n", "import numpy as np\n", + "import pandas as pd\n", + "\n", "from pythainlp import word_tokenize\n", "from tqdm import tqdm_notebook\n", - "import re\n", - "import emoji\n", "\n", "#viz\n", "import matplotlib.pyplot as plt\n", @@ -79,8 +81,8 @@ "def replace_rep(text):\n", " def _replace_rep(m):\n", " c,cc = m.groups()\n", - " return f'{c}xxrep'\n", - " re_rep = re.compile(r'(\\S)(\\1{2,})')\n", + " return f\"{c}xxrep\"\n", + " re_rep = re.compile(r\"(\\S)(\\1{2,})\")\n", " return re_rep.sub(_replace_rep, text)\n", "\n", "def ungroup_emoji(toks):\n", @@ -100,7 +102,7 @@ " res = replace_rep(res)\n", " \n", " #tokenize\n", - " res = [word for word in word_tokenize(res, engine='ulmfit') if word and not re.search(pattern=r\"\\s+\", string=word)]\n", + " res = [word for word in word_tokenize(res, engine=\"ulmfit\") if word and not re.search(pattern=r\"\\s+\", string=word)]\n", " \n", " #post rules\n", " res = ungroup_emoji(res)\n", @@ -123,15 +125,13 @@ }, "outputs": [], "source": [ - "with open('train.txt') as f:\n", + "with open(\"train.txt\") as f:\n", " texts = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "with open('train_label.txt') as f:\n", + "with open(\"train_label.txt\") as f:\n", " categories = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "all_df = pd.DataFrame({'category':categories, 'texts':texts})\n", + "all_df = pd.DataFrame({\"category\":categories, \"texts\":texts})\n", "all_df.shape" ] }, @@ -141,11 +141,10 @@ "metadata": {}, "outputs": [], "source": [ - "with open('test.txt') as f:\n", + "with open(\"test.txt\") as f:\n", " texts = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "test_df = pd.DataFrame({'category':'test', 'texts':texts})\n", + "test_df = pd.DataFrame({\"category\":\"test\", \"texts\":texts})\n", "test_df.shape" ] }, @@ -162,16 +161,16 @@ "metadata": {}, "outputs": [], "source": [ - "all_df = pd.read_csv('all_df.csv')\n", - "test_df = pd.read_csv('test_df.csv')\n", + "all_df = pd.read_csv(\"all_df.csv\")\n", + "test_df = pd.read_csv(\"test_df.csv\")\n", "\n", - "all_df['processed'] = all_df.texts.map(lambda x: '|'.join(process_text(x)))\n", - "all_df['wc'] = all_df.processed.map(lambda x: len(x.split('|')))\n", - "all_df['uwc'] = all_df.processed.map(lambda x: len(set(x.split('|'))))\n", + "all_df[\"processed\"] = all_df.texts.map(lambda x: \"|\".join(process_text(x)))\n", + "all_df[\"wc\"] = all_df.processed.map(lambda x: len(x.split(\"|\")))\n", + "all_df[\"uwc\"] = all_df.processed.map(lambda x: len(set(x.split(\"|\"))))\n", "\n", - "test_df['processed'] = test_df.texts.map(lambda x: '|'.join(process_text(x)))\n", - "test_df['wc'] = test_df.processed.map(lambda x: len(x.split('|')))\n", - "test_df['uwc'] = test_df.processed.map(lambda x: len(set(x.split('|'))))" + "test_df[\"processed\"] = test_df.texts.map(lambda x: \"|\".join(process_text(x)))\n", + "test_df[\"wc\"] = test_df.processed.map(lambda x: len(x.split(\"|\")))\n", + "test_df[\"uwc\"] = test_df.processed.map(lambda x: len(set(x.split(\"|\"))))" ] }, { @@ -352,7 +351,7 @@ ], "source": [ "#prevalence\n", - "print(train_df['category'].value_counts() / train_df.shape[0])" + "print(train_df[\"category\"].value_counts() / train_df.shape[0])" ] }, { @@ -374,7 +373,7 @@ ], "source": [ "#prevalence\n", - "print(valid_df['category'].value_counts() / 
valid_df.shape[0])" + "print(valid_df[\"category\"].value_counts() / valid_df.shape[0])" ] }, { @@ -398,8 +397,8 @@ "outputs": [], "source": [ "#dependent variables\n", - "y_train = train_df['category']\n", - "y_valid = valid_df['category']" + "y_train = train_df[\"category\"]\n", + "y_valid = valid_df[\"category\"]" ] }, { @@ -424,10 +423,10 @@ "from sklearn.linear_model import LogisticRegression\n", "\n", "tfidf = TfidfVectorizer(tokenizer=process_text, ngram_range=(1,2), min_df=20, sublinear_tf=True)\n", - "tfidf_fit = tfidf.fit(all_df['texts'])\n", - "text_train = tfidf_fit.transform(train_df['texts'])\n", - "text_valid = tfidf_fit.transform(valid_df['texts'])\n", - "text_test = tfidf_fit.transform(test_df['texts'])\n", + "tfidf_fit = tfidf.fit(all_df[\"texts\"])\n", + "text_train = tfidf_fit.transform(train_df[\"texts\"])\n", + "text_valid = tfidf_fit.transform(valid_df[\"texts\"])\n", + "text_test = tfidf_fit.transform(test_df[\"texts\"])\n", "text_train.shape, text_valid.shape" ] }, @@ -459,11 +458,11 @@ "from sklearn.preprocessing import StandardScaler\n", "\n", "scaler = StandardScaler()\n", - "scaler_fit = scaler.fit(all_df[['wc','uwc']].astype(float))\n", + "scaler_fit = scaler.fit(all_df[[\"wc\",\"uwc\"]].astype(float))\n", "print(scaler_fit.mean_, scaler_fit.var_)\n", - "num_train = scaler_fit.transform(train_df[['wc','uwc']].astype(float))\n", - "num_valid = scaler_fit.transform(valid_df[['wc','uwc']].astype(float))\n", - "num_test = scaler_fit.transform(test_df[['wc','uwc']].astype(float))\n", + "num_train = scaler_fit.transform(train_df[[\"wc\",\"uwc\"]].astype(float))\n", + "num_valid = scaler_fit.transform(valid_df[[\"wc\",\"uwc\"]].astype(float))\n", + "num_test = scaler_fit.transform(test_df[[\"wc\",\"uwc\"]].astype(float))\n", "num_train.shape, num_valid.shape" ] }, @@ -516,7 +515,7 @@ ], "source": [ "#fit logistic regression models\n", - "model = LogisticRegression(C=2., penalty='l2', solver='liblinear', dual=False, multi_class='ovr')\n", + "model = LogisticRegression(C=2., penalty=\"l2\", solver=\"liblinear\", dual=False, multi_class=\"ovr\")\n", "model.fit(X_train,y_train)\n", "model.score(X_valid,y_valid)" ] @@ -537,14 +536,14 @@ "probs = model.predict_proba(X_valid)\n", "probs_df = pd.DataFrame(probs)\n", "probs_df.columns = model.classes_\n", - "probs_df['preds'] = model.predict(X_valid)\n", - "probs_df['category'] = valid_df.category\n", - "probs_df['texts'] = valid_df.texts\n", - "probs_df['processed'] = valid_df.processed\n", - "probs_df['wc'] = valid_df.wc\n", - "probs_df['uwc'] = valid_df.uwc\n", - "probs_df['hit'] = (probs_df.preds==probs_df.category)\n", - "probs_df.to_csv('probs_df_linear.csv',index=False)" + "probs_df[\"preds\"] = model.predict(X_valid)\n", + "probs_df[\"category\"] = valid_df.category\n", + "probs_df[\"texts\"] = valid_df.texts\n", + "probs_df[\"processed\"] = valid_df.processed\n", + "probs_df[\"wc\"] = valid_df.wc\n", + "probs_df[\"uwc\"] = valid_df.uwc\n", + "probs_df[\"hit\"] = (probs_df.preds==probs_df.category)\n", + "probs_df.to_csv(\"probs_df_linear.csv\", index=False)" ] }, { @@ -577,10 +576,10 @@ "\n", "conf_mat = confusion_matrix(probs_df.category,probs_df.preds)\n", "print(model.score(X_valid,y_valid))\n", - "sns.heatmap(conf_mat, annot=True, fmt='d',\n", + "sns.heatmap(conf_mat, annot=True, fmt=\"d\",\n", " xticklabels=model.classes_, yticklabels=model.classes_)\n", - "plt.ylabel('Actual')\n", - "plt.xlabel('Predicted')\n", + "plt.ylabel(\"Actual\")\n", + "plt.xlabel(\"Predicted\")\n", "plt.show()" ] }, @@ -601,8 +600,8 @@ 
"from fastai.callbacks import CSVLogger, SaveModelCallback\n", "from pythainlp.ulmfit import *\n", "\n", - "model_path = 'wisesight_data/'\n", - "all_df = pd.read_csv('all_df.csv')\n", + "model_path = \"wisesight_data/\"\n", + "all_df = pd.read_csv(\"all_df.csv\")\n", "train_df, valid_df = train_test_split(all_df, test_size=0.15, random_state=1412)" ] }, @@ -619,11 +618,11 @@ "metadata": {}, "outputs": [], "source": [ - "tt = Tokenizer(tok_func = ThaiTokenizer, lang = 'th', pre_rules = pre_rules_th, post_rules=post_rules_th)\n", + "tt = Tokenizer(tok_func=ThaiTokenizer, lang=\"th\", pre_rules=pre_rules_th, post_rules=post_rules_th)\n", "processor = [TokenizeProcessor(tokenizer=tt, chunksize=10000, mark_fields=False),\n", " NumericalizeProcessor(vocab=None, max_vocab=60000, min_freq=2)]\n", "\n", - "data_lm = (TextList.from_df(all_df, model_path, cols='texts', processor=processor)\n", + "data_lm = (TextList.from_df(all_df, model_path, cols=\"texts\", processor=processor)\n", " .random_split_by_pct(valid_pct = 0.01, seed = 1412)\n", " .label_for_lm()\n", " .databunch(bs=48))\n", @@ -708,7 +707,7 @@ ], "source": [ "#train frozen\n", - "print('training frozen')\n", + "print(\"training frozen\")\n", "learn.freeze_to(-1)\n", "learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))" ] @@ -777,7 +776,7 @@ ], "source": [ "#train unfrozen\n", - "print('training unfrozen')\n", + "print(\"training unfrozen\")\n", "learn.unfreeze()\n", "learn.fit_one_cycle(5, 1e-3, moms=(0.8, 0.7))" ] @@ -789,7 +788,7 @@ "outputs": [], "source": [ "# learn.save('wisesight_lm')\n", - "learn.save_encoder('wisesight_enc')" + "learn.save_encoder(\"wisesight_enc\")" ] }, { @@ -814,17 +813,17 @@ ], "source": [ "#lm data\n", - "data_lm = load_data(model_path,'wisesight_lm.pkl')\n", + "data_lm = load_data(model_path, \"wisesight_lm.pkl\")\n", "data_lm.sanity_check()\n", "\n", "#classification data\n", - "tt = Tokenizer(tok_func = ThaiTokenizer, lang = 'th', pre_rules = pre_rules_th, post_rules=post_rules_th)\n", + "tt = Tokenizer(tok_func=ThaiTokenizer, lang=\"th\", pre_rules=pre_rules_th, post_rules=post_rules_th)\n", "processor = [TokenizeProcessor(tokenizer=tt, chunksize=10000, mark_fields=False),\n", " NumericalizeProcessor(vocab=data_lm.vocab, max_vocab=60000, min_freq=20)]\n", "\n", - "data_cls = (ItemLists(model_path,train=TextList.from_df(train_df, model_path, cols=['texts'], processor=processor),\n", - " valid=TextList.from_df(valid_df, model_path, cols=['texts'], processor=processor))\n", - " .label_from_df('category')\n", + "data_cls = (ItemLists(model_path,train=TextList.from_df(train_df, model_path, cols=[\"texts\"], processor=processor),\n", + " valid=TextList.from_df(valid_df, model_path, cols=[\"texts\"], processor=processor))\n", + " .label_from_df(\"category\")\n", " .databunch(bs=50)\n", " )\n", "data_cls.sanity_check()\n", @@ -844,7 +843,7 @@ "\n", "learn = text_classifier_learner(data_cls, AWD_LSTM, config=config, pretrained=False, **trn_args)\n", "#load pretrained finetuned model\n", - "learn.load_encoder('wisesight_enc')" + "learn.load_encoder(\"wisesight_enc\")" ] }, { @@ -909,7 +908,8 @@ "metadata": {}, "outputs": [], "source": [ - "learn.load('bestmodel');\n", + "learn.load(\"bestmodel\")\n", + "\n", "#get predictions\n", "probs, y_true, loss = learn.get_preds(ds_type = DatasetType.Valid, ordered=True, with_loss=True)\n", "classes = learn.data.train_ds.classes\n", @@ -938,9 +938,9 @@ "source": [ "to_df = np.concatenate([y_true[:,None],preds[:,None],loss[:,None],prob],1)\n", "probs_df = pd.DataFrame(to_df)\n", - 
"probs_df.columns = ['category','preds','loss'] + classes\n", - "probs_df['hit'] = (probs_df.category == probs_df.preds)\n", - "probs_df['texts'] = valid_df.texts\n", + "probs_df.columns = [\"category\",\"preds\",\"loss\"] + classes\n", + "probs_df[\"hit\"] = (probs_df.category == probs_df.preds)\n", + "probs_df[\"texts\"] = valid_df.texts\n", "(y_true==preds).mean()" ] }, @@ -967,10 +967,10 @@ "import seaborn as sns\n", "\n", "conf_mat = confusion_matrix(probs_df.category,probs_df.preds)\n", - "sns.heatmap(conf_mat, annot=True, fmt='d',\n", + "sns.heatmap(conf_mat, annot=True, fmt=\"d\",\n", " xticklabels=classes, yticklabels=classes)\n", - "plt.ylabel('Actual')\n", - "plt.xlabel('Predicted')\n", + "plt.ylabel(\"Actual\")\n", + "plt.xlabel(\"Predicted\")\n", "plt.show()" ] } @@ -991,7 +991,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index 7d873a9a1..21a18a9c1 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -1,6 +1,6 @@ ïŧŋ# -*- coding: utf-8 -*- -__version__ = 2.0 +__version__ = "2.0.2" thai_consonants = "āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪ" # 44 chars thai_vowels = "āļĪāļĶāļ°\u0e31āļēāļģ\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39āđ€āđāđ‚āđƒāđ„\u0e45\u0e47" # 19 @@ -25,8 +25,8 @@ from pythainlp.soundex import soundex -from pythainlp.spell import spell +from pythainlp.spell import correct, spell from pythainlp.tag import pos_tag -from pythainlp.tokenize import sent_tokenize, tcc, word_tokenize +from pythainlp.tokenize import sent_tokenize, tcc, word_tokenize, Tokenizer from pythainlp.transliterate import romanize, transliterate from pythainlp.util import collate, thai_strftime diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py index f41744dac..855215bc0 100644 --- a/pythainlp/corpus/__init__.py +++ b/pythainlp/corpus/__init__.py @@ -113,7 +113,7 @@ def download(name: str, force: bool = False): data_json = data.json() if name in list(data_json.keys()): temp_name = data_json[name] - print("Download : " + name) + print("Download: " + name) if not db.search(temp.name == name): print(name + " " + temp_name["version"]) diff --git a/pythainlp/soundex/__init__.py b/pythainlp/soundex/__init__.py index 30cfcd0a7..fac5f978d 100644 --- a/pythainlp/soundex/__init__.py +++ b/pythainlp/soundex/__init__.py @@ -12,7 +12,7 @@ # [KSS97] https://linux.thai.net/~thep/soundex/soundex.html -def soundex(text, engine="udom83"): +def soundex(text: str, engine="udom83") -> str: """ Thai Soundex diff --git a/pythainlp/soundex/lk82.py b/pythainlp/soundex/lk82.py index f7b21a764..e0dee6d6b 100644 --- a/pythainlp/soundex/lk82.py +++ b/pythainlp/soundex/lk82.py @@ -21,7 +21,7 @@ _RE_3 = re.compile(r"[āđ‡āđāļšāđ†āļŊ]") -def lk82(text): +def lk82(text: str) -> str: """ LK82 - It's a Thai soundex rule. 
diff --git a/pythainlp/soundex/metasound.py b/pythainlp/soundex/metasound.py index c5f7f8233..6998f81a9 100644 --- a/pythainlp/soundex/metasound.py +++ b/pythainlp/soundex/metasound.py @@ -20,7 +20,7 @@ _C8 = "āļ§" # W -> 8 -def metasound(text, length=4): +def metasound(text: str, length: int = 4) -> str: """ Thai MetaSound diff --git a/pythainlp/soundex/udom83.py b/pythainlp/soundex/udom83.py index bf7ec5bba..dce60feaa 100644 --- a/pythainlp/soundex/udom83.py +++ b/pythainlp/soundex/udom83.py @@ -29,7 +29,7 @@ ) -def udom83(text): +def udom83(text: str) -> str: """ Udom83 - It's a Thai soundex rule. diff --git a/pythainlp/spell/__init__.py b/pythainlp/spell/__init__.py index cfd06682b..c4b654f53 100644 --- a/pythainlp/spell/__init__.py +++ b/pythainlp/spell/__init__.py @@ -3,11 +3,14 @@ Spell checking """ -from .pn import correct as pn_correct -from .pn import spell as pn_spell +from typing import List +from .pn import DEFAULT_SPELL_CHECKER, NorvigSpellChecker -def spell(word, engine="pn"): +__all__ = ["DEFAULT_SPELL_CHECKER", "correct", "spell", "NorvigSpellChecker"] + + +def spell(word: str, engine="pn") -> List[str]: """ :param str word: word to check spelling :param str engine: @@ -15,10 +18,10 @@ def spell(word, engine="pn"): :return: list of words """ - return pn_spell(word) + return DEFAULT_SPELL_CHECKER.spell(word) -def correct(word, engine="pn"): +def correct(word: str, engine="pn") -> str: """ :param str word: word to correct spelling :param str engine: @@ -26,4 +29,4 @@ def correct(word, engine="pn"): :return: the corrected word """ - return pn_correct(word) + return DEFAULT_SPELL_CHECKER.correct(word) diff --git a/pythainlp/spell/pn.py b/pythainlp/spell/pn.py index 84def66f3..ddce3d5c7 100644 --- a/pythainlp/spell/pn.py +++ b/pythainlp/spell/pn.py @@ -7,26 +7,33 @@ Based on Peter Norvig's Python code from http://norvig.com/spell-correct.html """ from collections import Counter +from typing import Callable, List, Set, Tuple from pythainlp import thai_letters from pythainlp.corpus import tnc -from pythainlp.util import is_thaichar +from pythainlp.util import isthaichar -def _no_filter(word): +def _no_filter(word: str) -> bool: return True -def _is_thai_and_not_num(word): +def _is_thai_and_not_num(word: str) -> bool: for ch in word: - if ch != "." and not is_thaichar(ch): + if ch != "." 
and not isthaichar(ch): return False if ch in "āđāđ‘āđ’āđ“āđ”āđ•āđ–āđ—āđ˜āđ™0123456789": return False return True -def _keep(word_freq, min_freq, min_len, max_len, dict_filter): +def _keep( + word_freq: int, + min_freq: int, + min_len: int, + max_len: int, + dict_filter: Callable[[str], bool], +): """ Keep only Thai words with at least min_freq frequency and has length between min_len and max_len characters @@ -41,7 +48,7 @@ def _keep(word_freq, min_freq, min_len, max_len, dict_filter): return dict_filter(word) -def _edits1(word): +def _edits1(word: str) -> Set[str]: """ Return a set of words with edit distance of 1 from the input word """ @@ -54,7 +61,7 @@ def _edits1(word): return set(deletes + transposes + replaces + inserts) -def _edits2(word): +def _edits2(word: str) -> Set[str]: """ Return a set of words with edit distance of 2 from the input word """ @@ -64,11 +71,11 @@ def _edits2(word): class NorvigSpellChecker: def __init__( self, - custom_dict=None, - min_freq=2, - min_len=2, - max_len=40, - dict_filter=_is_thai_and_not_num, + custom_dict: List[Tuple[str, int]] = None, + min_freq: int = 2, + min_len: int = 2, + max_len: int = 40, + dict_filter: Callable[[str], bool] = _is_thai_and_not_num, ): """ Initialize Peter Norvig's spell checker object @@ -97,13 +104,13 @@ def __init__( if self.__WORDS_TOTAL < 1: self.__WORDS_TOTAL = 0 - def dictionary(self): + def dictionary(self) -> List[Tuple[str, int]]: """ Return the spelling dictionary currently used by this spell checker """ return self.__WORDS.items() - def known(self, words): + def known(self, words: List[str]) -> List[str]: """ Return a list of given words that found in the spelling dictionary @@ -111,7 +118,7 @@ def known(self, words): """ return list(w for w in words if w in self.__WORDS) - def prob(self, word): + def prob(self, word: str) -> float: """ Return probability of an input word, according to the spelling dictionary @@ -119,7 +126,7 @@ def prob(self, word): """ return self.__WORDS[word] / self.__WORDS_TOTAL - def freq(self, word): + def freq(self, word: str) -> int: """ Return frequency of an input word, according to the spelling dictionary @@ -127,7 +134,7 @@ def freq(self, word): """ return self.__WORDS[word] - def spell(self, word): + def spell(self, word: str) -> List[str]: """ Return a list of possible words, according to edit distance of 1 and 2, sorted by frequency of word occurrance in the spelling dictionary @@ -147,7 +154,7 @@ def spell(self, word): return candidates - def correct(self, word): + def correct(self, word: str) -> str: """ Return the most possible word, using the probability from the spelling dictionary @@ -160,49 +167,3 @@ def correct(self, word): DEFAULT_SPELL_CHECKER = NorvigSpellChecker() - - -def dictionary(): - """ - Return the spelling dictionary currently used by this spell checker. - The spelling dictionary is based on words found in the Thai National Corpus. - """ - return DEFAULT_SPELL_CHECKER.dictionary() - - -def known(words): - """ - Return a list of given words that found in the spelling dictionary. - The spelling dictionary is based on words found in the Thai National Corpus. 
- - :param str words: A list of words to check if they are in the spelling dictionary - """ - return DEFAULT_SPELL_CHECKER.known(words) - - -def prob(word): - """ - Return probability of an input word, according to the Thai National Corpus - - :param str word: A word to check its probability of occurrence - """ - return DEFAULT_SPELL_CHECKER.prob(word) - - -def spell(word): - """ - Return a list of possible words, according to edit distance of 1 and 2, - sorted by probability of word occurrance in the Thai National Corpus. - - :param str word: A word to check its spelling - """ - return DEFAULT_SPELL_CHECKER.spell(word) - - -def correct(word): - """ - Return the most possible word, according to probability from the Thai National Corpus - - :param str word: A word to correct its spelling - """ - return DEFAULT_SPELL_CHECKER.correct(word) diff --git a/pythainlp/summarize/freq.py b/pythainlp/summarize/freq.py index c7bc25ff9..2dc7044fd 100644 --- a/pythainlp/summarize/freq.py +++ b/pythainlp/summarize/freq.py @@ -33,10 +33,10 @@ def __compute_frequencies(self, word_tokenized_sents): return word_freqs - def __rank(self, ranking, n): + def __rank(self, ranking, n: int): return nlargest(n, ranking, key=ranking.get) - def summarize(self, text, n, tokenizer): + def summarize(self, text: str, n: int, tokenizer: str): sents = sent_tokenize(text) word_tokenized_sents = [word_tokenize(sent, tokenizer) for sent in sents] self.__freq = self.__compute_frequencies(word_tokenized_sents) diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py index 9b0232b78..6f788aaf0 100644 --- a/pythainlp/tag/__init__.py +++ b/pythainlp/tag/__init__.py @@ -4,31 +4,29 @@ such as its part of speech and class of named-entity. """ -__all__ = [ - "pos_tag", - "pos_tag_sents", - "tag_provinces" -] +from typing import List, Tuple + +__all__ = ["pos_tag", "pos_tag_sents", "tag_provinces"] from .locations import tag_provinces # tag map for orchid to Universal Dependencies -# from Korakot Chaovavanich +# from Korakot Chaovavanich _TAG_MAP_UD = { - #NOUN - "NOUN":"NOUN", - "NCMN":"NOUN", - "NTTL":"NOUN", - "CNIT":"NOUN", - "CLTV":"NOUN", - "CMTR":"NOUN", - "CFQC":"NOUN", - "CVBL":"NOUN", + # NOUN + "NOUN": "NOUN", + "NCMN": "NOUN", + "NTTL": "NOUN", + "CNIT": "NOUN", + "CLTV": "NOUN", + "CMTR": "NOUN", + "CFQC": "NOUN", + "CVBL": "NOUN", # VERB - "VACT":"VERB", - "VSTA":"VERB", - #PRON - "PRON":"PRON", - "NPRP":"PRON", + "VACT": "VERB", + "VSTA": "VERB", + # PRON + "PRON": "PRON", + "NPRP": "PRON", # ADJ "ADJ": "ADJ", "NONM": "ADJ", @@ -40,13 +38,13 @@ "ADVI": "ADV", "ADVP": "ADV", "ADVS": "ADV", - # INT + # INT "INT": "INTJ", # PRON - "PROPN":"PROPN", - "PPRS":"PROPN", - "PDMN":"PROPN", - "PNTR":"PROPN", + "PROPN": "PROPN", + "PPRS": "PROPN", + "PDMN": "PROPN", + "PNTR": "PROPN", # DET "DET": "DET", "DDAN": "DET", @@ -56,57 +54,74 @@ "DIAC": "DET", "DIBQ": "DET", "DIAQ": "DET", - "DCNM": "DET", # NUM "NUM": "NUM", "NCNM": "NUM", "NLBL": "NUM", "DCNM": "NUM", - # AUX + # AUX "AUX": "AUX", "XVBM": "AUX", "XVAM": "AUX", "XVMM": "AUX", "XVBB": "AUX", "XVAE": "AUX", - # ADP + # ADP "ADP": "ADP", "RPRE": "ADP", # CCONJ - "CCONJ":"CCONJ", - "JCRG":"CCONJ", - # SCONJ - "SCONJ":"SCONJ", - "PREL":"SCONJ", - "JSBR":"SCONJ", - "JCMP":"SCONJ", + "CCONJ": "CCONJ", + "JCRG": "CCONJ", + # SCONJ + "SCONJ": "SCONJ", + "PREL": "SCONJ", + "JSBR": "SCONJ", + "JCMP": "SCONJ", # PART - "PART":"PART", - "FIXN":"PART", - "FIXV":"PART", - "EAFF":"PART", - "EITT":"PART", - "AITT":"PART", - "NEG":"PART", + "PART": "PART", + "FIXN": "PART", + 
"FIXV": "PART", + "EAFF": "PART", + "EITT": "PART", + "AITT": "PART", + "NEG": "PART", # PUNCT - "PUNCT":"PUNCT", - "PUNC":"PUNCT" + "PUNCT": "PUNCT", + "PUNC": "PUNCT", } -def _UD_Exception(w,tag): - if w=="āļāļēāļĢ" or w=="āļ„āļ§āļēāļĄ": - return "NOUN" - return tag -def _orchid_to_ud(tag): - _i=0 - temp=[] - while _i str: + if w == "āļāļēāļĢ" or w == "āļ„āļ§āļēāļĄ": + return "NOUN" + + return tag + + +def _orchid_to_ud(tag) -> List[Tuple[str, str]]: + _i = 0 + temp = [] + while _i < len(tag): + temp.append((tag[_i][0], _UD_Exception(tag[_i][0], _TAG_MAP_UD[tag[_i][1]]))) + _i += 1 + + return temp + + +def _artagger_tag(words: List[str], corpus: str = None) -> List[Tuple[str, str]]: + if not words: + return [] + + from artagger import Tagger -def pos_tag(words, engine="perceptron", corpus="orchid"): + words_ = Tagger().tag(" ".join(words)) + + return [(word.word, word.tag) for word in words_] + + +def pos_tag( + words: List[str], engine: str = "perceptron", corpus: str = "orchid" +) -> List[Tuple[str, str]]: """ Part of Speech tagging function. @@ -121,41 +136,36 @@ def pos_tag(words, engine="perceptron", corpus="orchid"): * pud - Parallel Universal Dependencies (PUD) treebanks :return: returns a list of labels regarding which part of speech it is """ - _corpus=corpus - _tag=[] - if corpus=="orchid_ud": - corpus="orchid" + _corpus = corpus + _tag = [] + if corpus == "orchid_ud": + corpus = "orchid" if not words: return [] if engine == "perceptron": from .perceptron import tag as tag_ elif engine == "artagger": - - def tag_(words, corpus=None): - if not words: - return [] - - from artagger import Tagger - words_ = Tagger().tag(" ".join(words)) - - return [(word.word, word.tag) for word in words_] - + tag_ = _artagger_tag else: # default, use "unigram" ("old") engine from .unigram import tag as tag_ - _tag= tag_(words, corpus=corpus) - if _corpus=="orchid_ud": - _tag=_orchid_to_ud(_tag) + _tag = tag_(words, corpus=corpus) + + if _corpus == "orchid_ud": + _tag = _orchid_to_ud(_tag) + return _tag -def pos_tag_sents(sentences, engine="perceptron", corpus="orchid"): +def pos_tag_sents( + sentences: List[List[str]], engine: str = "perceptron", corpus: str = "orchid" +) -> List[List[Tuple[str, str]]]: """ Part of Speech tagging Sentence function. 
- :param list sentences: a list of tokenized sentences (a list of tokenized words in sentences) + :param list sentences: a list of lists of tokenized words :param str engine: - * unigram - unigram tagger + * unigram - unigram tagger * perceptron - perceptron tagger (default) * artagger - RDR POS tagger :param str corpus: diff --git a/pythainlp/tag/locations.py b/pythainlp/tag/locations.py index 01bf3060c..74fb96e5d 100644 --- a/pythainlp/tag/locations.py +++ b/pythainlp/tag/locations.py @@ -3,10 +3,12 @@ Recognizes locations in text """ +from typing import List, Tuple + from pythainlp.corpus import provinces -def tag_provinces(tokens): +def tag_provinces(tokens: List[str]) -> List[Tuple[str, str]]: """ Recognize Thailand provinces in text diff --git a/pythainlp/tag/named_entity.py b/pythainlp/tag/named_entity.py index a1236d171..dca5d18b8 100644 --- a/pythainlp/tag/named_entity.py +++ b/pythainlp/tag/named_entity.py @@ -5,20 +5,22 @@ __all__ = ["ThaiNameTagger"] +from typing import List, Tuple, Union + import sklearn_crfsuite from pythainlp.corpus import download, get_corpus_path, thai_stopwords from pythainlp.tag import pos_tag from pythainlp.tokenize import word_tokenize -from pythainlp.util import is_thaiword +from pythainlp.util import isthai _WORD_TOKENIZER = "newmm" # āļ•āļąāļ§āļ•āļąāļ”āļ„āļģ -def _is_stopword(word): # āđ€āļŠāđ‡āļ„āļ§āđˆāļēāđ€āļ›āđ‡āļ™āļ„āļģāļŸāļļāđˆāļĄāđ€āļŸāļ·āļ­āļĒ +def _is_stopword(word: str) -> bool: # āđ€āļŠāđ‡āļ„āļ§āđˆāļēāđ€āļ›āđ‡āļ™āļ„āļģāļŸāļļāđˆāļĄāđ€āļŸāļ·āļ­āļĒ return word in thai_stopwords() -def _doc2features(doc, i): +def _doc2features(doc, i) -> dict: word = doc[i][0] postag = doc[i][1] @@ -26,7 +28,7 @@ def _doc2features(doc, i): features = { "word.word": word, "word.stopword": _is_stopword(word), - "word.isthai": is_thaiword(word), + "word.isthai": isthai(word), "word.isspace": word.isspace(), "postag": postag, "word.isdigit": word.isdigit(), @@ -41,7 +43,7 @@ def _doc2features(doc, i): prev_features = { "word.prevword": prevword, "word.previsspace": prevword.isspace(), - "word.previsthai": is_thaiword(prevword), + "word.previsthai": isthai(prevword), "word.prevstopword": _is_stopword(prevword), "word.prevpostag": prevpostag, "word.prevwordisdigit": prevword.isdigit(), @@ -58,7 +60,7 @@ def _doc2features(doc, i): "word.nextword": nextword, "word.nextisspace": nextword.isspace(), "word.nextpostag": nextpostag, - "word.nextisthai": is_thaiword(nextword), + "word.nextisthai": isthai(nextword), "word.nextstopword": _is_stopword(nextword), "word.nextwordisdigit": nextword.isdigit(), } @@ -87,7 +89,9 @@ def __init__(self): model_filename=self.__data_path, ) - def get_ner(self, text, pos=True): + def get_ner( + self, text: str, pos: bool = True + ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]]]: """ Get named-entities in text @@ -101,10 +105,11 @@ def get_ner(self, text, pos=True): >>> ner = ThaiNameTagger() >>> ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 
61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.") [('āļ§āļąāļ™āļ—āļĩāđˆ', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), ('15', 'NUM', 'B-DATE'), - (' ', 'PUNCT', 'I-DATE'), ('āļ.āļĒ.', 'NOUN', 'I-DATE'), (' ', 'PUNCT', 'I-DATE'), - ('61', 'NUM', 'I-DATE'), (' ', 'PUNCT', 'O'), ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'), - ('āļĢāļ°āļšāļš', 'NOUN', 'O'), ('āđ€āļ§āļĨāļē', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), - ('14', 'NOUN', 'B-TIME'), (':', 'PUNCT', 'I-TIME'), ('49', 'NUM', 'I-TIME'), + (' ', 'PUNCT', 'I-DATE'), ('āļ.āļĒ.', 'NOUN', 'I-DATE'), + (' ', 'PUNCT', 'I-DATE'), ('61', 'NUM', 'I-DATE'), + (' ', 'PUNCT', 'O'), ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'), + ('āļĢāļ°āļšāļš', 'NOUN', 'O'), ('āđ€āļ§āļĨāļē', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), + ('14', 'NOUN', 'B-TIME'), (':', 'PUNCT', 'I-TIME'), ('49', 'NUM', 'I-TIME'), (' ', 'PUNCT', 'I-TIME'), ('āļ™.', 'NOUN', 'I-TIME')] >>> ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.", pos=False) [('āļ§āļąāļ™āļ—āļĩāđˆ', 'O'), (' ', 'O'), ('15', 'B-DATE'), (' ', 'I-DATE'), @@ -113,7 +118,9 @@ def get_ner(self, text, pos=True): (':', 'I-TIME'), ('49', 'I-TIME'), (' ', 'I-TIME'), ('āļ™.', 'I-TIME')] """ self.__tokens = word_tokenize(text, engine=_WORD_TOKENIZER) - self.__pos_tags = pos_tag(self.__tokens,engine="perceptron", corpus="orchid_ud") + self.__pos_tags = pos_tag( + self.__tokens, engine="perceptron", corpus="orchid_ud" + ) self.__x_test = self.__extract_features(self.__pos_tags) self.__y = self.crf.predict_single(self.__x_test) diff --git a/pythainlp/tag/perceptron.py b/pythainlp/tag/perceptron.py index 4032df759..ccff12427 100644 --- a/pythainlp/tag/perceptron.py +++ b/pythainlp/tag/perceptron.py @@ -3,6 +3,7 @@ Perceptron Part-Of-Speech tagger """ import os +from typing import List, Tuple import dill from pythainlp.corpus import corpus_path @@ -22,127 +23,124 @@ def _load_tagger(filename): _PUD_TAGGER = _load_tagger(_PUD_DATA_FILENAME) -def tag(words, corpus="pud"): +def tag(words: List[str], corpus: str = "pud") -> List[Tuple[str, str]]: """ āļĢāļąāļšāļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āļ„āļ·āļ™āļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āđ€āļŠāđˆāļ™ [('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ...] """ if not words: return [] - # perceptron tagger cannot handle empty string - #words = [word.strip() for word in words if word.strip()] - if corpus == "orchid": tagger = _ORCHID_TAGGER - i=0 - while i': - words[i]="" - elif words[i]=='=': - words[i]="" - elif words[i]=='!': - words[i]="" - elif words[i]=='’': - words[i]="" - elif words[i]==':': - words[i]="" - elif words[i]=='*': - words[i]="" - elif words[i]==';': - words[i]="" - elif words[i]=='/': - words[i]="" - i+=1 - t2=tagger.tag(words) - t=[] - i=0 - while i" or word=='': - word=" " - elif word=="": - word="+" - elif word=="": - word="-" - elif word=="": - word="=" - elif word=="": - word="," - elif word=="": - word="$" - elif word=="": - word="." - elif word=="": - word="(" - elif word=="": - word=")" - elif word=="": - word='"' - elif word=="": - word='@' - elif word=="": - word='&' - elif word=="": - word='{' - elif word=="": - word='^' - elif word=="": - word='?' - elif word=="": - word='<' - elif word=="": - word='>' - elif word=="": - word='=' - elif word=="": - word='!' 
- elif word=="": - word='’' - elif word=="": - word=':' - elif word=="": - word='*' - elif word=="": - word=';' - elif word=="": - word='/' - t.append((word,tag)) - i+=1 - #t=temp + i = 0 + while i < len(words): + if words[i] == " ": + words[i] = "" + elif words[i] == "+": + words[i] = "" + elif words[i] == "-": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == ",": + words[i] = "" + elif words[i] == "$": + words[i] = "" + elif words[i] == ".": + words[i] = "" + elif words[i] == "(": + words[i] = "" + elif words[i] == ")": + words[i] = "" + elif words[i] == '"': + words[i] = "" + elif words[i] == "@": + words[i] = "" + elif words[i] == "&": + words[i] = "" + elif words[i] == "{": + words[i] = "" + elif words[i] == "^": + words[i] = "" + elif words[i] == "?": + words[i] = "" + elif words[i] == "<": + words[i] = "" + elif words[i] == ">": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == "!": + words[i] = "" + elif words[i] == "’": + words[i] = "" + elif words[i] == ":": + words[i] = "" + elif words[i] == "*": + words[i] = "" + elif words[i] == ";": + words[i] = "" + elif words[i] == "/": + words[i] = "" + i += 1 + t2 = tagger.tag(words) + t = [] + i = 0 + while i < len(t2): + word = t2[i][0] + tag = t2[i][1] + if word == "": + word = " " + elif word == "": + word = "+" + elif word == "": + word = "-" + elif word == "": + word = "=" + elif word == "": + word = "," + elif word == "": + word = "$" + elif word == "": + word = "." + elif word == "": + word = "(" + elif word == "": + word = ")" + elif word == "": + word = '"' + elif word == "": + word = "@" + elif word == "": + word = "&" + elif word == "": + word = "{" + elif word == "": + word = "^" + elif word == "": + word = "?" + elif word == "": + word = "<" + elif word == "": + word = ">" + elif word == "": + word = "=" + elif word == "": + word = "!" + elif word == "": + word = "’" + elif word == "": + word = ":" + elif word == "": + word = "*" + elif word == "": + word = ";" + elif word == "": + word = "/" + t.append((word, tag)) + i += 1 else: # default, use "pud" as a corpus tagger = _PUD_TAGGER - t=tagger.tag(words) + t = tagger.tag(words) + return t diff --git a/pythainlp/tag/unigram.py b/pythainlp/tag/unigram.py index 863323a1f..ece6e3028 100644 --- a/pythainlp/tag/unigram.py +++ b/pythainlp/tag/unigram.py @@ -4,6 +4,7 @@ """ import json import os +from typing import List, Tuple import dill import nltk.tag @@ -27,7 +28,7 @@ def _pud_tagger(): return model -def tag(words, corpus): +def tag(words: List[str], corpus: str) -> List[Tuple[str, str]]: """ āļĢāļąāļšāļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āļ„āļ·āļ™āļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āđ€āļŠāđˆāļ™ [('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ...] """ @@ -36,116 +37,116 @@ def tag(words, corpus): if corpus == "orchid": tagger = nltk.tag.UnigramTagger(model=_orchid_tagger()) - i=0 - while i': - words[i]="" - elif words[i]=='=': - words[i]="" - elif words[i]=='!': - words[i]="" - elif words[i]=='’': - words[i]="" - elif words[i]==':': - words[i]="" - elif words[i]=='*': - words[i]="" - elif words[i]==';': - words[i]="" - elif words[i]=='/': - words[i]="" - i+=1 - t=tagger.tag(words) - temp=[] - i=0 - while i": - word=" " - elif word=="": - word="+" - elif word=="": - word="-" - elif word=="": - word="=" - elif word=="": - word="," - elif word=="": - word="$" - elif word=="": - word="." 
- elif word=="": - word="(" - elif word=="": - word=")" - elif word=="": - word='"' - elif word=="": - word='@' - elif word=="": - word='&' - elif word=="": - word='{' - elif word=="": - word='^' - elif word=="": - word='?' - elif word=="": - word='<' - elif word=="": - word='>' - elif word=="": - word='=' - elif word=="": - word='!' - elif word=="": - word='’' - elif word=="": - word=':' - elif word=="": - word='*' - elif word=="": - word=';' - elif word=="": - word='/' - temp.append((word,tag)) - i+=1 - t=temp + i = 0 + while i < len(words): + if words[i] == " ": + words[i] = "" + elif words[i] == "+": + words[i] = "" + elif words[i] == "-": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == ",": + words[i] = "" + elif words[i] == "$": + words[i] = "" + elif words[i] == ".": + words[i] = "" + elif words[i] == "(": + words[i] = "" + elif words[i] == ")": + words[i] = "" + elif words[i] == '"': + words[i] = "" + elif words[i] == "@": + words[i] = "" + elif words[i] == "&": + words[i] = "" + elif words[i] == "{": + words[i] = "" + elif words[i] == "^": + words[i] = "" + elif words[i] == "?": + words[i] = "" + elif words[i] == "<": + words[i] = "" + elif words[i] == ">": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == "!": + words[i] = "" + elif words[i] == "’": + words[i] = "" + elif words[i] == ":": + words[i] = "" + elif words[i] == "*": + words[i] = "" + elif words[i] == ";": + words[i] = "" + elif words[i] == "/": + words[i] = "" + i += 1 + t = tagger.tag(words) + temp = [] + i = 0 + while i < len(t): + word = t[i][0] + tag = t[i][1] + if word == "": + word = " " + elif word == "": + word = "+" + elif word == "": + word = "-" + elif word == "": + word = "=" + elif word == "": + word = "," + elif word == "": + word = "$" + elif word == "": + word = "." + elif word == "": + word = "(" + elif word == "": + word = ")" + elif word == "": + word = '"' + elif word == "": + word = "@" + elif word == "": + word = "&" + elif word == "": + word = "{" + elif word == "": + word = "^" + elif word == "": + word = "?" + elif word == "": + word = "<" + elif word == "": + word = ">" + elif word == "": + word = "=" + elif word == "": + word = "!" 
+ elif word == "": + word = "’" + elif word == "": + word = ":" + elif word == "": + word = "*" + elif word == "": + word = ";" + elif word == "": + word = "/" + temp.append((word, tag)) + i += 1 + t = temp else: tagger = _pud_tagger() - t=tagger.tag(words) + t = tagger.tag(words) return t diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py index d3c9bb1d5..b87cf13e5 100644 --- a/pythainlp/tokenize/__init__.py +++ b/pythainlp/tokenize/__init__.py @@ -3,6 +3,8 @@ Thai tokenizers """ import re +from typing import Iterable, List, Union + from pythainlp.corpus import get_corpus, thai_syllables, thai_words from marisa_trie import Trie @@ -11,11 +13,13 @@ FROZEN_DICT_TRIE = Trie(get_corpus("words_th_frozen_201810.txt")) -def word_tokenize(text, engine="newmm", whitespaces=True): +def word_tokenize( + text: str, engine: str = "newmm", whitespaces: bool = True +) -> List[str]: """ :param str text: text to be tokenized :param str engine: tokenizer to be used - :param bool whitespaces: True to output no whitespace, a common mark of sentence or end of phrase in Thai + :param bool whitespaces: True to output no whitespace, a common mark of end of phrase in Thai :Parameters for engine: * newmm (default) - dictionary-based, Maximum Matching + Thai Character Cluster * longest - dictionary-based, Longest Matching @@ -60,7 +64,9 @@ def segment(text): return segment(text) -def dict_word_tokenize(text, custom_dict, engine="newmm"): +def dict_word_tokenize( + text: str, custom_dict: Trie, engine: str = "newmm" +) -> List[str]: """ :meth:`dict_word_tokenize` tokenizes word based on the dictionary you provide. The format has to be in trie data structure. :param str text: text to be tokenized @@ -90,7 +96,7 @@ def dict_word_tokenize(text, custom_dict, engine="newmm"): return segment(text, custom_dict) -def sent_tokenize(text, engine="whitespace+newline"): +def sent_tokenize(text: str, engine: str = "whitespace+newline") -> List[str]: """ This function does not yet automatically recognize when a sentence actually ends. Rather it helps split text where white space and a new line is found. @@ -106,28 +112,36 @@ def sent_tokenize(text, engine="whitespace+newline"): sentences = [] if engine == "whitespace": - sentences = re.split(r' +', text, re.U) + sentences = re.split(r" +", text, re.U) else: # default, use whitespace + newline sentences = text.split() return sentences -def subword_tokenize(text, engine="tcc"): +def subword_tokenize(text: str, engine: str = "tcc") -> List[str]: """ :param str text: text to be tokenized - :param str engine: choosing 'tcc' uses the Thai Character Cluster rule to segment words into the smallest unique units. + :param str engine: subword tokenizer + :Parameters for engine: + * tcc (default) - Thai Character Cluster (Theeramunkong et al. 2000) + * etcc - Enhanced Thai Character Cluster (Inrut et al. 2001) [In development] :return: a list of tokenized strings. """ if not text: return "" from .tcc import tcc + from .etcc import etcc + if engine == "tcc": + return tcc(text) + elif engine == "etcc": + return etcc(text).split("/") + #default return tcc(text) - -def syllable_tokenize(text): +def syllable_tokenize(text: str) -> List[str]: """ :param str text: input string to be tokenized @@ -147,7 +161,7 @@ def syllable_tokenize(text): return tokens -def dict_trie(dict_source): +def dict_trie(dict_source: Union[str, Iterable]) -> Trie: """ Create a dict trie which will be used for word_tokenize() function. 
For more information on the trie data structure, @@ -162,17 +176,19 @@ def dict_trie(dict_source): with open(dict_source, "r", encoding="utf8") as f: _vocabs = f.read().splitlines() return Trie(_vocabs) - elif isinstance(dict_source, (list, tuple, set, frozenset)): + elif isinstance(dict_source, Iterable): # Received a sequence type object of vocabs return Trie(dict_source) else: raise TypeError( - "Type of dict_source must be either str (path to source file) or collections" + "Type of dict_source must be either str (path to source file) or iterable" ) class Tokenizer: - def __init__(self, custom_dict=None,tokenize_engine="newmm"): + def __init__( + self, custom_dict: Union[str, Iterable] = None, tokenize_engine: str = "newmm" + ): """ Initialize tokenizer object @@ -180,20 +196,24 @@ def __init__(self, custom_dict=None,tokenize_engine="newmm"): :param str tokenize_engine: choose between different options of engine to token (newmm, mm, longest) """ self.__trie_dict = None - self.word_engine=tokenize_engine + self.word_engine = tokenize_engine if custom_dict: self.__trie_dict = dict_trie(custom_dict) else: self.__trie_dict = dict_trie(thai_words()) - def word_tokenize(self, text): + + def word_tokenize(self, text: str) -> List[str]: """ :param str text: text to be tokenized :return: list of words, tokenized from the text """ - return dict_word_tokenize(text,custom_dict=self.__trie_dict,engine=self.word_engine) - def set_tokenize_engine(self,name_engine): + return dict_word_tokenize( + text, custom_dict=self.__trie_dict, engine=self.word_engine + ) + + def set_tokenize_engine(self, name_engine: str) -> None: """ :param str name_engine: choose between different options of engine to token (newmm, mm, longest) """ - self.word_engine=name_engine \ No newline at end of file + self.word_engine = name_engine diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py index 395e76583..a3844c2f3 100644 --- a/pythainlp/tokenize/deepcut.py +++ b/pythainlp/tokenize/deepcut.py @@ -3,8 +3,10 @@ Wrapper for deepcut Thai word segmentation """ +from typing import List + import deepcut -def segment(text): +def segment(text: str) -> List[str]: return deepcut.tokenize(text) diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py index dbe04122a..1df6eaaec 100644 --- a/pythainlp/tokenize/etcc.py +++ b/pythainlp/tokenize/etcc.py @@ -3,6 +3,8 @@ āđ‚āļ›āļĢāđāļāļĢāļĄ ETCC āđƒāļ™ Python āļžāļąāļ’āļ™āļēāđ‚āļ”āļĒ āļ™āļēāļĒ āļ§āļĢāļĢāļ“āļžāļ‡āļĐāđŒ āļ āļąāļ—āļ—āļīāļĒāđ„āļžāļšāļđāļĨāļĒāđŒ 19 āļĄāļī.āļĒ. 2560 +Reference: Inrut, Jeeragone, Patiroop Yuanghirun, Sarayut Paludkong, Supot Nitsuwat, and Para Limmaneepraserth. "Thai word segmentation using combination of forward and backward longest matching techniques." In International Symposium on Communications and Information Technology (ISCIT), pp. 37-40. 2001. 
+ āļ§āļīāļ˜āļĩāđƒāļŠāđ‰āļ‡āļēāļ™ etcc(āļ„āļģ) @@ -20,7 +22,7 @@ _UV2 = "[" + "".join(["āļą", "āļ·"]) + "]" -def etcc(text): +def etcc(text: str) -> str: """ Enhanced Thai Character Cluster (ETCC) diff --git a/pythainlp/tokenize/longest.py b/pythainlp/tokenize/longest.py index 33ff1fa0a..83ce495a1 100644 --- a/pythainlp/tokenize/longest.py +++ b/pythainlp/tokenize/longest.py @@ -39,14 +39,13 @@ class LongestMatchTokenizer(object): def __init__(self, trie): self.__trie = trie - def __search_nonthai(self, text): + def __search_nonthai(self, text: str): match = _RE_NONTHAI.search(text) if match.group(0): return match.group(0).lower() - else: - return None + return None - def __is_next_word_valid(self, text, begin_pos): + def __is_next_word_valid(self, text: str, begin_pos: int) -> bool: len_text = len(text) text = text[begin_pos:len_text].strip() @@ -63,7 +62,7 @@ def __is_next_word_valid(self, text, begin_pos): return False - def __longest_matching(self, text, begin_pos): + def __longest_matching(self, text: str, begin_pos: int): len_text = len(text) text = text[begin_pos:len_text] @@ -94,7 +93,7 @@ def __longest_matching(self, text, begin_pos): else: return "" - def __segment_text(self, text): + def __segment_text(self, text: str): if not text: return [] diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py index 17815fd9f..066ff1017 100644 --- a/pythainlp/tokenize/newmm.py +++ b/pythainlp/tokenize/newmm.py @@ -9,6 +9,7 @@ import re from collections import defaultdict from heapq import heappop, heappush # for priority queue +from typing import List from pythainlp.tokenize import DEFAULT_DICT_TRIE @@ -38,7 +39,7 @@ def bfs_paths_graph(graph, start, goal): queue.append((next, path + [next])) -def onecut(text, trie): +def onecut(text: str, trie): graph = defaultdict(list) # main data structure allow_pos = tcc_pos(text) # āļ•āļģāđāļŦāļ™āđˆāļ‡āļ—āļĩāđˆāļ•āļąāļ” āļ•āđ‰āļ­āļ‡āļ•āļĢāļ‡āļāļąāļš tcc @@ -90,7 +91,7 @@ def onecut(text, trie): # āļŠāđˆāļ§āļĒāđƒāļŦāđ‰āđ„āļĄāđˆāļ•āđ‰āļ­āļ‡āļžāļīāļĄāļžāđŒāļĒāļēāļ§āđ† -def segment(text, trie=None): +def segment(text: str, trie=None) -> List[str]: if not text: return [] diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py index 23b7b38e4..33fc0aabc 100644 --- a/pythainlp/tokenize/pyicu.py +++ b/pythainlp/tokenize/pyicu.py @@ -3,11 +3,12 @@ Wrapper for ICU word segmentation """ import re +from typing import List from icu import BreakIterator, Locale -def _gen_words(text): +def _gen_words(text: str) -> str: bd = BreakIterator.createWordInstance(Locale("th")) bd.setText(text) p = bd.first() @@ -16,7 +17,7 @@ def _gen_words(text): p = q -def segment(text): +def segment(text: str) -> List[str]: if not text: return [] diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py index b50bdb24a..ee945e929 100644 --- a/pythainlp/tokenize/tcc.py +++ b/pythainlp/tokenize/tcc.py @@ -1,8 +1,9 @@ ïŧŋ# -*- coding: utf-8 -*- """ Separate Thai text into Thai Character Cluster (TCC). -Based on "Character cluster based Thai information retrieval" (Theeramunkong et al. 2002) -http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.59.2548 +Based on "Character cluster based Thai information retrieval" (Theeramunkong et al. 
2000) +https://dl.acm.org/citation.cfm?id=355225 +http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.59.2548 Credits: - TCC: Jakkrit TeCho @@ -10,6 +11,7 @@ - Python code: Korakot Chaovavanich """ import re +from typing import List, Set RE_TCC = ( """\ @@ -47,9 +49,9 @@ PAT_TCC = re.compile("|".join(RE_TCC)) -def tcc_gen(w): +def tcc_gen(w: str) -> str: if not w: - return '' + return "" p = 0 while p < len(w): @@ -62,7 +64,7 @@ def tcc_gen(w): p += n -def tcc_pos(text): +def tcc_pos(text: str) -> Set[int]: if not text: return set() @@ -75,8 +77,5 @@ def tcc_pos(text): return p_set -def tcc(text, sep="/"): - if not text: - return "" - - return sep.join(tcc_gen(text)) +def tcc(text: str) -> List[str]: + return list(tcc_gen(text)) diff --git a/pythainlp/tools/__init__.py b/pythainlp/tools/__init__.py index e2487e582..5f7a5a5cb 100644 --- a/pythainlp/tools/__init__.py +++ b/pythainlp/tools/__init__.py @@ -5,19 +5,20 @@ For text processing and text conversion, see pythainlp.util """ import os -import sys + import pythainlp PYTHAINLP_DATA_DIR = "pythainlp-data" -def get_full_data_path(path): + +def get_full_data_path(path: str) -> str: """ Get filename/path of a dataset, return full path of that filename/path """ return os.path.join(get_pythainlp_data_path(), path) -def get_pythainlp_data_path(): +def get_pythainlp_data_path() -> str: """ Return full path where PyThaiNLP keeps its (downloaded) data """ @@ -27,7 +28,7 @@ def get_pythainlp_data_path(): return path -def get_pythainlp_path(): +def get_pythainlp_path() -> str: """ Return full path of PyThaiNLP code """ diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py index df96b0360..91435cc54 100644 --- a/pythainlp/transliterate/__init__.py +++ b/pythainlp/transliterate/__init__.py @@ -3,15 +3,15 @@ from pythainlp.tokenize import word_tokenize -# āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ -def romanize(text, engine="royin"): +def romanize(text: str, engine: str = "royin") -> str: """ + āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ :param str text: Thai text to be romanized :param str engine: 'royin' (default) or 'thai2rom'. 'royin' uses Thai Royal Institute standard. 'thai2rom' is deep learning Thai romanization (require keras). :return: English (more or less) text that spells out how the Thai text should read. """ - if isinstance(text,str)==False: + if not isinstance(text, str) or not text: return "" if engine == "thai2rom": @@ -21,22 +21,20 @@ def romanize(text, engine="royin"): else: # use default engine "royin" from .royin import romanize - try: - words = word_tokenize(text) - romanized_words = [romanize(word) for word in words] - except: - romanized_words =[romanize(text)] + words = word_tokenize(text) + romanized_words = [romanize(word) for word in words] + return "".join(romanized_words) -def transliterate(text, engine="ipa"): +def transliterate(text: str, engine: str = "ipa") -> str: """ :param str text: Thai text to be transliterated :param str engine: 'ipa' (default) or 'pyicu'. :return: A string of Internaitonal Phonetic Alphabets indicating how the text should read. 
""" - if not text: + if not isinstance(text, str) or not text: return "" if engine == "pyicu": diff --git a/pythainlp/transliterate/ipa.py b/pythainlp/transliterate/ipa.py index 5fe18d24d..be7c1e1c6 100644 --- a/pythainlp/transliterate/ipa.py +++ b/pythainlp/transliterate/ipa.py @@ -7,7 +7,7 @@ _EPI_THA = epitran.Epitran("tha-Thai") -def transliterate(text): +def transliterate(text: str) -> str: return _EPI_THA.transliterate(text) diff --git a/pythainlp/transliterate/pyicu.py b/pythainlp/transliterate/pyicu.py index e34be0e16..5e4a755aa 100644 --- a/pythainlp/transliterate/pyicu.py +++ b/pythainlp/transliterate/pyicu.py @@ -6,7 +6,7 @@ # āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ -def transliterate(text): +def transliterate(text: str) -> str: """ āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ āļĢāļąāļšāļ„āđˆāļē ''str'' āļ‚āđ‰āļ­āļ„āļ§āļēāļĄ āļ„āļ·āļ™āļ„āđˆāļē ''str'' āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ """ diff --git a/pythainlp/transliterate/royin.py b/pythainlp/transliterate/royin.py index 62e44783b..d6f6f71c8 100644 --- a/pythainlp/transliterate/royin.py +++ b/pythainlp/transliterate/royin.py @@ -117,20 +117,20 @@ ) -def _normalize(text): +def _normalize(text: str) -> str: """āļ•āļąāļ”āļ­āļąāļāļĐāļĢāļ—āļĩāđˆāđ„āļĄāđˆāļ­āļ­āļāđ€āļŠāļĩāļĒāļ‡ (āļāļēāļĢāļąāļ™āļ•āđŒ āđ„āļ›āļĒāļēāļĨāļ™āđ‰āļ­āļĒ āđ„āļĄāđ‰āļĒāļĄāļ*) āđāļĨāļ°āļ§āļĢāļĢāļ“āļĒāļļāļāļ•āđŒāļ—āļīāđ‰āļ‡""" return _RE_NORMALIZE.sub("", text) -def _replace_vowels(word): +def _replace_vowels(word: str) -> str: for vowel in _VOWELS: word = re.sub(vowel[0], vowel[1], word) return word -def _replace_consonants(word, res): - if res is None: +def _replace_consonants(word: str, res: str) -> str: + if not res: pass elif len(res) == 1: word = word.replace(res[0], _CONSONANTS[res[0]][0]) @@ -162,9 +162,10 @@ def _replace_consonants(word, res): return word -def romanize(word): - if isinstance(word,str)==False: +def romanize(word: str) -> str: + if not isinstance(word, str) or not word: return "" + word2 = _replace_vowels(_normalize(word)) res = _RE_CONSONANT.findall(word2) @@ -175,5 +176,5 @@ def romanize(word): word2 = "".join(word2) word2 = _replace_consonants(word2, res) - + return word2 \ No newline at end of file diff --git a/pythainlp/transliterate/thai2rom.py b/pythainlp/transliterate/thai2rom.py index 49a498d83..1dc5a5267 100644 --- a/pythainlp/transliterate/thai2rom.py +++ b/pythainlp/transliterate/thai2rom.py @@ -157,5 +157,5 @@ def romanize(self, text): _THAI_TO_ROM = ThaiTransliterator() -def romanize(text): +def romanize(text: str) -> str: return _THAI_TO_ROM.romanize(text) diff --git a/pythainlp/ulmfit/__init__.py b/pythainlp/ulmfit/__init__.py index ab56c81ce..00c9f8891 100644 --- a/pythainlp/ulmfit/__init__.py +++ b/pythainlp/ulmfit/__init__.py @@ -35,8 +35,9 @@ _MODEL_NAME_LSTM = "wiki_lm_lstm" _ITOS_NAME_LSTM = "wiki_itos_lstm" + # Download pretrained models -def _get_path(fname): +def _get_path(fname: str) -> str: """ :meth: download get path of file from pythainlp-corpus :param str fname: file name @@ -56,7 +57,7 @@ class ThaiTokenizer(BaseTokenizer): https://docs.fast.ai/text.transform#BaseTokenizer """ - def __init__(self, lang = "th"): + def __init__(self, lang="th"): self.lang = lang def tokenizer(self, t): @@ -94,6 +95,7 @@ def rm_brackets(t): new_line = re.sub(r"\[\]", "", new_line) return new_line + def ungroup_emoji(toks): "Ungroup emojis" res = [] @@ -105,6 +107,7 @@ def ungroup_emoji(toks): res.append(tok) return res + def 
lowercase_all(toks): "lowercase all English words" return [tok.lower() for tok in toks] @@ -112,17 +115,26 @@ def lowercase_all(toks): # Pretrained paths # TODO: Let the user decide if they like to download (at setup?) -_THWIKI_LSTM = dict(wgts_fname=_get_path(_MODEL_NAME_LSTM), itos_fname=_get_path(_ITOS_NAME_LSTM)) +_THWIKI_LSTM = dict( + wgts_fname=_get_path(_MODEL_NAME_LSTM), itos_fname=_get_path(_ITOS_NAME_LSTM) +) # Preprocessing rules for Thai text -pre_rules_th = [fix_html, replace_rep_after, normalize_char_order, - spec_add_spaces, rm_useless_spaces, rm_useless_newlines, rm_brackets] +pre_rules_th = [ + fix_html, + replace_rep_after, + normalize_char_order, + spec_add_spaces, + rm_useless_spaces, + rm_useless_newlines, + rm_brackets, +] post_rules_th = [replace_all_caps, ungroup_emoji, lowercase_all] _tokenizer = ThaiTokenizer() -def document_vector(text, learn, data, agg='mean'): +def document_vector(text, learn, data, agg="mean"): """ :meth: `document_vector` get document vector using fastai language model and data bunch :param str text: text to extract embeddings @@ -131,18 +143,18 @@ def document_vector(text, learn, data, agg='mean'): :param agg: how to aggregate embeddings :return: `numpy.array` of document vector sized 400 based on the encoder of the model """ - + s = _tokenizer.tokenizer(text) t = torch.tensor(data.vocab.numericalize(s), requires_grad=False).to(device) m = learn.model[0].encoder.to(device) res = m(t).cpu().detach().numpy() - if agg == 'mean': + if agg == "mean": res = res.mean(0) - elif agg == 'sum': + elif agg == "sum": res = res.sum(0) else: - raise ValueError('Aggregate by mean or sum') - return(res) + raise ValueError("Aggregate by mean or sum") + return res def merge_wgts(em_sz, wgts, itos_pre, itos_new): diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py index b7e194436..6a4ff0ce6 100644 --- a/pythainlp/util/__init__.py +++ b/pythainlp/util/__init__.py @@ -11,9 +11,9 @@ "digit_to_text", "eng_to_thai", "find_keyword", - "is_thai", - "is_thaichar", - "is_thaiword", + "countthai", + "isthai", + "isthaichar", "normalize", "now_reign_year", "num_to_thaiword", @@ -42,6 +42,6 @@ from .keywords import find_keyword, rank from .normalize import deletetone, normalize from .numtoword import bahttext, num_to_thaiword -from .thai import is_thai, is_thaichar, is_thaiword +from .thai import countthai, isthai, isthaichar +from .thaiwordcheck import thaicheck from .wordtonum import thaiword_to_num -from .thaiwordcheck import thaicheck \ No newline at end of file diff --git a/pythainlp/util/collate.py b/pythainlp/util/collate.py index bc35c2fe9..ffaff4998 100644 --- a/pythainlp/util/collate.py +++ b/pythainlp/util/collate.py @@ -4,25 +4,27 @@ Simple implementation using regular expressions """ import re +from typing import Iterable, List _RE_TONE = re.compile(r"[āđ‡-āđŒ]") _RE_LV_C = re.compile(r"([āđ€-āđ„])([āļ-āļŪ])") -def _thkey(word): +def _thkey(word: str) -> str: cv = _RE_TONE.sub("", word) # remove tone cv = _RE_LV_C.sub("\\2\\1", cv) # switch lead vowel tone = _RE_TONE.sub(" ", word) # just tone return cv + tone -def collate(data): +def collate(data: Iterable, reverse: bool = False) -> List[str]: """ - :param list data: a list of strings + :param list data: a list of strings to be sorted + :param bool reverse: reverse flag, set to get the result in descending order :return: a list of strings, sorted alphabetically, according to Thai rules **Example**:: >>> from pythainlp.util import * >>> collate(['āđ„āļāđˆ', 'āđ€āļ›āđ‡āļ”', 'āļŦāļĄāļđ', 
'āļ§āļąāļ§']) ['āđ„āļāđˆ', 'āđ€āļ›āđ‡āļ”', 'āļ§āļąāļ§', 'āļŦāļĄāļđ'] """ - return sorted(data, key=_thkey) + return sorted(data, key=_thkey, reverse=reverse) diff --git a/pythainlp/util/date.py b/pythainlp/util/date.py index f2d2ee15b..903e42fd4 100644 --- a/pythainlp/util/date.py +++ b/pythainlp/util/date.py @@ -63,7 +63,7 @@ # Conversion support for thai_strftime() -def _thai_strftime(datetime, fmt_c): +def _thai_strftime(datetime, fmt_c: str) -> str: text = "" if fmt_c == "a": # abbreviated weekday text = thai_abbr_weekdays[datetime.weekday()] @@ -73,7 +73,7 @@ def _thai_strftime(datetime, fmt_c): text = thai_abbr_months[datetime.month - 1] elif fmt_c == "B": # full month text = thai_full_months[datetime.month - 1] - elif fmt_c == "y": # # year without century + elif fmt_c == "y": # year without century text = str(datetime.year + 543)[2:4] elif fmt_c == "Y": # year with century text = str(datetime.year + 543) @@ -97,7 +97,7 @@ def _thai_strftime(datetime, fmt_c): return text -def thai_strftime(datetime, fmt, thaidigit=False): +def thai_strftime(datetime, fmt: str, thaidigit=False) -> str: """ Thai date and time string formatter @@ -126,7 +126,7 @@ def thai_strftime(datetime, fmt, thaidigit=False): If supported, we can just locale.setlocale(locale.LC_TIME, "th_TH") and then use native datetime.strftime(). - :return: Date and time spelled out in text, with month in Thai name and year in Thai Buddhist Era (BE). + :return: Date and time spelled out, with day and month names in Thai and year in Thai Buddhist Era (BE). """ thaidate_parts = [] diff --git a/pythainlp/util/digitconv.py b/pythainlp/util/digitconv.py index 16e634833..3982168d6 100644 --- a/pythainlp/util/digitconv.py +++ b/pythainlp/util/digitconv.py @@ -56,7 +56,7 @@ } -def thai_digit_to_arabic_digit(text): +def thai_digit_to_arabic_digit(text: str) -> str: """ :param str text: Text with Thai digits such as 'āđ‘', 'āđ’', 'āđ“' :return: Text with Thai digits being converted to Arabic digits such as '1', '2', '3' @@ -74,7 +74,7 @@ def thai_digit_to_arabic_digit(text): return "".join(newtext) -def arabic_digit_to_thai_digit(text): +def arabic_digit_to_thai_digit(text: str) -> str: """ :param str text: Text with Arabic digits such as '1', '2', '3' :return: Text with Arabic digits being converted to Thai digits such as 'āđ‘', 'āđ’', 'āđ“' @@ -92,7 +92,7 @@ def arabic_digit_to_thai_digit(text): return "".join(newtext) -def digit_to_text(text): +def digit_to_text(text: str) -> str: """ :param str text: Text with digits such as '1', '2', 'āđ“', 'āđ”' :return: Text with digits being spelled out in Thai @@ -113,7 +113,7 @@ def digit_to_text(text): return "".join(newtext) -def text_to_arabic_digit(text): +def text_to_arabic_digit(text: str) -> str: """ :param text: A digit spelled out in Thai :return: An Arabic digit such as '1', '2', '3' @@ -124,7 +124,7 @@ def text_to_arabic_digit(text): return _spell_digit[text] -def text_to_thai_digit(text): +def text_to_thai_digit(text: str) -> str: """ :param text: A digit spelled out in Thai :return: A Thai digit such as 'āđ‘', 'āđ’', 'āđ“' diff --git a/pythainlp/util/keyboard.py b/pythainlp/util/keyboard.py index 8fb4abc6e..ad156715d 100644 --- a/pythainlp/util/keyboard.py +++ b/pythainlp/util/keyboard.py @@ -101,7 +101,7 @@ TH_EN_KEYB_PAIRS = {v: k for k, v in EN_TH_KEYB_PAIRS.items()} -def eng_to_thai(text): +def eng_to_thai(text: str) -> str: """ Correct text in one language that is incorrectly-typed with a keyboard layout in another language. 
(type Thai with English keyboard) @@ -113,7 +113,7 @@ def eng_to_thai(text): ) -def thai_to_eng(text): +def thai_to_eng(text: str) -> str: """ Correct text in one language that is incorrectly-typed with a keyboard layout in another language. (type Thai with English keyboard) diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py index 66c179fb9..3e05a2c69 100644 --- a/pythainlp/util/normalize.py +++ b/pythainlp/util/normalize.py @@ -44,7 +44,7 @@ ] # āđ€āļāđ‡āļšāļžāļ§āļ āļžāļīāļĄāļžāđŒāļĨāļģāļ”āļąāļšāļœāļīāļ”āļŦāļĢāļ·āļ­āļœāļīāļ”āđāļ›āđ‰āļ™āđāļ•āđˆāļāļĨāļąāļšāđāļŠāļ”āļ‡āļœāļĨāļ–āļđāļāļ•āđ‰āļ­āļ‡ āđƒāļŦāđ‰āđ„āļ›āđ€āļ›āđ‡āļ™āđāļ›āđ‰āļ™āļ—āļĩāđˆāļ–āļđāļāļ•āđ‰āļ­āļ‡ āđ€āļŠāđˆāļ™ āđ€ + āđ€ āđ„āļ›āđ€āļ›āđ‡āļ™ āđ -def normalize(text): +def normalize(text: str) -> str: """ Thai text normalize @@ -61,7 +61,7 @@ def normalize(text): return text -def deletetone(text): +def deletetone(text: str) -> str: """ Remove tonemarks diff --git a/pythainlp/util/numtoword.py b/pythainlp/util/numtoword.py index 394984d70..68519cb79 100644 --- a/pythainlp/util/numtoword.py +++ b/pythainlp/util/numtoword.py @@ -10,7 +10,7 @@ __all__ = ["bahttext", "num_to_thaiword"] -def bahttext(number): +def bahttext(number: float) -> str: """ Converts a number to Thai text and adds a suffix of "Baht" currency. Precision will be fixed at two decimal places (0.00) to fits "Satang" unit. @@ -41,9 +41,9 @@ def bahttext(number): return ret -def num_to_thaiword(number): +def num_to_thaiword(number: int) -> str: """ - :param float number: a float number (with decimals) indicating a quantity + :param int number: a float number (with decimals) indicating a quantity :return: a text that indicates the full amount in word form, properly ending each digit with the right term. """ ret = "" diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py index f6b8f3d58..70e5a9d15 100644 --- a/pythainlp/util/thai.py +++ b/pythainlp/util/thai.py @@ -2,11 +2,15 @@ """ Check if it is Thai text """ +import string +_DEFAULT_IGNORE_CHARS = string.whitespace + string.digits + string.punctuation -def is_thaichar(ch): # āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ + +def isthaichar(ch: str) -> bool: """ - Check if character is Thai + Check if a character is Thai + āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ :param str ch: input character :return: True or False @@ -17,45 +21,44 @@ def is_thaichar(ch): # āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄ return False -def is_thaiword(word): # āđ€āļ›āđ‡āļ™āļ„āļģāļ—āļĩāđˆāļĄāļĩāđāļ•āđˆāļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ +def isthai(word: str, ignore_chars: str = ".") -> bool: """ Check if all character is Thai + āđ€āļ›āđ‡āļ™āļ„āļģāļ—āļĩāđˆāļĄāļĩāđāļ•āđˆāļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ :param str word: input text + :param str ignore_chars: characters to be ignored (i.e. will be considered as Thai) :return: True or False """ + if not ignore_chars: + ignore_chars = "" + for ch in word: - if ch != "." and not is_thaichar(ch): + if ch not in ignore_chars and not isthaichar(ch): return False return True -def is_thai(text, check_all=False): +def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float: """ - :param str text: input string or list of strings - :param bool check_all: checks all character or not - - :return: A dictionary with the first value as proportional of text that is Thai, and the second value being a tuple of all characters, along with true or false. 
+ :param str text: input text + :return: float, proportion of characters in the text that is Thai character """ - isthais = [] - num_isthai = 0 + if not text: + return 0 + + if not ignore_chars: + ignore_chars = "" + + num_thai = 0 + num_ignore = 0 for ch in text: - ch_val = ord(ch) - if ch_val >= 3584 and ch_val <= 3711: - num_isthai += 1 - if check_all: - isthais.append(True) - else: - if check_all: - isthais.append(False) - thai_percent = (num_isthai / len(text)) * 100 - - if check_all: - chars = list(text) - isthai_pairs = tuple(zip(chars, isthais)) - data = {"thai": thai_percent, "check_all": isthai_pairs} - else: - data = {"thai": thai_percent} - - return data + if ch in ignore_chars: + num_ignore += 1 + elif isthaichar(ch): + num_thai += 1 + + num_count = len(text) - num_ignore + + return (num_thai / num_count) * 100 diff --git a/pythainlp/util/thaiwordcheck.py b/pythainlp/util/thaiwordcheck.py index d2a036370..7237d2db6 100644 --- a/pythainlp/util/thaiwordcheck.py +++ b/pythainlp/util/thaiwordcheck.py @@ -1,51 +1,76 @@ # -*- coding: utf-8 -*- -''' -From https://github.com/wannaphongcom/open-thai-nlp-document/blob/master/check_thai_word.md -''' +""" +From +https://github.com/wannaphongcom/open-thai-nlp-document/blob/master/check_thai_word.md +""" import re -def _check1(word): # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļŠāļ°āļāļ”āļ§āđˆāļēāļ•āļĢāļ‡āļ•āļēāļĄāļĄāļēāļ•āļĢāļēāđ„āļŦāļĄ - if word in ['āļ','āļ”','āļš','āļ™','āļ‡','āļĄ','āļĒ','āļ§']: - return True - else: - return False -def _check2(word): # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļāļēāļĢāļąāļ™āļ•āđŒ āļ–āđ‰āļēāļĄāļĩ āđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - if 'āđŒ' in word: - return False - else: - return True -def _check3(word): - if word in list("āļ†āļ“āļŒāļŽāļāļāļ‘āļ’āļ˜āļĻāļĐāļŽ"): # āļ–āđ‰āļēāļĄāļĩ āđāļŠāļ”āļ‡āļ§āđˆāļēāđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - return False - else: - return True -def thaicheck(word): - """ - Check is Thai Word - - :param str word: word - :return: True or False - """ - pattern = re.compile(r"[āļ-āļŽāļŪ]",re.U) # āļŠāļģāļŦāļĢāļąāļšāļ•āļĢāļ§āļˆāļŠāļ­āļšāļžāļĒāļąāļāļŠāļ™āļ° - res = re.findall(pattern,word) # āļ”āļķāļ‡āļžāļĒāļąāļāļŠāļ™āļ°āļ—āļąāļąāđ‰āļ‡āļŦāļĄāļ”āļ­āļ­āļāļĄāļē - if res==[]: - return False - elif _check1(res[len(res)-1]) or len(res)==1: - if _check2(word): - word2=list(word) - i=0 - thai=True - if word in ['āļ†āđˆāļē','āđ€āļ†āļĩāđˆāļĒāļ™','āļĻāļķāļ','āļĻāļ­āļ','āđ€āļĻāļīāļ','āđ€āļĻāļĢāđ‰āļē','āļ˜','āļ“','āļŊāļžāļ“āļŊ','āđƒāļŦāļāđˆ','āļŦāļāđ‰āļē','āļ„āļ§āļēāļĒ','āļ„āļ§āļēāļĄ','āļāļĢāļīāđˆāļ‡āđ€āļāļĢāļ‡','āļœāļĨāļī']: # āļ‚āđ‰āļ­āļĒāļāđ€āļ§āđ‰āļ™ āļ„āļģāđ€āļŦāļĨāđˆāļēāļ™āļĩāđ‰āđ€āļ›āđ‡āļ™āļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - return True - while i bool: # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļŠāļ°āļāļ”āļ§āđˆāļēāļ•āļĢāļ‡āļ•āļēāļĄāļĄāļēāļ•āļĢāļēāđ„āļŦāļĄ + if word in ["āļ", "āļ”", "āļš", "āļ™", "āļ‡", "āļĄ", "āļĒ", "āļ§"]: + return True + return False + + +def _check2(word: str) -> bool: # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļāļēāļĢāļąāļ™āļ•āđŒ āļ–āđ‰āļēāļĄāļĩ āđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + if "āđŒ" in word: + return False + return True + + +def _check3(word: str) -> bool: + if word in list("āļ†āļ“āļŒāļŽāļāļāļ‘āļ’āļ˜āļĻāļĐāļŽ"): # āļ–āđ‰āļēāļĄāļĩ āđāļŠāļ”āļ‡āļ§āđˆāļēāđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + return False + return True + + +def thaicheck(word: str) -> bool: + """ + Check if a word is an "authentic Thai word" + + :param str word: word + :return: True or False + """ + pattern = re.compile(r"[āļ-āļŽāļŪ]", re.U) # āļŠāļģāļŦāļĢāļąāļšāļ•āļĢāļ§āļˆāļŠāļ­āļšāļžāļĒāļąāļāļŠāļ™āļ° + res = re.findall(pattern, 
word) # āļ”āļķāļ‡āļžāļĒāļąāļāļŠāļ™āļ°āļ—āļąāļąāđ‰āļ‡āļŦāļĄāļ”āļ­āļ­āļāļĄāļē + + if res == []: + return False + + if _check1(res[len(res) - 1]) or len(res) == 1: + if _check2(word): + word2 = list(word) + i = 0 + thai = True + if word in [ + "āļ†āđˆāļē", + "āđ€āļ†āļĩāđˆāļĒāļ™", + "āļĻāļķāļ", + "āļĻāļ­āļ", + "āđ€āļĻāļīāļ", + "āđ€āļĻāļĢāđ‰āļē", + "āļ˜", + "āļ“", + "āļŊāļžāļ“āļŊ", + "āđƒāļŦāļāđˆ", + "āļŦāļāđ‰āļē", + "āļ„āļ§āļēāļĒ", + "āļ„āļ§āļēāļĄ", + "āļāļĢāļīāđˆāļ‡āđ€āļāļĢāļ‡", + "āļœāļĨāļī", + ]: # āļ‚āđ‰āļ­āļĒāļāđ€āļ§āđ‰āļ™ āļ„āļģāđ€āļŦāļĨāđˆāļēāļ™āļĩāđ‰āđ€āļ›āđ‡āļ™āļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + return True + + while i < len(word2) and thai: + thai = _check3(word2[i]) + if not thai: + return False + i += 1 + return True + + return False + + if word in ["āļāļ°", "āļāļĢāļ°", "āļ›āļ°", "āļ›āļĢāļ°"]: + return True + + return False diff --git a/pythainlp/util/wordtonum.py b/pythainlp/util/wordtonum.py index 7521ec156..43305d329 100644 --- a/pythainlp/util/wordtonum.py +++ b/pythainlp/util/wordtonum.py @@ -6,6 +6,7 @@ https://colab.research.google.com/drive/148WNIeclf0kOU6QxKd6pcfwpSs8l-VKD#scrollTo=EuVDd0nNuI8Q """ import re +from typing import Iterable, List from pythainlp.tokenize import Tokenizer @@ -39,7 +40,7 @@ _TOKENIZER = Tokenizer(custom_dict=_THAIWORD_NUMS_UNITS) -def _thaiword_to_num(tokens): +def _thaiword_to_num(tokens: List[str]) -> int: if not tokens: return None @@ -65,21 +66,21 @@ def _thaiword_to_num(tokens): return _THAI_INT_MAP[a] * _THAI_INT_MAP[b] + _thaiword_to_num(tokens[2:]) -def thaiword_to_num(thaiword): +def thaiword_to_num(word: str) -> int: """ - Converts a thai word to number + Converts a Thai number spellout word to actual number value - :param str thaiword: input thai word + :param str word: a Thai number spellout :return: number """ - if not thaiword: + if not word: return None tokens = [] - if isinstance(thaiword,str): - tokens = _TOKENIZER.word_tokenize(thaiword) - elif isinstance(thaiword,list) or isinstance(thaiword,tuple) or isinstance(thaiword,set) or isinstance(thaiword,frozenset): - for w in thaiword: + if isinstance(word, str): + tokens = _TOKENIZER.word_tokenize(word) + elif isinstance(word, Iterable): + for w in word: tokens.extend(_TOKENIZER.word_tokenize(w)) res = [] diff --git a/pythainlp/word_vector/__init__.py b/pythainlp/word_vector/__init__.py index d035e5395..d1da4a2e3 100644 --- a/pythainlp/word_vector/__init__.py +++ b/pythainlp/word_vector/__init__.py @@ -4,6 +4,8 @@ thai2fit - Thai word vector Code by https://github.com/cstorm125/thai2fit """ +from typing import List + import numpy as np from gensim.models import KeyedVectors from pythainlp.corpus import download as download_data @@ -13,7 +15,7 @@ WV_DIM = 300 -def _download(): +def _download() -> str: path = get_corpus_path("thai2fit_wv") if not path: download_data("thai2fit_wv") @@ -33,7 +35,7 @@ def get_model(): _MODEL = get_model() -def most_similar_cosmul(positive: list, negative: list): +def most_similar_cosmul(positive: List[str], negative: List[str]): """ Word arithmetic operations If a word is not in the vocabulary, KeyError will be raised. @@ -47,18 +49,18 @@ def most_similar_cosmul(positive: list, negative: list): return _MODEL.most_similar_cosmul(positive=positive, negative=negative) -def doesnt_match(listdata): +def doesnt_match(words: List[str]) -> str: """ Pick one word that doesn't match other words in the list If a word is not in the vocabulary, KeyError will be raised. 
- :param list listdata: a list of words + :param list words: a list of words :return: word that doesn't match """ - return _MODEL.doesnt_match(listdata) + return _MODEL.doesnt_match(words) -def similarity(word1, word2): +def similarity(word1: str, word2: str) -> float: """ Get cosine similarity between two words. If a word is not in the vocabulary, KeyError will be raised. @@ -70,7 +72,7 @@ def similarity(word1, word2): return _MODEL.similarity(word1, word2) -def sentence_vectorizer(text, use_mean=True): +def sentence_vectorizer(text: str, use_mean: bool = True): """ Get sentence vector from text If a word is not in the vocabulary, KeyError will be raised. diff --git a/requirements.txt b/requirements.txt index 3159b92b1..7fd66ad78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ -marisa-trie -nltk>=3.2.2 dill -marisa_trie +marisa-trie nltk>=3.2.2 pytz requests diff --git a/setup.cfg b/setup.cfg index 350779304..809721c80 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0 +current_version = 2.0.2 commit = True tag = True diff --git a/setup.py b/setup.py index a47948438..879e9b93f 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ "ipa": ["epitran"], "ml": ["fastai>=1.0.38", "keras", "numpy", "torch"], "ner": ["sklearn-crfsuite"], - "thai2fit": ["gensim", "numpy","emoji"], + "thai2fit": ["emoji", "gensim", "numpy"], "thai2rom": ["keras", "numpy"], "full": [ "artagger", @@ -34,7 +34,7 @@ setup( name="pythainlp", - version="2.0", + version="2.0.2", description="Thai Natural Language Processing library", long_description=readme, long_description_content_type="text/markdown", @@ -54,16 +54,12 @@ "stopwords_th.txt", "syllables_th.txt", "tha-wn.db", - "new-thaidict.txt", - "negation.txt", - "provinces.csv", - "pt_tagger_1.dill", - "ud_thai-pud_pt_tagger.dill", - "ud_thai-pud_unigram_tagger.dill", - "unigram_tagger.dill", - "words_th.txt", + "thailand_provinces_th.txt", + "tnc_freq.txt", + "ud_thai_pud_pt_tagger.dill", + "ud_thai_pud_unigram_tagger.dill", "words_th_frozen_201810.txt", - "tnc_freq.txt" + "words_th.txt", ], }, include_package_data=True, @@ -77,6 +73,8 @@ "natural language processing", "text analytics", "ThaiNLP", + "text processing", + "localization", ], classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tests/__init__.py b/tests/__init__.py index 6ba23adda..e569951cd 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -20,11 +20,10 @@ tnc, ttc, wordnet, - download + download, ) from pythainlp.soundex import lk82, metasound, soundex, udom83 -from pythainlp.spell import correct, spell -from pythainlp.spell.pn import NorvigSpellChecker, dictionary, known, prob +from pythainlp.spell import correct, spell, NorvigSpellChecker from pythainlp.summarize import summarize from pythainlp.tag import perceptron, pos_tag, pos_tag_sents, unigram from pythainlp.tag.locations import tag_provinces @@ -37,7 +36,7 @@ multi_cut, newmm, dict_trie, - Tokenizer + Tokenizer, ) from pythainlp.tokenize import pyicu as tokenize_pyicu from pythainlp.tokenize import ( @@ -58,9 +57,9 @@ digit_to_text, eng_to_thai, find_keyword, - is_thai, - is_thaichar, - is_thaiword, + countthai, + isthai, + isthaichar, normalize, now_reign_year, num_to_thaiword, @@ -72,9 +71,9 @@ thai_strftime, thai_to_eng, thaiword_to_num, - thaicheck + thaicheck, ) -#from pythainlp.ulmfit import rm_brackets + class TestUM(unittest.TestCase): """ @@ -177,10 +176,6 @@ def test_spell(self): self.assertEqual(correct(""), "") 
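The word_vector changes above only add type hints and rename parameters, so usage is unchanged. A hedged sketch follows: the thai2fit_wv vectors are downloaded on first use, and every word below is assumed to be in the model's vocabulary (out-of-vocabulary words raise KeyError).

```python
from pythainlp import word_vector

# Pick the word that does not belong with the rest.
print(word_vector.doesnt_match(["ญี่ปุ่น", "พม่า", "ไอติม"]))  # "ไอติม", per the tests

# Cosine similarity between two words; the exact value depends on the model.
print(word_vector.similarity("คน", "มนุษย์"))

# Mean (use_mean=True) of the word vectors in a tokenized sentence.
print(word_vector.sentence_vectorizer("ฉันกินข้าว", use_mean=True))
```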
self.assertIsNotNone(correct("āļ—āļ”āļŠāļ­āļ‡")) - self.assertIsNotNone(dictionary()) - self.assertGreaterEqual(prob("āļĄāļĩ"), 0) - self.assertIsNotNone(known(["āđ€āļāļīāļ”", "abc", ""])) - checker = NorvigSpellChecker(dict_filter="") self.assertIsNotNone(checker.dictionary()) self.assertGreaterEqual(checker.prob("āļĄāļĩ"), 0) @@ -262,7 +257,13 @@ def test_ner(self): self.assertEqual(ner.get_ner(""), []) self.assertIsNotNone(ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē")) self.assertIsNotNone(ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē", pos=False)) - self.assertIsNotNone(ner.get_ner("āļ„āļ“āļ°āļ§āļīāļ—āļĒāļēāļĻāļēāļŠāļ•āļĢāđŒāļ›āļĢāļ°āļĒāļļāļāļ•āđŒāđāļĨāļ°āļ§āļīāļĻāļ§āļāļĢāļĢāļĄāļĻāļēāļŠāļ•āļĢāđŒ āļ—āļĩāđˆāļ­āļĒāļđāđˆ āļĄāļŦāļēāļ§āļīāļ—āļĒāļēāļĨāļąāļĒāļ‚āļ­āļ™āđāļāđˆāļ™ āļ§āļīāļ—āļĒāļēāđ€āļ‚āļ•āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 112 āļŦāļĄāļđāđˆ 7 āļšāđ‰āļēāļ™āļŦāļ™āļ­āļ‡āđ€āļ”āļīāđˆāļ™ āļ•āļģāļšāļĨāļŦāļ™āļ­āļ‡āļāļ­āļĄāđ€āļāļēāļ° āļ­āļģāđ€āļ āļ­āđ€āļĄāļ·āļ­āļ‡ āļˆāļąāļ‡āļŦāļ§āļąāļ”āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 43000")) + self.assertIsNotNone( + ner.get_ner( + """āļ„āļ“āļ°āļ§āļīāļ—āļĒāļēāļĻāļēāļŠāļ•āļĢāđŒāļ›āļĢāļ°āļĒāļļāļāļ•āđŒāđāļĨāļ°āļ§āļīāļĻāļ§āļāļĢāļĢāļĄāļĻāļēāļŠāļ•āļĢāđŒ āļĄāļŦāļēāļ§āļīāļ—āļĒāļēāļĨāļąāļĒāļ‚āļ­āļ™āđāļāđˆāļ™ + āļ§āļīāļ—āļĒāļēāđ€āļ‚āļ•āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 112 āļŦāļĄāļđāđˆ 7 āļšāđ‰āļēāļ™āļŦāļ™āļ­āļ‡āđ€āļ”āļīāđˆāļ™ āļ•āļģāļšāļĨāļŦāļ™āļ­āļ‡āļāļ­āļĄāđ€āļāļēāļ° āļ­āļģāđ€āļ āļ­āđ€āļĄāļ·āļ­āļ‡ + āļˆāļąāļ‡āļŦāļ§āļąāļ”āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 43000""" + ) + ) # self.assertEqual( # ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē"), # [ @@ -339,8 +340,9 @@ def test_word_tokenize(self): self.assertIsNotNone(word_tokenize("āļ—āļ”āļŠāļ­āļš", engine="XX")) self.assertIsNotNone(word_tokenize("āļ—āļ”āļŠāļ­āļš", engine="deepcut")) self.assertIsNotNone(word_tokenize("", engine="deepcut")) + def test_Tokenizer(self): - t_test=Tokenizer() + t_test = Tokenizer() self.assertEqual(t_test.word_tokenize(""), []) def test_word_tokenize_icu(self): @@ -399,7 +401,8 @@ def test_sent_tokenize(self): self.assertEqual(sent_tokenize(None), []) self.assertEqual(sent_tokenize(""), []) self.assertEqual( - sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē ", engine="whitespace"), ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē", ""] + sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē ", engine="whitespace"), + ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē", ""], ) self.assertEqual(sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē "), ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē"]) @@ -416,9 +419,9 @@ def test_syllable_tokenize(self): ) def test_tcc(self): - self.assertEqual(tcc.tcc(None), "") - self.assertEqual(tcc.tcc(""), "") - self.assertEqual(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), "āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ") + self.assertEqual(tcc.tcc(None), []) + self.assertEqual(tcc.tcc(""), []) + self.assertEqual(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), ["āļ›", "āļĢāļ°", "āđ€āļ—", "āļĻ", "āđ„āļ—", "āļĒ"]) self.assertEqual(list(tcc.tcc_gen("")), []) self.assertEqual(tcc.tcc_pos(""), set()) @@ -558,20 +561,24 @@ def test_normalize(self): # ### pythainlp.util.thai - def test_is_thai(self): - self.assertEqual(is_thai("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), {"thai": 100.0}) - self.assertIsNotNone(is_thai("āđ€āļœāļ·āļ­āļ", check_all=True)) - self.assertIsNotNone(is_thai("āđ€āļœāļ·āļ­āļabc", check_all=True)) - - def test_is_thaichar(self): - self.assertEqual(is_thaichar("āļ"), True) - self.assertEqual(is_thaichar("a"), False) - 
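The spell-checker tests here reflect an API move: the module-level dictionary(), prob() and known() imports from pythainlp.spell.pn are dropped, and dictionary() and prob() are exercised as methods of a NorvigSpellChecker instance imported directly from pythainlp.spell. A small sketch, with outputs hedged:

```python
from pythainlp.spell import NorvigSpellChecker, correct, spell

print(spell("ทดสอง"))    # ranked list of candidate corrections
print(correct("ทดสอง"))  # best single correction (e.g. "ทดสอบ", assumed)

# Per-instance access to the underlying dictionary, as in the updated tests.
checker = NorvigSpellChecker(dict_filter="")
print(checker.prob("มี"))    # unigram probability, >= 0
print(checker.dictionary())  # word-frequency entries backing the checker
```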
self.assertEqual(is_thaichar("0"), False) - - def test_is_thaiword(self): - self.assertEqual(is_thaiword("āđ„āļ—āļĒ"), True) - self.assertEqual(is_thaiword("āļ•.āļ„."), True) - self.assertEqual(is_thaiword("āđ„āļ—āļĒ0"), False) + def test_countthai(self): + self.assertEqual(countthai(""), 0) + self.assertEqual(countthai("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), 100.0) + self.assertEqual(countthai("(āļāļāļ•.)", ".()"), 100.0) + self.assertEqual(countthai("(āļāļāļ•.)", None), 50.0) + + def test_isthaichar(self): + self.assertEqual(isthaichar("āļ"), True) + self.assertEqual(isthaichar("a"), False) + self.assertEqual(isthaichar("0"), False) + + def test_isthai(self): + self.assertEqual(isthai("āđ„āļ—āļĒ"), True) + self.assertEqual(isthai("āđ„āļ—āļĒ0"), False) + self.assertEqual(isthai("āļ•.āļ„."), True) + self.assertEqual(isthai("(āļ•.āļ„.)"), False) + self.assertEqual(isthai("āļ•.āļ„.", ignore_chars=None), False) + self.assertEqual(isthai("(āļ•.āļ„.)", ignore_chars=".()"), True) def test_is_thaicheck(self): self.assertEqual(thaicheck("āļ•āļē"), True) @@ -608,5 +615,6 @@ def test_thai2vec(self): word_vector.doesnt_match(["āļāļĩāđˆāļ›āļļāđˆāļ™", "āļžāļĄāđˆāļē", "āđ„āļ­āļ•āļīāļĄ"]), "āđ„āļ­āļ•āļīāļĄ" ) + if __name__ == "__main__": unittest.main()
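In summary, is_thai(), is_thaichar() and is_thaiword() are replaced by countthai(), isthaichar() and isthai(). A short sketch of the new behaviour, with expected values taken from the updated tests above and the import path assumed to be pythainlp.util as in tests/__init__.py:

```python
from pythainlp.util import countthai, isthai, isthaichar

# countthai() now returns a plain percentage instead of a dict, and skips any
# characters listed in ignore_chars before computing the ratio.
print(countthai("ประเทศไทย"))      # 100.0
print(countthai("(กกต.)", ".()"))  # 100.0, parentheses and dots ignored
print(countthai("(กกต.)", None))   # 50.0, nothing ignored

# isthaichar() tests a single character; isthai() tests a whole string and
# also honours ignore_chars (the default ignore set covers "." but not "()").
print(isthaichar("ก"))                       # True
print(isthai("ไทย0"))                        # False
print(isthai("ต.ค."))                        # True
print(isthai("(ต.ค.)", ignore_chars=".()"))  # True
```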