diff --git a/.travis.yml b/.travis.yml index 6588db3e7..75179d4e5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,9 +18,11 @@ install: os: - linux + # command to run tests, e.g. python setup.py test script: coverage run --source=pythainlp setup.py test + after_success: coveralls diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 62ecbcbb3..a10d62615 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -19,15 +19,14 @@ We use the famous [gitflow](http://nvie.com/posts/a-successful-git-branching-mod ## Code Guidelines -- Use [PEP8](http://www.python.org/dev/peps/pep-0008/); +- Follow [PEP8](http://www.python.org/dev/peps/pep-0008/) and use [black](https://github.com/ambv/black); - Write tests for your new features (please see "Tests" topic below); - Always remember that [commented code is dead code](http://www.codinghorror.com/blog/2008/07/coding-without-comments.html); - Name identifiers (variables, classes, functions, module names) with meaningful and pronounceable names (`x` is always wrong); -- When manipulating strings, use [Python's new-style - formatting](http://docs.python.org/library/string.html#format-string-syntax) - (`'{} = {}'.format(a, b)` instead of `'%s = %s' % (a, b)`); +- When manipulating strings, use [f-strings](https://www.python.org/dev/peps/pep-0498/) + (use `f"{a} = {b}"` instead of `"{} = {}".format(a, b)` or `"%s = %s" % (a, b)`); - All `#TODO` comments should be turned into issues (use our [GitHub issue system](https://github.com/PyThaiNLP/pythainlp/)); - Run all tests before pushing (just execute `tox`) so you will know if your diff --git a/Makefile b/Makefile index d5c977215..0f103632c 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,6 @@ help: clean: clean-build clean-pyc clean-test ## remove all build, test, coverage and Python artifacts - clean-build: ## remove build artifacts rm -fr build/ rm -fr dist/ @@ -51,19 +50,16 @@ lint: ## check style with flake8 flake8 pythainlp tests test: ## run tests quickly with the default Python - - python setup.py test + python setup.py test test-all: ## run tests on every Python version with tox tox coverage: ## check code coverage quickly with the default Python - - coverage run --source pythainlp setup.py test - - coverage report -m - coverage html - $(BROWSER) htmlcov/index.html + coverage run --source pythainlp setup.py test + coverage report -m + coverage html + $(BROWSER) htmlcov/index.html release: clean ## package and upload a release python setup.py sdist upload diff --git a/README.md b/README.md index 880a579ca..fd2c26acb 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ PyThaiNLP is a Python package for text processing and linguistic analysis, simil **This is a document for development branch (post 1.7.x). Things will break. For a stable branch document, see [master](https://github.com/PyThaiNLP/pythainlp/tree/master).** -📫 follow us on Facebook [Pythainlp](https://www.facebook.com/pythainlp/) +📫 follow us on Facebook [PyThaiNLP](https://www.facebook.com/pythainlp/) ## Capabilities @@ -34,7 +34,7 @@ PyThaiNLP is a Python package for text processing and linguistic analysis, simil - Thai misspellings detection and spelling correction (```spell```) - Thai soundex (```lk82```, ```udom83```, ```metasound```) - Thai WordNet wrapper -- and much more - see [examples](https://github.com/PyThaiNLP/pythainlp/tree/dev/examples). +- and much more - see examples in [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb).
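# Illustrative sketch of the string-formatting guideline in the CONTRIBUTING.md hunk above;
# assumes Python 3.6+ (f-strings, PEP 498). The names a and b are only placeholders.
a, b = "tokens", 42
print(f"{a} = {b}")            # preferred: f-string
print("{} = {}".format(a, b))  # str.format(), discouraged by the guideline
print("%s = %s" % (a, b))      # %-formatting, discouraged by the guideline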
## Installation @@ -125,7 +125,7 @@ PyThaiNLP āđ€āļ›āđ‡āļ™āđ„āļĨāļšāļēāļĢāļĩāļ āļēāļĐāļēāđ„āļžāļ—āļ­āļ™āđ€āļžāļ·āđˆ - āļ•āļĢāļ§āļˆāļ„āļģāļŠāļ°āļāļ”āļœāļīāļ”āđƒāļ™āļ āļēāļĐāļēāđ„āļ—āļĒ (```spell```) - soundex āļ āļēāļĐāļēāđ„āļ—āļĒ (```lk82```, ```udom83```, ```metasound```) - Thai WordNet wrapper -- āđāļĨāļ°āļ­āļ·āđˆāļ™ āđ† [āļ”āļđāļ•āļąāļ§āļ­āļĒāđˆāļēāļ‡](https://github.com/PyThaiNLP/pythainlp/tree/dev/examples) +- āđāļĨāļ°āļ­āļ·āđˆāļ™ āđ† āļ”āļđāļ•āļąāļ§āļ­āļĒāđˆāļēāļ‡āđ„āļ”āđ‰āđƒāļ™ [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb) ## āļ•āļīāļ”āļ•āļąāđ‰āļ‡ diff --git a/conda.recipe/meta-old.yaml b/conda.recipe/meta-old.yaml deleted file mode 100644 index 632fb2109..000000000 --- a/conda.recipe/meta-old.yaml +++ /dev/null @@ -1,49 +0,0 @@ -{% set version = "1.7.2" %} - -package: - name: pythainlp - version: {{ version }} - -build: - noarch: python - number: 0 - script: python -m pip install --no-deps --ignore-installed . - -requirements: - host: - - pip - - python - - setuptools - - nltk - - future - - six - - marisa_trie - - dill - - pytz - - tinydb - - tqdm - - - run: - - python - - nltk - - future - - six - - marisa_trie - - dill - - pytz - - tinydb - - tqdm - -test: - imports: - - pvlib - -about: - home: https://github.com/PyThaiNLP/pythainlp - license: Apache License 2.0 - summary: 'Thai Natural Language Processing in Python.' - -extra: - recipe-maintainers: - - pythainlp diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml index 8e36acad6..ff5babfff 100644 --- a/conda.recipe/meta.yaml +++ b/conda.recipe/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "1.7.2" %} +{% set version = "2.0.1" %} package: name: pythainlp diff --git a/docs/api/spell.rst b/docs/api/spell.rst index 7544a58d5..b2c77736b 100644 --- a/docs/api/spell.rst +++ b/docs/api/spell.rst @@ -8,8 +8,4 @@ Modules ------- .. autofunction:: spell -.. autofunction:: pythainlp.spell.pn.spell -.. autofunction:: pythainlp.spell.pn.prob -.. autofunction:: pythainlp.spell.pn.correct -.. autofunction:: pythainlp.spell.pn.known -.. autofunction:: pythainlp.spell.pn.dictionary +.. autofunction:: correct diff --git a/docs/api/util.rst b/docs/api/util.rst index 1906fe48d..166f52375 100644 --- a/docs/api/util.rst +++ b/docs/api/util.rst @@ -14,9 +14,9 @@ Modules .. autofunction:: digit_to_text .. autofunction:: eng_to_thai .. autofunction:: find_keyword -.. autofunction:: is_thai -.. autofunction:: is_thaichar -.. autofunction:: is_thaiword +.. autofunction:: countthai +.. autofunction:: isthai +.. autofunction:: isthaichar .. autofunction:: normalize .. autofunction:: now_reign_year .. 
autofunction:: num_to_thaiword diff --git a/docs/pythainlp-1-3-thai.md b/docs/archive/pythainlp-1-3-thai.md similarity index 100% rename from docs/pythainlp-1-3-thai.md rename to docs/archive/pythainlp-1-3-thai.md diff --git a/docs/pythainlp-1-4-eng.md b/docs/archive/pythainlp-1-4-eng.md similarity index 100% rename from docs/pythainlp-1-4-eng.md rename to docs/archive/pythainlp-1-4-eng.md diff --git a/docs/pythainlp-1-4-eng.pdf b/docs/archive/pythainlp-1-4-eng.pdf similarity index 100% rename from docs/pythainlp-1-4-eng.pdf rename to docs/archive/pythainlp-1-4-eng.pdf diff --git a/docs/pythainlp-1-4-thai.md b/docs/archive/pythainlp-1-4-thai.md similarity index 100% rename from docs/pythainlp-1-4-thai.md rename to docs/archive/pythainlp-1-4-thai.md diff --git a/docs/pythainlp-1-4-thai.pdf b/docs/archive/pythainlp-1-4-thai.pdf similarity index 100% rename from docs/pythainlp-1-4-thai.pdf rename to docs/archive/pythainlp-1-4-thai.pdf diff --git a/docs/pythainlp-1-5-eng.md b/docs/archive/pythainlp-1-5-eng.md similarity index 100% rename from docs/pythainlp-1-5-eng.md rename to docs/archive/pythainlp-1-5-eng.md diff --git a/docs/pythainlp-1-5-thai.md b/docs/archive/pythainlp-1-5-thai.md similarity index 100% rename from docs/pythainlp-1-5-thai.md rename to docs/archive/pythainlp-1-5-thai.md diff --git a/docs/pythainlp-1-6-eng.md b/docs/archive/pythainlp-1-6-eng.md similarity index 100% rename from docs/pythainlp-1-6-eng.md rename to docs/archive/pythainlp-1-6-eng.md diff --git a/docs/pythainlp-1-6-thai.md b/docs/archive/pythainlp-1-6-thai.md similarity index 100% rename from docs/pythainlp-1-6-thai.md rename to docs/archive/pythainlp-1-6-thai.md diff --git a/docs/pythainlp-1-7.md b/docs/archive/pythainlp-1-7.md similarity index 100% rename from docs/pythainlp-1-7.md rename to docs/archive/pythainlp-1-7.md diff --git a/docs/pythainlp-dev-thai.md b/docs/archive/pythainlp-dev-thai.md similarity index 100% rename from docs/pythainlp-dev-thai.md rename to docs/archive/pythainlp-dev-thai.md diff --git a/docs/whatsnew-1.7.md b/docs/whatsnew-1.7.md deleted file mode 100644 index 768b6f450..000000000 --- a/docs/whatsnew-1.7.md +++ /dev/null @@ -1,12 +0,0 @@ -# āļĄāļĩāļ­āļ°āđ„āļĢāđƒāļŦāļĄāđˆāđƒāļ™ PyThaiNLP 1.7 - -## āļŠāļĢāļļāļ›āļ›āļĢāļ°āđ€āļ”āđ‡āļ™āļŠāļģāļ„āļąāļ - -- āđ€āļĨāļīāļāļŠāļ™āļąāļšāļŠāļ™āļļāļ™ Python 2.7 āļ­āļĒāđˆāļēāļ‡āđ€āļ›āđ‡āļ™āļ—āļēāļ‡āļāļēāļĢ -- āđ€āļžāļīāđˆāļĄ ULMFit utility -- āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āļĢāļ°āļšāļšāļ•āļąāļ”āļ„āļģāđƒāļŦāļĄāđˆ āļ—āļąāđ‰āļ‡ newmm āđāļĨāļ° mm -- thai2vec 0.2 -- sentiment analysis āļ•āļąāļ§āđƒāļŦāļĄāđˆāļ—āļģāļ‡āļēāļ™āļ”āđ‰āļ§āļĒ deep learning -- āđ€āļžāļīāđˆāļĄ thai2rom āđ€āļ›āđ‡āļ™ Thai romanization āļ—āļģāļ”āđ‰āļ§āļĒ deep learning āđƒāļ™āļĢāļ°āļ”āļąāļšāļ•āļąāļ§āļ­āļąāļāļĐāļĢ - -āļāļģāļĨāļąāļ‡āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡... diff --git a/examples/collate.py b/examples/collate.py deleted file mode 100644 index d4e30525e..000000000 --- a/examples/collate.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.util import collate - -print(collate(["āđ„āļāđˆ", "āđ„āļ‚āđˆ", "āļ", "āļŪāļē"])) # ['āļ', 'āđ„āļāđˆ', 'āđ„āļ‚āđˆ', 'āļŪāļē'] diff --git a/examples/date.py b/examples/date.py deleted file mode 100644 index 888d9c178..000000000 --- a/examples/date.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -import datetime -from pythainlp.util import thai_strftime - -fmt = "%Aāļ—āļĩāđˆ %-d %B āļž.āļĻ. %Y āđ€āļ§āļĨāļē %H:%Māļ™. 
(%a %d-%b-%y)" -date = datetime.datetime(1976, 10, 6, 1, 40) - -# āļ§āļąāļ™āļžāļļāļ˜āļ—āļĩāđˆ 6 āļ•āļļāļĨāļēāļ„āļĄ āļž.āļĻ. 2519 āđ€āļ§āļĨāļē 01:40āļ™. (āļž 06-āļ•.āļ„.-19) -print(thai_strftime(date, fmt)) diff --git a/examples/etcc.py b/examples/etcc.py deleted file mode 100644 index f732fdf11..000000000 --- a/examples/etcc.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import etcc - -print(etcc.etcc("āļ„āļ·āļ™āļ„āļ§āļēāļĄāļŠāļļāļ‚")) # /āļ„āļ·āļ™/āļ„āļ§āļēāļĄāļŠāļļāļ‚ diff --git a/examples/ner.py b/examples/ner.py deleted file mode 100644 index 773859e84..000000000 --- a/examples/ner.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tag.named_entity import ThaiNameTagger -ner = ThaiNameTagger() -print(ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.")) diff --git a/examples/normalize.py b/examples/normalize.py deleted file mode 100644 index cac000306..000000000 --- a/examples/normalize.py +++ /dev/null @@ -1,5 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.util import normalize - -print(normalize("āđ€āđ€āļ›āļĨāļ") == "āđāļ›āļĨāļ") # āđ€ āđ€ āļ› āļĨ āļ āļāļąāļš āđāļ›āļĨāļ diff --git a/examples/soundex.py b/examples/soundex.py deleted file mode 100644 index 9864ac747..000000000 --- a/examples/soundex.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.soundex import lk82, metasound, udom83 - -texts = ["āļšāļđāļĢāļ“āļ°", "āļšāļđāļĢāļ“āļāļēāļĢ", "āļĄāļąāļ", "āļĄāļąāļ„", "āļĄāļĢāļĢāļ„", "āļĨāļąāļ", "āļĢāļąāļ", "āļĢāļąāļāļĐāđŒ", ""] -for text in texts: - print( - "{} - lk82: {} - udom83: {} - metasound: {}".format( - text, lk82(text), udom83(text), metasound(text) - ) - ) - -# check equivalence -print(lk82("āļĢāļ–") == lk82("āļĢāļ”")) -print(udom83("āļ§āļĢāļĢ") == udom83("āļ§āļąāļ™")) -print(metasound("āļ™āļž") == metasound("āļ™āļ ")) diff --git a/examples/spell.py b/examples/spell.py deleted file mode 100644 index 92dbc49f3..000000000 --- a/examples/spell.py +++ /dev/null @@ -1,27 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.corpus import ttc -from pythainlp.spell import spell -from pythainlp.spell.pn import NorvigSpellChecker -from pythainlp.spell.pn import correct as pn_tnc_correct -from pythainlp.spell.pn import spell as pn_tnc_spell - -# spell checker from pythainlp.spell module (generic) -print(spell("āļŠāļĩāđˆāđ€āļŦāļĨāļĩāļĒāļĄ")) # ['āļŠāļĩāđˆāđ€āļŦāļĨāļĩāđˆāļĒāļĄ'] - -# spell checker from pythainlp.spell.pn module (specified algorithm - Peter Norvig's) -print(pn_tnc_spell("āđ€āļŦāļĨāļ·āļĒāļĄ")) -print(pn_tnc_correct("āđ€āļŦāļĨāļ·āļĒāļĄ")) - - -# spell checker from pythainlp.spell.pn module (specified algorithm, custom dictionary) -ttc_word_freqs = ttc.word_freqs() -pn_ttc_checker = NorvigSpellChecker(custom_dict=ttc_word_freqs) -print(pn_ttc_checker.spell("āđ€āļŦāļĨāļ·āļĒāļĄ")) -print(pn_ttc_checker.correct("āđ€āļŦāļĨāļ·āļĒāļĄ")) - -# apply different dictionary filter when creating spell checker -pn_tnc_checker = NorvigSpellChecker() -print(len(pn_tnc_checker.dictionary())) -pn_tnc_checker_no_filter = NorvigSpellChecker(dict_filter=None) -print(len(pn_tnc_checker_no_filter.dictionary())) diff --git a/examples/tcc.py b/examples/tcc.py deleted file mode 100644 index 4d95aed43..000000000 --- a/examples/tcc.py +++ /dev/null @@ -1,10 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import tcc - -print(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ")) # āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ - 
-print(tcc.tcc_pos("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ")) # {1, 3, 5, 6, 8, 9} - -for ch in tcc.tcc_gen("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"): # āļ›-āļĢāļ°-āđ€āļ—-āļĻ-āđ„āļ—-āļĒ- - print(ch, end='-') diff --git a/examples/tokenize.py b/examples/tokenize.py deleted file mode 100644 index 0b8a0d00b..000000000 --- a/examples/tokenize.py +++ /dev/null @@ -1,24 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.tokenize import sent_tokenize, word_tokenize - -text = "āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ " -print(text) - -print(sent_tokenize(text)) -# ['āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ', ''] - -print(word_tokenize(text)) -# ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' '] - -print(word_tokenize(text, whitespaces=False)) -# ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ'] - -text2 = "āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™" -print(text2) - -print(word_tokenize(text2)) -# ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™'] - -print(word_tokenize(text2, engine="longest")) -# ['āļāļŽāļŦāļĄāļēāļĒ', 'āđāļĢāļ‡āļ‡āļēāļ™'] diff --git a/examples/transliterate.py b/examples/transliterate.py deleted file mode 100644 index 97fb4e7f1..000000000 --- a/examples/transliterate.py +++ /dev/null @@ -1,6 +0,0 @@ -# -*- coding: utf-8 -*- - -from pythainlp.transliterate import romanize, transliterate - -print(romanize("āđāļĄāļ§")) -print(transliterate("āđāļĄāļ§")) diff --git a/meta.yaml b/meta.yaml index 0bc914207..651053e4e 100644 --- a/meta.yaml +++ b/meta.yaml @@ -1,4 +1,4 @@ -{% set version = "1.7.2" %} +{% set version = "2.0.1" %} package: name: pythainlp diff --git a/notebooks/pythainlp-get-started.ipynb b/notebooks/pythainlp-get-started.ipynb new file mode 100644 index 000000000..806b9e47d --- /dev/null +++ b/notebooks/pythainlp-get-started.ipynb @@ -0,0 +1,1077 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PyThaiNLP Get Started\n", + "\n", + "Code examples for basic functions in PyThaiNLP https://github.com/PyThaiNLP/pythainlp" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Thai Characters\n", + "\n", + "PyThaiNLP provides some ready-to-use Thai character set (e.g. Thai consonants, vowels, tonemarks, symbols) as a string for convenience. There are also few utility functions to test if a string is in Thai or not." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪāļĪāļĶāļ°āļąāļēāļģāļīāļĩāļķāļ·āļļāļđāđ€āđāđ‚āđƒāđ„āđ…āđ‡āđˆāđ‰āđŠāđ‹āļŊāđ†āļšāđŒāđāđŽāđāđšāđ›āđāđ‘āđ’āđ“āđ”āđ•āđ–āđ—āđ˜āđ™āļŋ'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp\n", + "\n", + "pythainlp.thai_characters" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪ'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.thai_consonants" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"āđ”\" in pythainlp.thai_digits" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp.util\n", + "\n", + "pythainlp.util.isthai(\"āļ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.isthai(\"(āļ.āļž.)\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.isthai(\"(āļ.āļž.)\", ignore_chars=\".()\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.countthai(\"āļ§āļąāļ™āļ­āļēāļ—āļīāļ•āļĒāđŒāļ—āļĩāđˆ 24 āļĄāļĩāļ™āļēāļ„āļĄ 2562\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67.85714285714286" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.util.countthai(\"āļ§āļąāļ™āļ­āļēāļ—āļīāļ•āļĒāđŒāļ—āļĩāđˆ 24 āļĄāļĩāļ™āļēāļ„āļĄ 2562\", ignore_chars=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Collation\n", + "\n", + "Sorting according to Thai dictionary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļāļĢāļĢāđ„āļāļĢ', 'āļāļĢāļ°āļ”āļēāļĐ', 'āđ„āļ‚āđˆ', 'āļ„āđ‰āļ­āļ™', 'āļœāđ‰āļēāđ„āļŦāļĄ']" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import collate\n", + "\n", + "thai_words = [\"āļ„āđ‰āļ­āļ™\", \"āļāļĢāļ°āļ”āļēāļĐ\", \"āļāļĢāļĢāđ„āļāļĢ\", \"āđ„āļ‚āđˆ\", \"āļœāđ‰āļēāđ„āļŦāļĄ\"]\n", + "collate(thai_words)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļœāđ‰āļēāđ„āļŦāļĄ', 'āļ„āđ‰āļ­āļ™', 'āđ„āļ‚āđˆ', 'āļāļĢāļ°āļ”āļēāļĐ', 'āļāļĢāļĢāđ„āļāļĢ']" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "collate(thai_words, reverse=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Date and Time Format\n", + "\n", + "Get Thai day and month names with Thai Buddhist Era (B.E.)." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļ§āļąāļ™āļžāļļāļ˜āļ—āļĩāđˆ 6 āļ•āļļāļĨāļēāļ„āļĄ āļž.āļĻ. 2519 āđ€āļ§āļĨāļē 01:40 āļ™. (āļž 06-āļ•.āļ„.-19)'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import datetime\n", + "from pythainlp.util import thai_strftime\n", + "\n", + "fmt = \"%Aāļ—āļĩāđˆ %-d %B āļž.āļĻ. %Y āđ€āļ§āļĨāļē %H:%M āļ™. (%a %d-%b-%y)\"\n", + "date = datetime.datetime(1976, 10, 6, 1, 40)\n", + "\n", + "thai_strftime(date, fmt)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tokenization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Thai Character Cluster (TCC) and Extended TCC\n", + "\n", + "According to [Character Cluster Based Thai Information Retrieval](https://www.researchgate.net/publication/2853284_Character_Cluster_Based_Thai_Information_Retrieval) (Theeramunkong et al. 2004)." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āļ›', 'āļĢāļ°', 'āđ€āļ—', 'āļĻ', 'āđ„āļ—', 'āļĒ']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import tcc\n", + "\n", + "tcc.tcc(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{1, 3, 5, 6, 8, 9}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tcc.tcc_pos(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ/" + ] + } + ], + "source": [ + "for ch in tcc.tcc_gen(\"āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ\"):\n", + " print(ch, end='/')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sentence and Word\n", + "\n", + "Default word tokenizer (\"newmm\") use maximum matching algorithm." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sent_tokenize: ['āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ']\n", + "word_tokenize: ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', ' ']\n", + "word_tokenize, without whitespace: ['āļ‰āļąāļ™', 'āļĢāļąāļ', 'āļ āļēāļĐāļēāđ„āļ—āļĒ', 'āđ€āļžāļĢāļēāļ°', 'āļ‰āļąāļ™', 'āđƒāļŠāđ‰', 'āļ āļēāļĐāļēāđ„āļ—āļĒ']\n" + ] + } + ], + "source": [ + "from pythainlp import sent_tokenize, word_tokenize\n", + "\n", + "text = \"āļ‰āļąāļ™āļĢāļąāļāļ āļēāļĐāļēāđ„āļ—āļĒ āđ€āļžāļĢāļēāļ°āļ‰āļąāļ™āđƒāļŠāđ‰āļ āļēāļĐāļēāđ„āļ—āļĒ \"\n", + "\n", + "print(\"sent_tokenize:\", sent_tokenize(text))\n", + "print(\"word_tokenize:\", word_tokenize(text))\n", + "print(\"word_tokenize, without whitespace:\", word_tokenize(text, whitespaces=False))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Other algorithm can be chosen. We can also create a tokenizer with custom dictionary." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "newmm: ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™', 'āļ‰āļšāļąāļš', 'āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡', 'āđƒāļŦāļĄāđˆ', 'āļ›āļĢāļ°āļāļēāļĻ', 'āđƒāļŠāđ‰āđāļĨāđ‰āļ§']\n", + "longest: ['āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™', 'āļ‰āļšāļąāļš', 'āļ›āļĢāļąāļšāļ›āļĢāļļāļ‡', 'āđƒāļŦāļĄāđˆ', 'āļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰', 'āđāļĨāđ‰āļ§']\n", + "custom: ['āļāļŽ', 'āļŦāļĄāļēāļĒāđāļĢāļ‡', 'āļ‡āļēāļ™', 'āļ‰āļšāļąāļšāļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āđƒāļŦāļĄāđˆāļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰āđāļĨāđ‰āļ§']\n" + ] + } + ], + "source": [ + "from pythainlp import word_tokenize, Tokenizer\n", + "\n", + "text = \"āļāļŽāļŦāļĄāļēāļĒāđāļĢāļ‡āļ‡āļēāļ™āļ‰āļšāļąāļšāļ›āļĢāļąāļšāļ›āļĢāļļāļ‡āđƒāļŦāļĄāđˆāļ›āļĢāļ°āļāļēāļĻāđƒāļŠāđ‰āđāļĨāđ‰āļ§\"\n", + "\n", + "print(\"newmm:\", word_tokenize(text)) # default engine is \"newmm\"\n", + "print(\"longest:\", word_tokenize(text, engine=\"longest\"))\n", + "\n", + "words = [\"āļāļŽ\", \"āļ‡āļēāļ™\"]\n", + "custom_tokenizer = Tokenizer(words)\n", + "print(\"custom:\", custom_tokenizer.word_tokenize(text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Default word tokenizer use a word list from pythainlp.corpus.common.thai_words().\n", + "We can get that list, add/remove words, and create new tokenizer from the modified list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "newmm: ['āđ„āļ­āđāļ‹āļ„', ' ', 'āļ­āļŠāļī', 'āļĄāļ­', 'āļŸ']\n", + "custom: ['āđ„āļ­āđāļ‹āļ„', ' ', 'āļ­āļŠāļīāļĄāļ­āļŸ']\n" + ] + } + ], + "source": [ + "from pythainlp.corpus.common import thai_words\n", + "from pythainlp import word_tokenize, Tokenizer\n", + "\n", + "text = \"āđ„āļ­āđāļ‹āļ„ āļ­āļŠāļīāļĄāļ­āļŸ\"\n", + "\n", + "print(\"newmm:\", word_tokenize(text))\n", + "\n", + "words = set(thai_words()) # thai_words() returns frozenset\n", + "words.add(\"āļ­āļŠāļīāļĄāļ­āļŸ\")\n", + "custom_tokenizer = Tokenizer(words)\n", + "print(\"custom:\", custom_tokenizer.word_tokenize(text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Transliteration" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'maeo'" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.transliterate import romanize\n", + "\n", + "romanize(\"āđāļĄāļ§\")" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "mɛːw\n" + ] + } + ], + "source": [ + "from pythainlp.transliterate import transliterate\n", + "\n", + "print(transliterate(\"āđāļĄāļ§\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Normalization" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import normalize\n", + "\n", + "normalize(\"āđ€āđ€āļ›āļĨāļ\") == \"āđāļ›āļĨāļ\" # āđ€ āđ€ āļ› āļĨ āļ vs āđāļ›āļĨāļ" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Soundex\n", + "\n", + "\"Soundex is a phonetic algorithm for indexing names by sound.\" ([Wikipedia](https://en.wikipedia.org/wiki/Soundex)). PyThaiNLP provides three kinds of Thai soundex." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "from pythainlp.soundex import lk82, metasound, udom83\n", + "\n", + "# check equivalence\n", + "print(lk82(\"āļĢāļ–\") == lk82(\"āļĢāļ”\"))\n", + "print(udom83(\"āļ§āļĢāļĢ\") == udom83(\"āļ§āļąāļ™\"))\n", + "print(metasound(\"āļ™āļž\") == metasound(\"āļ™āļ \"))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "āļšāļđāļĢāļ“āļ° - lk82: āļšE400 - udom83: āļš930000 - metasound: āļš550\n", + "āļšāļđāļĢāļ“āļāļēāļĢ - lk82: āļšE419 - udom83: āļš931900 - metasound: āļš551\n", + "āļĄāļąāļ - lk82: āļĄ1000 - udom83: āļĄ100000 - metasound: āļĄ100\n", + "āļĄāļąāļ„ - lk82: āļĄ1000 - udom83: āļĄ100000 - metasound: āļĄ100\n", + "āļĄāļĢāļĢāļ„ - lk82: āļĄ1000 - udom83: āļĄ310000 - metasound: āļĄ551\n", + "āļĨāļąāļ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĨ100\n", + "āļĢāļąāļ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĢ100\n", + "āļĢāļąāļāļĐāđŒ - lk82: āļĢ1000 - udom83: āļĢ100000 - metasound: āļĢ100\n", + " - lk82: - udom83: - metasound: \n" + ] + } + ], + "source": [ + "texts = [\"āļšāļđāļĢāļ“āļ°\", \"āļšāļđāļĢāļ“āļāļēāļĢ\", \"āļĄāļąāļ\", \"āļĄāļąāļ„\", \"āļĄāļĢāļĢāļ„\", \"āļĨāļąāļ\", \"āļĢāļąāļ\", \"āļĢāļąāļāļĐāđŒ\", \"\"]\n", + "for text in texts:\n", + " print(\n", + " \"{} - lk82: {} - udom83: {} - metasound: {}\".format(\n", + " text, lk82(text), udom83(text), metasound(text)\n", + " )\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spellchecking\n", + "\n", + "Default spellchecker uses [Peter Norvig's algorithm](http://www.norvig.com/spell-correct.html) together with word frequency from Thai National Corpus (TNC)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['āđ€āļŦāļĨāļĩāļĒāļĄ', 'āđ€āļŦāļĨāļ·āļ­āļĄ']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import spell\n", + "\n", + "# list possible spellings\n", + "spell(\"āđ€āļŦāļĨāļ·āļĒāļĄ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āđ€āļŦāļĨāļĩāļĒāļĄ'" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp import correct\n", + "\n", + "# choose the most likely spelling\n", + "correct(\"āđ€āļŦāļĨāļ·āļĒāļĄ\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Spellchecking - Custom dictionary and word frequency\n", + "\n", + "Custom dictionary can be provided when creating spellchecker." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['āđ€āļŦāļĨāļ·āļ­āļĄ']\n", + "āđ€āļŦāļĨāļ·āļ­āļĄ\n" + ] + } + ], + "source": [ + "from pythainlp.corpus import ttc # Thai Textbook Corpus\n", + "from pythainlp.spell import NorvigSpellChecker\n", + "\n", + "checker = NorvigSpellChecker(custom_dict=ttc.word_freqs())\n", + "print(checker.spell(\"āđ€āļŦāļĨāļ·āļĒāļĄ\"))\n", + "print(checker.correct(\"āđ€āļŦāļĨāļ·āļĒāļĄ\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļˆāļ°', 51681),\n", + " ('āđ€āļ›āđ‡āļ™', 51273),\n", + " ('āđ„āļ›', 46567),\n", + " ('āļāđ‡', 46409),\n", + " ('āđ„āļĄāđˆ', 45895),\n", + " ('āļĄāļĩ', 44899),\n", + " ('āđ„āļ”āđ‰', 44513),\n", + " ('āļ§āđˆāļē', 40290),\n", + " ('āđƒāļŦāđ‰', 38715)]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "list(checker.dictionary())[1:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also apply conditions and filter function to dictionary when creating spellchecker." + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "39977" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker = NorvigSpellChecker() # use default filter (remove any word with number or non-Thai character)\n", + "len(checker.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30379" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker = NorvigSpellChecker(min_freq=5, min_len=2, max_len=15)\n", + "len(checker.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76706" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "checker_no_filter = NorvigSpellChecker(dict_filter=None) # use no filter\n", + "len(checker_no_filter.dictionary())" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76700" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def remove_yamok(word):\n", + " return False if \"āđ†\" in word else True\n", + "\n", + "checker_custom_filter = NorvigSpellChecker(dict_filter=remove_yamok) # use custom filter\n", + "len(checker_custom_filter.dictionary())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Part-of-Speech Tagging" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļāļēāļĢ', 'FIXN'), ('āđ€āļ”āļīāļ™āļ—āļēāļ‡', 'VACT')]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.tag import pos_tag, pos_tag_sents\n", + "\n", + "pos_tag([\"āļāļēāļĢ\",\"āđ€āļ”āļīāļ™āļ—āļēāļ‡\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[[('āļĢāļēāļŠāļāļīāļˆāļˆāļēāļ™āļļāđ€āļšāļāļĐāļē', 'NCMN'),\n", + " 
('āđ€āļœāļĒāđāļžāļĢāđˆ', 'VACT'),\n", + " ('āļ›āļĢāļ°āļāļēāļĻāļŠāļģāļ™āļąāļāļ™āļēāļĒāļāļŊ', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āđƒāļŦāđ‰', 'JSBR'),\n", + " (' ', 'PUNC'),\n", + " (\"'āļžāļĨ.āļ—.āļŠāļĢāļĢāđ€āļŠāļĢāļīāļ āđāļāđ‰āļ§āļāļģāđ€āļ™āļīāļ”'\", 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļžāđ‰āļ™āļˆāļēāļāļ•āļģāđāļŦāļ™āđˆāļ‡', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļœāļđāđ‰āļ—āļĢāļ‡āļ„āļļāļ“āļ§āļļāļ’āļīāļžāļīāđ€āļĻāļĐ', 'NCMN'),\n", + " ('āļāļ­āļ‡āļ—āļąāļžāļšāļ', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " ('āļāļĢāļ°āļ—āļĢāļ§āļ‡āļāļĨāļēāđ‚āļŦāļĄ', 'NCMN')],\n", + " [('āđāļĨāļ°', 'JCRG'),\n", + " ('āđāļ•āđˆāļ‡āļ•āļąāđ‰āļ‡', 'VACT'),\n", + " ('āđƒāļŦāđ‰', 'JSBR'),\n", + " ('āđ€āļ›āđ‡āļ™', 'VSTA'),\n", + " ('āļ‚āđ‰āļēāļĢāļēāļŠāļāļēāļĢ', 'NCMN'),\n", + " ('āļžāļĨāđ€āļĢāļ·āļ­āļ™', 'NCMN'),\n", + " ('āļŠāļēāļĄāļąāļ', 'NCMN'),\n", + " ('āļ•āļģāđāļŦāļ™āđˆāļ‡', 'NCMN'),\n", + " (' ', 'PUNC'),\n", + " (\"'āļ­āļ˜āļīāļšāļ”āļĩāļāļĢāļĄāļ›āļĢāļ°āļŠāļēāļŠāļąāļĄāļžāļąāļ™āļ˜āđŒ'\", 'NCMN')]]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sents = [[\"āļĢāļēāļŠāļāļīāļˆāļˆāļēāļ™āļļāđ€āļšāļāļĐāļē\", \"āđ€āļœāļĒāđāļžāļĢāđˆ\", \"āļ›āļĢāļ°āļāļēāļĻāļŠāļģāļ™āļąāļāļ™āļēāļĒāļāļŊ\", \" \", \"āđƒāļŦāđ‰\",\n", + " \" \", \"'āļžāļĨ.āļ—.āļŠāļĢāļĢāđ€āļŠāļĢāļīāļ āđāļāđ‰āļ§āļāļģāđ€āļ™āļīāļ”'\", \" \", \"āļžāđ‰āļ™āļˆāļēāļāļ•āļģāđāļŦāļ™āđˆāļ‡\",\n", + " \" \", \"āļœāļđāđ‰āļ—āļĢāļ‡āļ„āļļāļ“āļ§āļļāļ’āļīāļžāļīāđ€āļĻāļĐ\", \"āļāļ­āļ‡āļ—āļąāļžāļšāļ\", \" \", \"āļāļĢāļ°āļ—āļĢāļ§āļ‡āļāļĨāļēāđ‚āļŦāļĄ\"],\n", + " [\"āđāļĨāļ°\",\"āđāļ•āđˆāļ‡āļ•āļąāđ‰āļ‡\",\"āđƒāļŦāđ‰\", \"āđ€āļ›āđ‡āļ™\", \"āļ‚āđ‰āļēāļĢāļēāļŠāļāļēāļĢ\", \"āļžāļĨāđ€āļĢāļ·āļ­āļ™\", \"āļŠāļēāļĄāļąāļ\",\n", + " \"āļ•āļģāđāļŦāļ™āđˆāļ‡\", \" \", \"'āļ­āļ˜āļīāļšāļ”āļĩāļāļĢāļĄāļ›āļĢāļ°āļŠāļēāļŠāļąāļĄāļžāļąāļ™āļ˜āđŒ'\"]]\n", + "\n", + "pos_tag_sents(sents)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Named-Entity Tagging\n", + "\n", + "The tagger use BIO scheme:\n", + "- B - beginning of entity\n", + "- I - inside entity\n", + "- O - outside entity" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('āļ§āļąāļ™āļ—āļĩāđˆ', 'NOUN', 'O'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('15', 'NUM', 'B-DATE'),\n", + " (' ', 'PUNCT', 'I-DATE'),\n", + " ('āļ.āļĒ.', 'NOUN', 'I-DATE'),\n", + " (' ', 'PUNCT', 'I-DATE'),\n", + " ('61', 'NUM', 'I-DATE'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'),\n", + " ('āļĢāļ°āļšāļš', 'NOUN', 'O'),\n", + " ('āđ€āļ§āļĨāļē', 'NOUN', 'O'),\n", + " (' ', 'PUNCT', 'O'),\n", + " ('14', 'NOUN', 'B-TIME'),\n", + " (':', 'PUNCT', 'I-TIME'),\n", + " ('49', 'NUM', 'I-TIME'),\n", + " (' ', 'PUNCT', 'I-TIME'),\n", + " ('āļ™.', 'NOUN', 'I-TIME')]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.tag.named_entity import ThaiNameTagger\n", + "\n", + "ner = ThaiNameTagger()\n", + "ner.get_ner(\"āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 
61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Word Vector" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:summarizer.preprocessing.cleaner:'pattern' package not found; tag filters are not available for English\n", + "INFO:gensim.models.utils_any2vec:loading projection weights from /Users/arthit/pythainlp-data/thai2vec.bin\n", + "INFO:gensim.models.utils_any2vec:loaded (60001, 400) matrix from /Users/arthit/pythainlp-data/thai2vec.bin\n", + "/usr/local/lib/python3.7/site-packages/gensim/matutils.py:737: FutureWarning: Conversion of the second argument of issubdtype from `int` to `np.signedinteger` is deprecated. In future, it will be treated as `np.int64 == np.dtype(int).type`.\n", + " if np.issubdtype(vec.dtype, np.int):\n" + ] + }, + { + "data": { + "text/plain": [ + "0.99259853" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pythainlp.word_vector\n", + "\n", + "pythainlp.word_vector.similarity(\"āļ„āļ™\", \"āļĄāļ™āļļāļĐāļĒāđŒ\")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:gensim.models.keyedvectors:precomputing L2-norms of word weight vectors\n" + ] + }, + { + "data": { + "text/plain": [ + "'āđāļĄāļ§'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pythainlp.word_vector.doesnt_match([\"āļ„āļ™\", \"āļĄāļ™āļļāļĐāļĒāđŒ\", \"āļšāļļāļ„āļ„āļĨ\", \"āđ€āļˆāđ‰āļēāļŦāļ™āđ‰āļēāļ—āļĩāđˆ\", \"āđāļĄāļ§\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Number Spell Out" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļŦāļ™āļķāđˆāļ‡āļĨāđ‰āļēāļ™āļŠāļ­āļ‡āđāļŠāļ™āļŠāļēāļĄāļŦāļĄāļ·āđˆāļ™āļŠāļĩāđˆāļžāļąāļ™āļŦāđ‰āļēāļĢāđ‰āļ­āļĒāļŦāļāļŠāļīāļšāđ€āļˆāđ‡āļ”āļĨāđ‰āļēāļ™āđāļ›āļ”āđāļŠāļ™āđ€āļāđ‰āļēāļŦāļĄāļ·āđˆāļ™āļŦāļ™āļķāđˆāļ‡āļĢāđ‰āļ­āļĒāļĒāļĩāđˆāļŠāļīāļšāļŠāļēāļĄāļšāļēāļ—āļŠāļĩāđˆāļŠāļīāļšāļŦāđ‰āļēāļŠāļ•āļēāļ‡āļ„āđŒ'" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from pythainlp.util import bahttext\n", + "\n", + "bahttext(1234567890123.45)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'āļŦāļ™āļķāđˆāļ‡āļšāļēāļ—āđ€āļāđ‰āļēāļŠāļīāļšāđ€āļ­āđ‡āļ”āļŠāļ•āļēāļ‡āļ„āđŒ'" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bahttext(1.909)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/sentiment_analysis.ipynb b/notebooks/sentiment_analysis.ipynb index 58b659687..a1ab56694 100644 --- a/notebooks/sentiment_analysis.ipynb +++ 
b/notebooks/sentiment_analysis.ipynb @@ -47,12 +47,14 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", + "import re\n", + "\n", + "import emoji\n", "import numpy as np\n", + "import pandas as pd\n", + "\n", "from pythainlp import word_tokenize\n", "from tqdm import tqdm_notebook\n", - "import re\n", - "import emoji\n", "\n", "#viz\n", "import matplotlib.pyplot as plt\n", @@ -79,8 +81,8 @@ "def replace_rep(text):\n", " def _replace_rep(m):\n", " c,cc = m.groups()\n", - " return f'{c}xxrep'\n", - " re_rep = re.compile(r'(\\S)(\\1{2,})')\n", + " return f\"{c}xxrep\"\n", + " re_rep = re.compile(r\"(\\S)(\\1{2,})\")\n", " return re_rep.sub(_replace_rep, text)\n", "\n", "def ungroup_emoji(toks):\n", @@ -100,7 +102,7 @@ " res = replace_rep(res)\n", " \n", " #tokenize\n", - " res = [word for word in word_tokenize(res, engine='ulmfit') if word and not re.search(pattern=r\"\\s+\", string=word)]\n", + " res = [word for word in word_tokenize(res, engine=\"ulmfit\") if word and not re.search(pattern=r\"\\s+\", string=word)]\n", " \n", " #post rules\n", " res = ungroup_emoji(res)\n", @@ -123,15 +125,13 @@ }, "outputs": [], "source": [ - "with open('train.txt') as f:\n", + "with open(\"train.txt\") as f:\n", " texts = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "with open('train_label.txt') as f:\n", + "with open(\"train_label.txt\") as f:\n", " categories = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "all_df = pd.DataFrame({'category':categories, 'texts':texts})\n", + "all_df = pd.DataFrame({\"category\":categories, \"texts\":texts})\n", "all_df.shape" ] }, @@ -141,11 +141,10 @@ "metadata": {}, "outputs": [], "source": [ - "with open('test.txt') as f:\n", + "with open(\"test.txt\") as f:\n", " texts = [line.strip() for line in f.readlines()]\n", - "f.close()\n", "\n", - "test_df = pd.DataFrame({'category':'test', 'texts':texts})\n", + "test_df = pd.DataFrame({\"category\":\"test\", \"texts\":texts})\n", "test_df.shape" ] }, @@ -162,16 +161,16 @@ "metadata": {}, "outputs": [], "source": [ - "all_df = pd.read_csv('all_df.csv')\n", - "test_df = pd.read_csv('test_df.csv')\n", + "all_df = pd.read_csv(\"all_df.csv\")\n", + "test_df = pd.read_csv(\"test_df.csv\")\n", "\n", - "all_df['processed'] = all_df.texts.map(lambda x: '|'.join(process_text(x)))\n", - "all_df['wc'] = all_df.processed.map(lambda x: len(x.split('|')))\n", - "all_df['uwc'] = all_df.processed.map(lambda x: len(set(x.split('|'))))\n", + "all_df[\"processed\"] = all_df.texts.map(lambda x: \"|\".join(process_text(x)))\n", + "all_df[\"wc\"] = all_df.processed.map(lambda x: len(x.split(\"|\")))\n", + "all_df[\"uwc\"] = all_df.processed.map(lambda x: len(set(x.split(\"|\"))))\n", "\n", - "test_df['processed'] = test_df.texts.map(lambda x: '|'.join(process_text(x)))\n", - "test_df['wc'] = test_df.processed.map(lambda x: len(x.split('|')))\n", - "test_df['uwc'] = test_df.processed.map(lambda x: len(set(x.split('|'))))" + "test_df[\"processed\"] = test_df.texts.map(lambda x: \"|\".join(process_text(x)))\n", + "test_df[\"wc\"] = test_df.processed.map(lambda x: len(x.split(\"|\")))\n", + "test_df[\"uwc\"] = test_df.processed.map(lambda x: len(set(x.split(\"|\"))))" ] }, { @@ -352,7 +351,7 @@ ], "source": [ "#prevalence\n", - "print(train_df['category'].value_counts() / train_df.shape[0])" + "print(train_df[\"category\"].value_counts() / train_df.shape[0])" ] }, { @@ -374,7 +373,7 @@ ], "source": [ "#prevalence\n", - "print(valid_df['category'].value_counts() / 
valid_df.shape[0])" + "print(valid_df[\"category\"].value_counts() / valid_df.shape[0])" ] }, { @@ -398,8 +397,8 @@ "outputs": [], "source": [ "#dependent variables\n", - "y_train = train_df['category']\n", - "y_valid = valid_df['category']" + "y_train = train_df[\"category\"]\n", + "y_valid = valid_df[\"category\"]" ] }, { @@ -424,10 +423,10 @@ "from sklearn.linear_model import LogisticRegression\n", "\n", "tfidf = TfidfVectorizer(tokenizer=process_text, ngram_range=(1,2), min_df=20, sublinear_tf=True)\n", - "tfidf_fit = tfidf.fit(all_df['texts'])\n", - "text_train = tfidf_fit.transform(train_df['texts'])\n", - "text_valid = tfidf_fit.transform(valid_df['texts'])\n", - "text_test = tfidf_fit.transform(test_df['texts'])\n", + "tfidf_fit = tfidf.fit(all_df[\"texts\"])\n", + "text_train = tfidf_fit.transform(train_df[\"texts\"])\n", + "text_valid = tfidf_fit.transform(valid_df[\"texts\"])\n", + "text_test = tfidf_fit.transform(test_df[\"texts\"])\n", "text_train.shape, text_valid.shape" ] }, @@ -459,11 +458,11 @@ "from sklearn.preprocessing import StandardScaler\n", "\n", "scaler = StandardScaler()\n", - "scaler_fit = scaler.fit(all_df[['wc','uwc']].astype(float))\n", + "scaler_fit = scaler.fit(all_df[[\"wc\",\"uwc\"]].astype(float))\n", "print(scaler_fit.mean_, scaler_fit.var_)\n", - "num_train = scaler_fit.transform(train_df[['wc','uwc']].astype(float))\n", - "num_valid = scaler_fit.transform(valid_df[['wc','uwc']].astype(float))\n", - "num_test = scaler_fit.transform(test_df[['wc','uwc']].astype(float))\n", + "num_train = scaler_fit.transform(train_df[[\"wc\",\"uwc\"]].astype(float))\n", + "num_valid = scaler_fit.transform(valid_df[[\"wc\",\"uwc\"]].astype(float))\n", + "num_test = scaler_fit.transform(test_df[[\"wc\",\"uwc\"]].astype(float))\n", "num_train.shape, num_valid.shape" ] }, @@ -516,7 +515,7 @@ ], "source": [ "#fit logistic regression models\n", - "model = LogisticRegression(C=2., penalty='l2', solver='liblinear', dual=False, multi_class='ovr')\n", + "model = LogisticRegression(C=2., penalty=\"l2\", solver=\"liblinear\", dual=False, multi_class=\"ovr\")\n", "model.fit(X_train,y_train)\n", "model.score(X_valid,y_valid)" ] @@ -537,14 +536,14 @@ "probs = model.predict_proba(X_valid)\n", "probs_df = pd.DataFrame(probs)\n", "probs_df.columns = model.classes_\n", - "probs_df['preds'] = model.predict(X_valid)\n", - "probs_df['category'] = valid_df.category\n", - "probs_df['texts'] = valid_df.texts\n", - "probs_df['processed'] = valid_df.processed\n", - "probs_df['wc'] = valid_df.wc\n", - "probs_df['uwc'] = valid_df.uwc\n", - "probs_df['hit'] = (probs_df.preds==probs_df.category)\n", - "probs_df.to_csv('probs_df_linear.csv',index=False)" + "probs_df[\"preds\"] = model.predict(X_valid)\n", + "probs_df[\"category\"] = valid_df.category\n", + "probs_df[\"texts\"] = valid_df.texts\n", + "probs_df[\"processed\"] = valid_df.processed\n", + "probs_df[\"wc\"] = valid_df.wc\n", + "probs_df[\"uwc\"] = valid_df.uwc\n", + "probs_df[\"hit\"] = (probs_df.preds==probs_df.category)\n", + "probs_df.to_csv(\"probs_df_linear.csv\", index=False)" ] }, { @@ -577,10 +576,10 @@ "\n", "conf_mat = confusion_matrix(probs_df.category,probs_df.preds)\n", "print(model.score(X_valid,y_valid))\n", - "sns.heatmap(conf_mat, annot=True, fmt='d',\n", + "sns.heatmap(conf_mat, annot=True, fmt=\"d\",\n", " xticklabels=model.classes_, yticklabels=model.classes_)\n", - "plt.ylabel('Actual')\n", - "plt.xlabel('Predicted')\n", + "plt.ylabel(\"Actual\")\n", + "plt.xlabel(\"Predicted\")\n", "plt.show()" ] }, @@ -601,8 +600,8 @@ 
"from fastai.callbacks import CSVLogger, SaveModelCallback\n", "from pythainlp.ulmfit import *\n", "\n", - "model_path = 'wisesight_data/'\n", - "all_df = pd.read_csv('all_df.csv')\n", + "model_path = \"wisesight_data/\"\n", + "all_df = pd.read_csv(\"all_df.csv\")\n", "train_df, valid_df = train_test_split(all_df, test_size=0.15, random_state=1412)" ] }, @@ -619,11 +618,11 @@ "metadata": {}, "outputs": [], "source": [ - "tt = Tokenizer(tok_func = ThaiTokenizer, lang = 'th', pre_rules = pre_rules_th, post_rules=post_rules_th)\n", + "tt = Tokenizer(tok_func=ThaiTokenizer, lang=\"th\", pre_rules=pre_rules_th, post_rules=post_rules_th)\n", "processor = [TokenizeProcessor(tokenizer=tt, chunksize=10000, mark_fields=False),\n", " NumericalizeProcessor(vocab=None, max_vocab=60000, min_freq=2)]\n", "\n", - "data_lm = (TextList.from_df(all_df, model_path, cols='texts', processor=processor)\n", + "data_lm = (TextList.from_df(all_df, model_path, cols=\"texts\", processor=processor)\n", " .random_split_by_pct(valid_pct = 0.01, seed = 1412)\n", " .label_for_lm()\n", " .databunch(bs=48))\n", @@ -708,7 +707,7 @@ ], "source": [ "#train frozen\n", - "print('training frozen')\n", + "print(\"training frozen\")\n", "learn.freeze_to(-1)\n", "learn.fit_one_cycle(1, 1e-2, moms=(0.8, 0.7))" ] @@ -777,7 +776,7 @@ ], "source": [ "#train unfrozen\n", - "print('training unfrozen')\n", + "print(\"training unfrozen\")\n", "learn.unfreeze()\n", "learn.fit_one_cycle(5, 1e-3, moms=(0.8, 0.7))" ] @@ -789,7 +788,7 @@ "outputs": [], "source": [ "# learn.save('wisesight_lm')\n", - "learn.save_encoder('wisesight_enc')" + "learn.save_encoder(\"wisesight_enc\")" ] }, { @@ -814,17 +813,17 @@ ], "source": [ "#lm data\n", - "data_lm = load_data(model_path,'wisesight_lm.pkl')\n", + "data_lm = load_data(model_path, \"wisesight_lm.pkl\")\n", "data_lm.sanity_check()\n", "\n", "#classification data\n", - "tt = Tokenizer(tok_func = ThaiTokenizer, lang = 'th', pre_rules = pre_rules_th, post_rules=post_rules_th)\n", + "tt = Tokenizer(tok_func=ThaiTokenizer, lang=\"th\", pre_rules=pre_rules_th, post_rules=post_rules_th)\n", "processor = [TokenizeProcessor(tokenizer=tt, chunksize=10000, mark_fields=False),\n", " NumericalizeProcessor(vocab=data_lm.vocab, max_vocab=60000, min_freq=20)]\n", "\n", - "data_cls = (ItemLists(model_path,train=TextList.from_df(train_df, model_path, cols=['texts'], processor=processor),\n", - " valid=TextList.from_df(valid_df, model_path, cols=['texts'], processor=processor))\n", - " .label_from_df('category')\n", + "data_cls = (ItemLists(model_path,train=TextList.from_df(train_df, model_path, cols=[\"texts\"], processor=processor),\n", + " valid=TextList.from_df(valid_df, model_path, cols=[\"texts\"], processor=processor))\n", + " .label_from_df(\"category\")\n", " .databunch(bs=50)\n", " )\n", "data_cls.sanity_check()\n", @@ -844,7 +843,7 @@ "\n", "learn = text_classifier_learner(data_cls, AWD_LSTM, config=config, pretrained=False, **trn_args)\n", "#load pretrained finetuned model\n", - "learn.load_encoder('wisesight_enc')" + "learn.load_encoder(\"wisesight_enc\")" ] }, { @@ -909,7 +908,8 @@ "metadata": {}, "outputs": [], "source": [ - "learn.load('bestmodel');\n", + "learn.load(\"bestmodel\")\n", + "\n", "#get predictions\n", "probs, y_true, loss = learn.get_preds(ds_type = DatasetType.Valid, ordered=True, with_loss=True)\n", "classes = learn.data.train_ds.classes\n", @@ -938,9 +938,9 @@ "source": [ "to_df = np.concatenate([y_true[:,None],preds[:,None],loss[:,None],prob],1)\n", "probs_df = pd.DataFrame(to_df)\n", - 
"probs_df.columns = ['category','preds','loss'] + classes\n", - "probs_df['hit'] = (probs_df.category == probs_df.preds)\n", - "probs_df['texts'] = valid_df.texts\n", + "probs_df.columns = [\"category\",\"preds\",\"loss\"] + classes\n", + "probs_df[\"hit\"] = (probs_df.category == probs_df.preds)\n", + "probs_df[\"texts\"] = valid_df.texts\n", "(y_true==preds).mean()" ] }, @@ -967,10 +967,10 @@ "import seaborn as sns\n", "\n", "conf_mat = confusion_matrix(probs_df.category,probs_df.preds)\n", - "sns.heatmap(conf_mat, annot=True, fmt='d',\n", + "sns.heatmap(conf_mat, annot=True, fmt=\"d\",\n", " xticklabels=classes, yticklabels=classes)\n", - "plt.ylabel('Actual')\n", - "plt.xlabel('Predicted')\n", + "plt.ylabel(\"Actual\")\n", + "plt.xlabel(\"Predicted\")\n", "plt.show()" ] } @@ -991,7 +991,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.7.3" } }, "nbformat": 4, diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py index 7d873a9a1..9ab5ff1ad 100644 --- a/pythainlp/__init__.py +++ b/pythainlp/__init__.py @@ -1,6 +1,6 @@ ïŧŋ# -*- coding: utf-8 -*- -__version__ = 2.0 +__version__ = "2.0.1" thai_consonants = "āļāļ‚āļƒāļ„āļ…āļ†āļ‡āļˆāļ‰āļŠāļ‹āļŒāļāļŽāļāļāļ‘āļ’āļ“āļ”āļ•āļ–āļ—āļ˜āļ™āļšāļ›āļœāļāļžāļŸāļ āļĄāļĒāļĢāļĨāļ§āļĻāļĐāļŠāļŦāļŽāļ­āļŪ" # 44 chars thai_vowels = "āļĪāļĶāļ°\u0e31āļēāļģ\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39āđ€āđāđ‚āđƒāđ„\u0e45\u0e47" # 19 @@ -25,8 +25,8 @@ from pythainlp.soundex import soundex -from pythainlp.spell import spell +from pythainlp.spell import correct, spell from pythainlp.tag import pos_tag -from pythainlp.tokenize import sent_tokenize, tcc, word_tokenize +from pythainlp.tokenize import sent_tokenize, tcc, word_tokenize, Tokenizer from pythainlp.transliterate import romanize, transliterate from pythainlp.util import collate, thai_strftime diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py index f41744dac..855215bc0 100644 --- a/pythainlp/corpus/__init__.py +++ b/pythainlp/corpus/__init__.py @@ -113,7 +113,7 @@ def download(name: str, force: bool = False): data_json = data.json() if name in list(data_json.keys()): temp_name = data_json[name] - print("Download : " + name) + print("Download: " + name) if not db.search(temp.name == name): print(name + " " + temp_name["version"]) diff --git a/pythainlp/soundex/__init__.py b/pythainlp/soundex/__init__.py index 30cfcd0a7..fac5f978d 100644 --- a/pythainlp/soundex/__init__.py +++ b/pythainlp/soundex/__init__.py @@ -12,7 +12,7 @@ # [KSS97] https://linux.thai.net/~thep/soundex/soundex.html -def soundex(text, engine="udom83"): +def soundex(text: str, engine="udom83") -> str: """ Thai Soundex diff --git a/pythainlp/soundex/lk82.py b/pythainlp/soundex/lk82.py index f7b21a764..e0dee6d6b 100644 --- a/pythainlp/soundex/lk82.py +++ b/pythainlp/soundex/lk82.py @@ -21,7 +21,7 @@ _RE_3 = re.compile(r"[āđ‡āđāļšāđ†āļŊ]") -def lk82(text): +def lk82(text: str) -> str: """ LK82 - It's a Thai soundex rule. 
diff --git a/pythainlp/soundex/metasound.py b/pythainlp/soundex/metasound.py index c5f7f8233..6998f81a9 100644 --- a/pythainlp/soundex/metasound.py +++ b/pythainlp/soundex/metasound.py @@ -20,7 +20,7 @@ _C8 = "āļ§" # W -> 8 -def metasound(text, length=4): +def metasound(text: str, length: int = 4) -> str: """ Thai MetaSound diff --git a/pythainlp/soundex/udom83.py b/pythainlp/soundex/udom83.py index bf7ec5bba..dce60feaa 100644 --- a/pythainlp/soundex/udom83.py +++ b/pythainlp/soundex/udom83.py @@ -29,7 +29,7 @@ ) -def udom83(text): +def udom83(text: str) -> str: """ Udom83 - It's a Thai soundex rule. diff --git a/pythainlp/spell/__init__.py b/pythainlp/spell/__init__.py index cfd06682b..c4b654f53 100644 --- a/pythainlp/spell/__init__.py +++ b/pythainlp/spell/__init__.py @@ -3,11 +3,14 @@ Spell checking """ -from .pn import correct as pn_correct -from .pn import spell as pn_spell +from typing import List +from .pn import DEFAULT_SPELL_CHECKER, NorvigSpellChecker -def spell(word, engine="pn"): +__all__ = ["DEFAULT_SPELL_CHECKER", "correct", "spell", "NorvigSpellChecker"] + + +def spell(word: str, engine="pn") -> List[str]: """ :param str word: word to check spelling :param str engine: @@ -15,10 +18,10 @@ def spell(word, engine="pn"): :return: list of words """ - return pn_spell(word) + return DEFAULT_SPELL_CHECKER.spell(word) -def correct(word, engine="pn"): +def correct(word: str, engine="pn") -> str: """ :param str word: word to correct spelling :param str engine: @@ -26,4 +29,4 @@ def correct(word, engine="pn"): :return: the corrected word """ - return pn_correct(word) + return DEFAULT_SPELL_CHECKER.correct(word) diff --git a/pythainlp/spell/pn.py b/pythainlp/spell/pn.py index 84def66f3..ddce3d5c7 100644 --- a/pythainlp/spell/pn.py +++ b/pythainlp/spell/pn.py @@ -7,26 +7,33 @@ Based on Peter Norvig's Python code from http://norvig.com/spell-correct.html """ from collections import Counter +from typing import Callable, List, Set, Tuple from pythainlp import thai_letters from pythainlp.corpus import tnc -from pythainlp.util import is_thaichar +from pythainlp.util import isthaichar -def _no_filter(word): +def _no_filter(word: str) -> bool: return True -def _is_thai_and_not_num(word): +def _is_thai_and_not_num(word: str) -> bool: for ch in word: - if ch != "." and not is_thaichar(ch): + if ch != "." 
and not isthaichar(ch): return False if ch in "āđāđ‘āđ’āđ“āđ”āđ•āđ–āđ—āđ˜āđ™0123456789": return False return True -def _keep(word_freq, min_freq, min_len, max_len, dict_filter): +def _keep( + word_freq: int, + min_freq: int, + min_len: int, + max_len: int, + dict_filter: Callable[[str], bool], +): """ Keep only Thai words with at least min_freq frequency and has length between min_len and max_len characters @@ -41,7 +48,7 @@ def _keep(word_freq, min_freq, min_len, max_len, dict_filter): return dict_filter(word) -def _edits1(word): +def _edits1(word: str) -> Set[str]: """ Return a set of words with edit distance of 1 from the input word """ @@ -54,7 +61,7 @@ def _edits1(word): return set(deletes + transposes + replaces + inserts) -def _edits2(word): +def _edits2(word: str) -> Set[str]: """ Return a set of words with edit distance of 2 from the input word """ @@ -64,11 +71,11 @@ def _edits2(word): class NorvigSpellChecker: def __init__( self, - custom_dict=None, - min_freq=2, - min_len=2, - max_len=40, - dict_filter=_is_thai_and_not_num, + custom_dict: List[Tuple[str, int]] = None, + min_freq: int = 2, + min_len: int = 2, + max_len: int = 40, + dict_filter: Callable[[str], bool] = _is_thai_and_not_num, ): """ Initialize Peter Norvig's spell checker object @@ -97,13 +104,13 @@ def __init__( if self.__WORDS_TOTAL < 1: self.__WORDS_TOTAL = 0 - def dictionary(self): + def dictionary(self) -> List[Tuple[str, int]]: """ Return the spelling dictionary currently used by this spell checker """ return self.__WORDS.items() - def known(self, words): + def known(self, words: List[str]) -> List[str]: """ Return a list of given words that found in the spelling dictionary @@ -111,7 +118,7 @@ def known(self, words): """ return list(w for w in words if w in self.__WORDS) - def prob(self, word): + def prob(self, word: str) -> float: """ Return probability of an input word, according to the spelling dictionary @@ -119,7 +126,7 @@ def prob(self, word): """ return self.__WORDS[word] / self.__WORDS_TOTAL - def freq(self, word): + def freq(self, word: str) -> int: """ Return frequency of an input word, according to the spelling dictionary @@ -127,7 +134,7 @@ def freq(self, word): """ return self.__WORDS[word] - def spell(self, word): + def spell(self, word: str) -> List[str]: """ Return a list of possible words, according to edit distance of 1 and 2, sorted by frequency of word occurrance in the spelling dictionary @@ -147,7 +154,7 @@ def spell(self, word): return candidates - def correct(self, word): + def correct(self, word: str) -> str: """ Return the most possible word, using the probability from the spelling dictionary @@ -160,49 +167,3 @@ def correct(self, word): DEFAULT_SPELL_CHECKER = NorvigSpellChecker() - - -def dictionary(): - """ - Return the spelling dictionary currently used by this spell checker. - The spelling dictionary is based on words found in the Thai National Corpus. - """ - return DEFAULT_SPELL_CHECKER.dictionary() - - -def known(words): - """ - Return a list of given words that found in the spelling dictionary. - The spelling dictionary is based on words found in the Thai National Corpus. 
- - :param str words: A list of words to check if they are in the spelling dictionary - """ - return DEFAULT_SPELL_CHECKER.known(words) - - -def prob(word): - """ - Return probability of an input word, according to the Thai National Corpus - - :param str word: A word to check its probability of occurrence - """ - return DEFAULT_SPELL_CHECKER.prob(word) - - -def spell(word): - """ - Return a list of possible words, according to edit distance of 1 and 2, - sorted by probability of word occurrance in the Thai National Corpus. - - :param str word: A word to check its spelling - """ - return DEFAULT_SPELL_CHECKER.spell(word) - - -def correct(word): - """ - Return the most possible word, according to probability from the Thai National Corpus - - :param str word: A word to correct its spelling - """ - return DEFAULT_SPELL_CHECKER.correct(word) diff --git a/pythainlp/summarize/freq.py b/pythainlp/summarize/freq.py index c7bc25ff9..2dc7044fd 100644 --- a/pythainlp/summarize/freq.py +++ b/pythainlp/summarize/freq.py @@ -33,10 +33,10 @@ def __compute_frequencies(self, word_tokenized_sents): return word_freqs - def __rank(self, ranking, n): + def __rank(self, ranking, n: int): return nlargest(n, ranking, key=ranking.get) - def summarize(self, text, n, tokenizer): + def summarize(self, text: str, n: int, tokenizer: str): sents = sent_tokenize(text) word_tokenized_sents = [word_tokenize(sent, tokenizer) for sent in sents] self.__freq = self.__compute_frequencies(word_tokenized_sents) diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py index 9b0232b78..985991415 100644 --- a/pythainlp/tag/__init__.py +++ b/pythainlp/tag/__init__.py @@ -4,31 +4,29 @@ such as its part of speech and class of named-entity. """ -__all__ = [ - "pos_tag", - "pos_tag_sents", - "tag_provinces" -] +from typing import List, Tuple + +__all__ = ["pos_tag", "pos_tag_sents", "tag_provinces"] from .locations import tag_provinces # tag map for orchid to Universal Dependencies -# from Korakot Chaovavanich +# from Korakot Chaovavanich _TAG_MAP_UD = { - #NOUN - "NOUN":"NOUN", - "NCMN":"NOUN", - "NTTL":"NOUN", - "CNIT":"NOUN", - "CLTV":"NOUN", - "CMTR":"NOUN", - "CFQC":"NOUN", - "CVBL":"NOUN", + # NOUN + "NOUN": "NOUN", + "NCMN": "NOUN", + "NTTL": "NOUN", + "CNIT": "NOUN", + "CLTV": "NOUN", + "CMTR": "NOUN", + "CFQC": "NOUN", + "CVBL": "NOUN", # VERB - "VACT":"VERB", - "VSTA":"VERB", - #PRON - "PRON":"PRON", - "NPRP":"PRON", + "VACT": "VERB", + "VSTA": "VERB", + # PRON + "PRON": "PRON", + "NPRP": "PRON", # ADJ "ADJ": "ADJ", "NONM": "ADJ", @@ -40,13 +38,13 @@ "ADVI": "ADV", "ADVP": "ADV", "ADVS": "ADV", - # INT + # INT "INT": "INTJ", # PRON - "PROPN":"PROPN", - "PPRS":"PROPN", - "PDMN":"PROPN", - "PNTR":"PROPN", + "PROPN": "PROPN", + "PPRS": "PROPN", + "PDMN": "PROPN", + "PNTR": "PROPN", # DET "DET": "DET", "DDAN": "DET", @@ -62,51 +60,69 @@ "NCNM": "NUM", "NLBL": "NUM", "DCNM": "NUM", - # AUX + # AUX "AUX": "AUX", "XVBM": "AUX", "XVAM": "AUX", "XVMM": "AUX", "XVBB": "AUX", "XVAE": "AUX", - # ADP + # ADP "ADP": "ADP", "RPRE": "ADP", # CCONJ - "CCONJ":"CCONJ", - "JCRG":"CCONJ", - # SCONJ - "SCONJ":"SCONJ", - "PREL":"SCONJ", - "JSBR":"SCONJ", - "JCMP":"SCONJ", + "CCONJ": "CCONJ", + "JCRG": "CCONJ", + # SCONJ + "SCONJ": "SCONJ", + "PREL": "SCONJ", + "JSBR": "SCONJ", + "JCMP": "SCONJ", # PART - "PART":"PART", - "FIXN":"PART", - "FIXV":"PART", - "EAFF":"PART", - "EITT":"PART", - "AITT":"PART", - "NEG":"PART", + "PART": "PART", + "FIXN": "PART", + "FIXV": "PART", + "EAFF": "PART", + "EITT": "PART", + "AITT": "PART", + "NEG": "PART", 
# PUNCT - "PUNCT":"PUNCT", - "PUNC":"PUNCT" + "PUNCT": "PUNCT", + "PUNC": "PUNCT", } -def _UD_Exception(w,tag): - if w=="āļāļēāļĢ" or w=="āļ„āļ§āļēāļĄ": - return "NOUN" - return tag -def _orchid_to_ud(tag): - _i=0 - temp=[] - while _i str: + if w == "āļāļēāļĢ" or w == "āļ„āļ§āļēāļĄ": + return "NOUN" + + return tag + + +def _orchid_to_ud(tag) -> List[Tuple[str, str]]: + _i = 0 + temp = [] + while _i < len(tag): + temp.append((tag[_i][0], _UD_Exception(tag[_i][0], _TAG_MAP_UD[tag[_i][1]]))) + _i += 1 + + return temp + + +def _artagger_tag(words: List[str], corpus: str = None) -> List[Tuple[str, str]]: + if not words: + return [] + + from artagger import Tagger -def pos_tag(words, engine="perceptron", corpus="orchid"): + words_ = Tagger().tag(" ".join(words)) + + return [(word.word, word.tag) for word in words_] + + +def pos_tag( + words: List[str], engine: str = "perceptron", corpus: str = "orchid" +) -> List[Tuple[str, str]]: """ Part of Speech tagging function. @@ -121,41 +137,36 @@ def pos_tag(words, engine="perceptron", corpus="orchid"): * pud - Parallel Universal Dependencies (PUD) treebanks :return: returns a list of labels regarding which part of speech it is """ - _corpus=corpus - _tag=[] - if corpus=="orchid_ud": - corpus="orchid" + _corpus = corpus + _tag = [] + if corpus == "orchid_ud": + corpus = "orchid" if not words: return [] if engine == "perceptron": from .perceptron import tag as tag_ elif engine == "artagger": - - def tag_(words, corpus=None): - if not words: - return [] - - from artagger import Tagger - words_ = Tagger().tag(" ".join(words)) - - return [(word.word, word.tag) for word in words_] - + tag_ = _artagger_tag else: # default, use "unigram" ("old") engine from .unigram import tag as tag_ - _tag= tag_(words, corpus=corpus) - if _corpus=="orchid_ud": - _tag=_orchid_to_ud(_tag) + _tag = tag_(words, corpus=corpus) + + if _corpus == "orchid_ud": + _tag = _orchid_to_ud(_tag) + return _tag -def pos_tag_sents(sentences, engine="perceptron", corpus="orchid"): +def pos_tag_sents( + sentences: List[List[str]], engine: str = "perceptron", corpus: str = "orchid" +) -> List[List[Tuple[str, str]]]: """ Part of Speech tagging Sentence function. 
- :param list sentences: a list of tokenized sentences (a list of tokenized words in sentences) + :param list sentences: a list of lists of tokenized words :param str engine: - * unigram - unigram tagger + * unigram - unigram tagger * perceptron - perceptron tagger (default) * artagger - RDR POS tagger :param str corpus: diff --git a/pythainlp/tag/locations.py b/pythainlp/tag/locations.py index 01bf3060c..74fb96e5d 100644 --- a/pythainlp/tag/locations.py +++ b/pythainlp/tag/locations.py @@ -3,10 +3,12 @@ Recognizes locations in text """ +from typing import List, Tuple + from pythainlp.corpus import provinces -def tag_provinces(tokens): +def tag_provinces(tokens: List[str]) -> List[Tuple[str, str]]: """ Recognize Thailand provinces in text diff --git a/pythainlp/tag/named_entity.py b/pythainlp/tag/named_entity.py index a1236d171..dca5d18b8 100644 --- a/pythainlp/tag/named_entity.py +++ b/pythainlp/tag/named_entity.py @@ -5,20 +5,22 @@ __all__ = ["ThaiNameTagger"] +from typing import List, Tuple, Union + import sklearn_crfsuite from pythainlp.corpus import download, get_corpus_path, thai_stopwords from pythainlp.tag import pos_tag from pythainlp.tokenize import word_tokenize -from pythainlp.util import is_thaiword +from pythainlp.util import isthai _WORD_TOKENIZER = "newmm" # āļ•āļąāļ§āļ•āļąāļ”āļ„āļģ -def _is_stopword(word): # āđ€āļŠāđ‡āļ„āļ§āđˆāļēāđ€āļ›āđ‡āļ™āļ„āļģāļŸāļļāđˆāļĄāđ€āļŸāļ·āļ­āļĒ +def _is_stopword(word: str) -> bool: # āđ€āļŠāđ‡āļ„āļ§āđˆāļēāđ€āļ›āđ‡āļ™āļ„āļģāļŸāļļāđˆāļĄāđ€āļŸāļ·āļ­āļĒ return word in thai_stopwords() -def _doc2features(doc, i): +def _doc2features(doc, i) -> dict: word = doc[i][0] postag = doc[i][1] @@ -26,7 +28,7 @@ def _doc2features(doc, i): features = { "word.word": word, "word.stopword": _is_stopword(word), - "word.isthai": is_thaiword(word), + "word.isthai": isthai(word), "word.isspace": word.isspace(), "postag": postag, "word.isdigit": word.isdigit(), @@ -41,7 +43,7 @@ def _doc2features(doc, i): prev_features = { "word.prevword": prevword, "word.previsspace": prevword.isspace(), - "word.previsthai": is_thaiword(prevword), + "word.previsthai": isthai(prevword), "word.prevstopword": _is_stopword(prevword), "word.prevpostag": prevpostag, "word.prevwordisdigit": prevword.isdigit(), @@ -58,7 +60,7 @@ def _doc2features(doc, i): "word.nextword": nextword, "word.nextisspace": nextword.isspace(), "word.nextpostag": nextpostag, - "word.nextisthai": is_thaiword(nextword), + "word.nextisthai": isthai(nextword), "word.nextstopword": _is_stopword(nextword), "word.nextwordisdigit": nextword.isdigit(), } @@ -87,7 +89,9 @@ def __init__(self): model_filename=self.__data_path, ) - def get_ner(self, text, pos=True): + def get_ner( + self, text: str, pos: bool = True + ) -> Union[List[Tuple[str, str]], List[Tuple[str, str, str]]]: """ Get named-entities in text @@ -101,10 +105,11 @@ def get_ner(self, text, pos=True): >>> ner = ThaiNameTagger() >>> ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 
61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.") [('āļ§āļąāļ™āļ—āļĩāđˆ', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), ('15', 'NUM', 'B-DATE'), - (' ', 'PUNCT', 'I-DATE'), ('āļ.āļĒ.', 'NOUN', 'I-DATE'), (' ', 'PUNCT', 'I-DATE'), - ('61', 'NUM', 'I-DATE'), (' ', 'PUNCT', 'O'), ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'), - ('āļĢāļ°āļšāļš', 'NOUN', 'O'), ('āđ€āļ§āļĨāļē', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), - ('14', 'NOUN', 'B-TIME'), (':', 'PUNCT', 'I-TIME'), ('49', 'NUM', 'I-TIME'), + (' ', 'PUNCT', 'I-DATE'), ('āļ.āļĒ.', 'NOUN', 'I-DATE'), + (' ', 'PUNCT', 'I-DATE'), ('61', 'NUM', 'I-DATE'), + (' ', 'PUNCT', 'O'), ('āļ—āļ”āļŠāļ­āļš', 'VERB', 'O'), + ('āļĢāļ°āļšāļš', 'NOUN', 'O'), ('āđ€āļ§āļĨāļē', 'NOUN', 'O'), (' ', 'PUNCT', 'O'), + ('14', 'NOUN', 'B-TIME'), (':', 'PUNCT', 'I-TIME'), ('49', 'NUM', 'I-TIME'), (' ', 'PUNCT', 'I-TIME'), ('āļ™.', 'NOUN', 'I-TIME')] >>> ner.get_ner("āļ§āļąāļ™āļ—āļĩāđˆ 15 āļ.āļĒ. 61 āļ—āļ”āļŠāļ­āļšāļĢāļ°āļšāļšāđ€āļ§āļĨāļē 14:49 āļ™.", pos=False) [('āļ§āļąāļ™āļ—āļĩāđˆ', 'O'), (' ', 'O'), ('15', 'B-DATE'), (' ', 'I-DATE'), @@ -113,7 +118,9 @@ def get_ner(self, text, pos=True): (':', 'I-TIME'), ('49', 'I-TIME'), (' ', 'I-TIME'), ('āļ™.', 'I-TIME')] """ self.__tokens = word_tokenize(text, engine=_WORD_TOKENIZER) - self.__pos_tags = pos_tag(self.__tokens,engine="perceptron", corpus="orchid_ud") + self.__pos_tags = pos_tag( + self.__tokens, engine="perceptron", corpus="orchid_ud" + ) self.__x_test = self.__extract_features(self.__pos_tags) self.__y = self.crf.predict_single(self.__x_test) diff --git a/pythainlp/tag/perceptron.py b/pythainlp/tag/perceptron.py index 4032df759..ccff12427 100644 --- a/pythainlp/tag/perceptron.py +++ b/pythainlp/tag/perceptron.py @@ -3,6 +3,7 @@ Perceptron Part-Of-Speech tagger """ import os +from typing import List, Tuple import dill from pythainlp.corpus import corpus_path @@ -22,127 +23,124 @@ def _load_tagger(filename): _PUD_TAGGER = _load_tagger(_PUD_DATA_FILENAME) -def tag(words, corpus="pud"): +def tag(words: List[str], corpus: str = "pud") -> List[Tuple[str, str]]: """ āļĢāļąāļšāļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āļ„āļ·āļ™āļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āđ€āļŠāđˆāļ™ [('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ...] """ if not words: return [] - # perceptron tagger cannot handle empty string - #words = [word.strip() for word in words if word.strip()] - if corpus == "orchid": tagger = _ORCHID_TAGGER - i=0 - while i': - words[i]="" - elif words[i]=='=': - words[i]="" - elif words[i]=='!': - words[i]="" - elif words[i]=='’': - words[i]="" - elif words[i]==':': - words[i]="" - elif words[i]=='*': - words[i]="" - elif words[i]==';': - words[i]="" - elif words[i]=='/': - words[i]="" - i+=1 - t2=tagger.tag(words) - t=[] - i=0 - while i" or word=='': - word=" " - elif word=="": - word="+" - elif word=="": - word="-" - elif word=="": - word="=" - elif word=="": - word="," - elif word=="": - word="$" - elif word=="": - word="." - elif word=="": - word="(" - elif word=="": - word=")" - elif word=="": - word='"' - elif word=="": - word='@' - elif word=="": - word='&' - elif word=="": - word='{' - elif word=="": - word='^' - elif word=="": - word='?' - elif word=="": - word='<' - elif word=="": - word='>' - elif word=="": - word='=' - elif word=="": - word='!' 
- elif word=="": - word='’' - elif word=="": - word=':' - elif word=="": - word='*' - elif word=="": - word=';' - elif word=="": - word='/' - t.append((word,tag)) - i+=1 - #t=temp + i = 0 + while i < len(words): + if words[i] == " ": + words[i] = "" + elif words[i] == "+": + words[i] = "" + elif words[i] == "-": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == ",": + words[i] = "" + elif words[i] == "$": + words[i] = "" + elif words[i] == ".": + words[i] = "" + elif words[i] == "(": + words[i] = "" + elif words[i] == ")": + words[i] = "" + elif words[i] == '"': + words[i] = "" + elif words[i] == "@": + words[i] = "" + elif words[i] == "&": + words[i] = "" + elif words[i] == "{": + words[i] = "" + elif words[i] == "^": + words[i] = "" + elif words[i] == "?": + words[i] = "" + elif words[i] == "<": + words[i] = "" + elif words[i] == ">": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == "!": + words[i] = "" + elif words[i] == "’": + words[i] = "" + elif words[i] == ":": + words[i] = "" + elif words[i] == "*": + words[i] = "" + elif words[i] == ";": + words[i] = "" + elif words[i] == "/": + words[i] = "" + i += 1 + t2 = tagger.tag(words) + t = [] + i = 0 + while i < len(t2): + word = t2[i][0] + tag = t2[i][1] + if word == "": + word = " " + elif word == "": + word = "+" + elif word == "": + word = "-" + elif word == "": + word = "=" + elif word == "": + word = "," + elif word == "": + word = "$" + elif word == "": + word = "." + elif word == "": + word = "(" + elif word == "": + word = ")" + elif word == "": + word = '"' + elif word == "": + word = "@" + elif word == "": + word = "&" + elif word == "": + word = "{" + elif word == "": + word = "^" + elif word == "": + word = "?" + elif word == "": + word = "<" + elif word == "": + word = ">" + elif word == "": + word = "=" + elif word == "": + word = "!" + elif word == "": + word = "’" + elif word == "": + word = ":" + elif word == "": + word = "*" + elif word == "": + word = ";" + elif word == "": + word = "/" + t.append((word, tag)) + i += 1 else: # default, use "pud" as a corpus tagger = _PUD_TAGGER - t=tagger.tag(words) + t = tagger.tag(words) + return t diff --git a/pythainlp/tag/unigram.py b/pythainlp/tag/unigram.py index 863323a1f..ece6e3028 100644 --- a/pythainlp/tag/unigram.py +++ b/pythainlp/tag/unigram.py @@ -4,6 +4,7 @@ """ import json import os +from typing import List, Tuple import dill import nltk.tag @@ -27,7 +28,7 @@ def _pud_tagger(): return model -def tag(words, corpus): +def tag(words: List[str], corpus: str) -> List[Tuple[str, str]]: """ āļĢāļąāļšāļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āļ„āļ·āļ™āļ„āđˆāļēāđ€āļ›āđ‡āļ™ ''list'' āđ€āļŠāđˆāļ™ [('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ('āļ„āļģ', 'āļŠāļ™āļīāļ”āļ„āļģ'), ...] """ @@ -36,116 +37,116 @@ def tag(words, corpus): if corpus == "orchid": tagger = nltk.tag.UnigramTagger(model=_orchid_tagger()) - i=0 - while i': - words[i]="" - elif words[i]=='=': - words[i]="" - elif words[i]=='!': - words[i]="" - elif words[i]=='’': - words[i]="" - elif words[i]==':': - words[i]="" - elif words[i]=='*': - words[i]="" - elif words[i]==';': - words[i]="" - elif words[i]=='/': - words[i]="" - i+=1 - t=tagger.tag(words) - temp=[] - i=0 - while i": - word=" " - elif word=="": - word="+" - elif word=="": - word="-" - elif word=="": - word="=" - elif word=="": - word="," - elif word=="": - word="$" - elif word=="": - word="." 
- elif word=="": - word="(" - elif word=="": - word=")" - elif word=="": - word='"' - elif word=="": - word='@' - elif word=="": - word='&' - elif word=="": - word='{' - elif word=="": - word='^' - elif word=="": - word='?' - elif word=="": - word='<' - elif word=="": - word='>' - elif word=="": - word='=' - elif word=="": - word='!' - elif word=="": - word='’' - elif word=="": - word=':' - elif word=="": - word='*' - elif word=="": - word=';' - elif word=="": - word='/' - temp.append((word,tag)) - i+=1 - t=temp + i = 0 + while i < len(words): + if words[i] == " ": + words[i] = "" + elif words[i] == "+": + words[i] = "" + elif words[i] == "-": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == ",": + words[i] = "" + elif words[i] == "$": + words[i] = "" + elif words[i] == ".": + words[i] = "" + elif words[i] == "(": + words[i] = "" + elif words[i] == ")": + words[i] = "" + elif words[i] == '"': + words[i] = "" + elif words[i] == "@": + words[i] = "" + elif words[i] == "&": + words[i] = "" + elif words[i] == "{": + words[i] = "" + elif words[i] == "^": + words[i] = "" + elif words[i] == "?": + words[i] = "" + elif words[i] == "<": + words[i] = "" + elif words[i] == ">": + words[i] = "" + elif words[i] == "=": + words[i] = "" + elif words[i] == "!": + words[i] = "" + elif words[i] == "’": + words[i] = "" + elif words[i] == ":": + words[i] = "" + elif words[i] == "*": + words[i] = "" + elif words[i] == ";": + words[i] = "" + elif words[i] == "/": + words[i] = "" + i += 1 + t = tagger.tag(words) + temp = [] + i = 0 + while i < len(t): + word = t[i][0] + tag = t[i][1] + if word == "": + word = " " + elif word == "": + word = "+" + elif word == "": + word = "-" + elif word == "": + word = "=" + elif word == "": + word = "," + elif word == "": + word = "$" + elif word == "": + word = "." + elif word == "": + word = "(" + elif word == "": + word = ")" + elif word == "": + word = '"' + elif word == "": + word = "@" + elif word == "": + word = "&" + elif word == "": + word = "{" + elif word == "": + word = "^" + elif word == "": + word = "?" + elif word == "": + word = "<" + elif word == "": + word = ">" + elif word == "": + word = "=" + elif word == "": + word = "!" 
+ elif word == "": + word = "’" + elif word == "": + word = ":" + elif word == "": + word = "*" + elif word == "": + word = ";" + elif word == "": + word = "/" + temp.append((word, tag)) + i += 1 + t = temp else: tagger = _pud_tagger() - t=tagger.tag(words) + t = tagger.tag(words) return t diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py index d3c9bb1d5..296460857 100644 --- a/pythainlp/tokenize/__init__.py +++ b/pythainlp/tokenize/__init__.py @@ -3,6 +3,8 @@ Thai tokenizers """ import re +from typing import Iterable, List, Union + from pythainlp.corpus import get_corpus, thai_syllables, thai_words from marisa_trie import Trie @@ -11,11 +13,13 @@ FROZEN_DICT_TRIE = Trie(get_corpus("words_th_frozen_201810.txt")) -def word_tokenize(text, engine="newmm", whitespaces=True): +def word_tokenize( + text: str, engine: str = "newmm", whitespaces: bool = True +) -> List[str]: """ :param str text: text to be tokenized :param str engine: tokenizer to be used - :param bool whitespaces: True to output no whitespace, a common mark of sentence or end of phrase in Thai + :param bool whitespaces: True to output no whitespace, a common mark of end of phrase in Thai :Parameters for engine: * newmm (default) - dictionary-based, Maximum Matching + Thai Character Cluster * longest - dictionary-based, Longest Matching @@ -60,7 +64,9 @@ def segment(text): return segment(text) -def dict_word_tokenize(text, custom_dict, engine="newmm"): +def dict_word_tokenize( + text: str, custom_dict: Trie, engine: str = "newmm" +) -> List[str]: """ :meth:`dict_word_tokenize` tokenizes word based on the dictionary you provide. The format has to be in trie data structure. :param str text: text to be tokenized @@ -90,7 +96,7 @@ def dict_word_tokenize(text, custom_dict, engine="newmm"): return segment(text, custom_dict) -def sent_tokenize(text, engine="whitespace+newline"): +def sent_tokenize(text: str, engine: str = "whitespace+newline") -> List[str]: """ This function does not yet automatically recognize when a sentence actually ends. Rather it helps split text where white space and a new line is found. @@ -106,14 +112,14 @@ def sent_tokenize(text, engine="whitespace+newline"): sentences = [] if engine == "whitespace": - sentences = re.split(r' +', text, re.U) + sentences = re.split(r" +", text, re.U) else: # default, use whitespace + newline sentences = text.split() return sentences -def subword_tokenize(text, engine="tcc"): +def subword_tokenize(text: str, engine: str = "tcc") -> List[str]: """ :param str text: text to be tokenized :param str engine: choosing 'tcc' uses the Thai Character Cluster rule to segment words into the smallest unique units. @@ -127,7 +133,7 @@ def subword_tokenize(text, engine="tcc"): return tcc(text) -def syllable_tokenize(text): +def syllable_tokenize(text: str) -> List[str]: """ :param str text: input string to be tokenized @@ -147,7 +153,7 @@ def syllable_tokenize(text): return tokens -def dict_trie(dict_source): +def dict_trie(dict_source: Union[str, Iterable]) -> Trie: """ Create a dict trie which will be used for word_tokenize() function. 
For more information on the trie data structure, @@ -162,17 +168,19 @@ def dict_trie(dict_source): with open(dict_source, "r", encoding="utf8") as f: _vocabs = f.read().splitlines() return Trie(_vocabs) - elif isinstance(dict_source, (list, tuple, set, frozenset)): + elif isinstance(dict_source, Iterable): # Received a sequence type object of vocabs return Trie(dict_source) else: raise TypeError( - "Type of dict_source must be either str (path to source file) or collections" + "Type of dict_source must be either str (path to source file) or iterable" ) class Tokenizer: - def __init__(self, custom_dict=None,tokenize_engine="newmm"): + def __init__( + self, custom_dict: Union[str, Iterable] = None, tokenize_engine: str = "newmm" + ): """ Initialize tokenizer object @@ -180,20 +188,24 @@ def __init__(self, custom_dict=None,tokenize_engine="newmm"): :param str tokenize_engine: choose between different options of engine to token (newmm, mm, longest) """ self.__trie_dict = None - self.word_engine=tokenize_engine + self.word_engine = tokenize_engine if custom_dict: self.__trie_dict = dict_trie(custom_dict) else: self.__trie_dict = dict_trie(thai_words()) - def word_tokenize(self, text): + + def word_tokenize(self, text: str) -> List[str]: """ :param str text: text to be tokenized :return: list of words, tokenized from the text """ - return dict_word_tokenize(text,custom_dict=self.__trie_dict,engine=self.word_engine) - def set_tokenize_engine(self,name_engine): + return dict_word_tokenize( + text, custom_dict=self.__trie_dict, engine=self.word_engine + ) + + def set_tokenize_engine(self, name_engine: str) -> None: """ :param str name_engine: choose between different options of engine to token (newmm, mm, longest) """ - self.word_engine=name_engine \ No newline at end of file + self.word_engine = name_engine diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py index 395e76583..a3844c2f3 100644 --- a/pythainlp/tokenize/deepcut.py +++ b/pythainlp/tokenize/deepcut.py @@ -3,8 +3,10 @@ Wrapper for deepcut Thai word segmentation """ +from typing import List + import deepcut -def segment(text): +def segment(text: str) -> List[str]: return deepcut.tokenize(text) diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py index dbe04122a..986878001 100644 --- a/pythainlp/tokenize/etcc.py +++ b/pythainlp/tokenize/etcc.py @@ -20,7 +20,7 @@ _UV2 = "[" + "".join(["āļą", "āļ·"]) + "]" -def etcc(text): +def etcc(text: str) -> str: """ Enhanced Thai Character Cluster (ETCC) diff --git a/pythainlp/tokenize/longest.py b/pythainlp/tokenize/longest.py index 33ff1fa0a..83ce495a1 100644 --- a/pythainlp/tokenize/longest.py +++ b/pythainlp/tokenize/longest.py @@ -39,14 +39,13 @@ class LongestMatchTokenizer(object): def __init__(self, trie): self.__trie = trie - def __search_nonthai(self, text): + def __search_nonthai(self, text: str): match = _RE_NONTHAI.search(text) if match.group(0): return match.group(0).lower() - else: - return None + return None - def __is_next_word_valid(self, text, begin_pos): + def __is_next_word_valid(self, text: str, begin_pos: int) -> bool: len_text = len(text) text = text[begin_pos:len_text].strip() @@ -63,7 +62,7 @@ def __is_next_word_valid(self, text, begin_pos): return False - def __longest_matching(self, text, begin_pos): + def __longest_matching(self, text: str, begin_pos: int): len_text = len(text) text = text[begin_pos:len_text] @@ -94,7 +93,7 @@ def __longest_matching(self, text, begin_pos): else: return "" - def __segment_text(self, text): + def 
__segment_text(self, text: str): if not text: return [] diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py index 17815fd9f..066ff1017 100644 --- a/pythainlp/tokenize/newmm.py +++ b/pythainlp/tokenize/newmm.py @@ -9,6 +9,7 @@ import re from collections import defaultdict from heapq import heappop, heappush # for priority queue +from typing import List from pythainlp.tokenize import DEFAULT_DICT_TRIE @@ -38,7 +39,7 @@ def bfs_paths_graph(graph, start, goal): queue.append((next, path + [next])) -def onecut(text, trie): +def onecut(text: str, trie): graph = defaultdict(list) # main data structure allow_pos = tcc_pos(text) # āļ•āļģāđāļŦāļ™āđˆāļ‡āļ—āļĩāđˆāļ•āļąāļ” āļ•āđ‰āļ­āļ‡āļ•āļĢāļ‡āļāļąāļš tcc @@ -90,7 +91,7 @@ def onecut(text, trie): # āļŠāđˆāļ§āļĒāđƒāļŦāđ‰āđ„āļĄāđˆāļ•āđ‰āļ­āļ‡āļžāļīāļĄāļžāđŒāļĒāļēāļ§āđ† -def segment(text, trie=None): +def segment(text: str, trie=None) -> List[str]: if not text: return [] diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py index 23b7b38e4..33fc0aabc 100644 --- a/pythainlp/tokenize/pyicu.py +++ b/pythainlp/tokenize/pyicu.py @@ -3,11 +3,12 @@ Wrapper for ICU word segmentation """ import re +from typing import List from icu import BreakIterator, Locale -def _gen_words(text): +def _gen_words(text: str) -> str: bd = BreakIterator.createWordInstance(Locale("th")) bd.setText(text) p = bd.first() @@ -16,7 +17,7 @@ def _gen_words(text): p = q -def segment(text): +def segment(text: str) -> List[str]: if not text: return [] diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py index b50bdb24a..8ef125217 100644 --- a/pythainlp/tokenize/tcc.py +++ b/pythainlp/tokenize/tcc.py @@ -10,6 +10,7 @@ - Python code: Korakot Chaovavanich """ import re +from typing import List, Set RE_TCC = ( """\ @@ -47,9 +48,9 @@ PAT_TCC = re.compile("|".join(RE_TCC)) -def tcc_gen(w): +def tcc_gen(w: str) -> str: if not w: - return '' + return "" p = 0 while p < len(w): @@ -62,7 +63,7 @@ def tcc_gen(w): p += n -def tcc_pos(text): +def tcc_pos(text: str) -> Set[int]: if not text: return set() @@ -75,8 +76,5 @@ def tcc_pos(text): return p_set -def tcc(text, sep="/"): - if not text: - return "" - - return sep.join(tcc_gen(text)) +def tcc(text: str) -> List[str]: + return list(tcc_gen(text)) diff --git a/pythainlp/tools/__init__.py b/pythainlp/tools/__init__.py index e2487e582..5f7a5a5cb 100644 --- a/pythainlp/tools/__init__.py +++ b/pythainlp/tools/__init__.py @@ -5,19 +5,20 @@ For text processing and text conversion, see pythainlp.util """ import os -import sys + import pythainlp PYTHAINLP_DATA_DIR = "pythainlp-data" -def get_full_data_path(path): + +def get_full_data_path(path: str) -> str: """ Get filename/path of a dataset, return full path of that filename/path """ return os.path.join(get_pythainlp_data_path(), path) -def get_pythainlp_data_path(): +def get_pythainlp_data_path() -> str: """ Return full path where PyThaiNLP keeps its (downloaded) data """ @@ -27,7 +28,7 @@ def get_pythainlp_data_path(): return path -def get_pythainlp_path(): +def get_pythainlp_path() -> str: """ Return full path of PyThaiNLP code """ diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py index df96b0360..91435cc54 100644 --- a/pythainlp/transliterate/__init__.py +++ b/pythainlp/transliterate/__init__.py @@ -3,15 +3,15 @@ from pythainlp.tokenize import word_tokenize -# āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ -def romanize(text, engine="royin"): +def romanize(text: str, engine: str 
= "royin") -> str: """ + āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ :param str text: Thai text to be romanized :param str engine: 'royin' (default) or 'thai2rom'. 'royin' uses Thai Royal Institute standard. 'thai2rom' is deep learning Thai romanization (require keras). :return: English (more or less) text that spells out how the Thai text should read. """ - if isinstance(text,str)==False: + if not isinstance(text, str) or not text: return "" if engine == "thai2rom": @@ -21,22 +21,20 @@ def romanize(text, engine="royin"): else: # use default engine "royin" from .royin import romanize - try: - words = word_tokenize(text) - romanized_words = [romanize(word) for word in words] - except: - romanized_words =[romanize(text)] + words = word_tokenize(text) + romanized_words = [romanize(word) for word in words] + return "".join(romanized_words) -def transliterate(text, engine="ipa"): +def transliterate(text: str, engine: str = "ipa") -> str: """ :param str text: Thai text to be transliterated :param str engine: 'ipa' (default) or 'pyicu'. :return: A string of Internaitonal Phonetic Alphabets indicating how the text should read. """ - if not text: + if not isinstance(text, str) or not text: return "" if engine == "pyicu": diff --git a/pythainlp/transliterate/ipa.py b/pythainlp/transliterate/ipa.py index 5fe18d24d..be7c1e1c6 100644 --- a/pythainlp/transliterate/ipa.py +++ b/pythainlp/transliterate/ipa.py @@ -7,7 +7,7 @@ _EPI_THA = epitran.Epitran("tha-Thai") -def transliterate(text): +def transliterate(text: str) -> str: return _EPI_THA.transliterate(text) diff --git a/pythainlp/transliterate/pyicu.py b/pythainlp/transliterate/pyicu.py index e34be0e16..5e4a755aa 100644 --- a/pythainlp/transliterate/pyicu.py +++ b/pythainlp/transliterate/pyicu.py @@ -6,7 +6,7 @@ # āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ -def transliterate(text): +def transliterate(text: str) -> str: """ āļ–āļ­āļ”āđ€āļŠāļĩāļĒāļ‡āļ āļēāļĐāļēāđ„āļ—āļĒāđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ āļĢāļąāļšāļ„āđˆāļē ''str'' āļ‚āđ‰āļ­āļ„āļ§āļēāļĄ āļ„āļ·āļ™āļ„āđˆāļē ''str'' āļ­āļąāļāļĐāļĢāļĨāļ°āļ•āļīāļ™ """ diff --git a/pythainlp/transliterate/royin.py b/pythainlp/transliterate/royin.py index 62e44783b..d6f6f71c8 100644 --- a/pythainlp/transliterate/royin.py +++ b/pythainlp/transliterate/royin.py @@ -117,20 +117,20 @@ ) -def _normalize(text): +def _normalize(text: str) -> str: """āļ•āļąāļ”āļ­āļąāļāļĐāļĢāļ—āļĩāđˆāđ„āļĄāđˆāļ­āļ­āļāđ€āļŠāļĩāļĒāļ‡ (āļāļēāļĢāļąāļ™āļ•āđŒ āđ„āļ›āļĒāļēāļĨāļ™āđ‰āļ­āļĒ āđ„āļĄāđ‰āļĒāļĄāļ*) āđāļĨāļ°āļ§āļĢāļĢāļ“āļĒāļļāļāļ•āđŒāļ—āļīāđ‰āļ‡""" return _RE_NORMALIZE.sub("", text) -def _replace_vowels(word): +def _replace_vowels(word: str) -> str: for vowel in _VOWELS: word = re.sub(vowel[0], vowel[1], word) return word -def _replace_consonants(word, res): - if res is None: +def _replace_consonants(word: str, res: str) -> str: + if not res: pass elif len(res) == 1: word = word.replace(res[0], _CONSONANTS[res[0]][0]) @@ -162,9 +162,10 @@ def _replace_consonants(word, res): return word -def romanize(word): - if isinstance(word,str)==False: +def romanize(word: str) -> str: + if not isinstance(word, str) or not word: return "" + word2 = _replace_vowels(_normalize(word)) res = _RE_CONSONANT.findall(word2) @@ -175,5 +176,5 @@ def romanize(word): word2 = "".join(word2) word2 = _replace_consonants(word2, res) - + return word2 \ No newline at end of file diff --git a/pythainlp/transliterate/thai2rom.py b/pythainlp/transliterate/thai2rom.py 
index 49a498d83..1dc5a5267 100644 --- a/pythainlp/transliterate/thai2rom.py +++ b/pythainlp/transliterate/thai2rom.py @@ -157,5 +157,5 @@ def romanize(self, text): _THAI_TO_ROM = ThaiTransliterator() -def romanize(text): +def romanize(text: str) -> str: return _THAI_TO_ROM.romanize(text) diff --git a/pythainlp/ulmfit/__init__.py b/pythainlp/ulmfit/__init__.py index ab56c81ce..00c9f8891 100644 --- a/pythainlp/ulmfit/__init__.py +++ b/pythainlp/ulmfit/__init__.py @@ -35,8 +35,9 @@ _MODEL_NAME_LSTM = "wiki_lm_lstm" _ITOS_NAME_LSTM = "wiki_itos_lstm" + # Download pretrained models -def _get_path(fname): +def _get_path(fname: str) -> str: """ :meth: download get path of file from pythainlp-corpus :param str fname: file name @@ -56,7 +57,7 @@ class ThaiTokenizer(BaseTokenizer): https://docs.fast.ai/text.transform#BaseTokenizer """ - def __init__(self, lang = "th"): + def __init__(self, lang="th"): self.lang = lang def tokenizer(self, t): @@ -94,6 +95,7 @@ def rm_brackets(t): new_line = re.sub(r"\[\]", "", new_line) return new_line + def ungroup_emoji(toks): "Ungroup emojis" res = [] @@ -105,6 +107,7 @@ def ungroup_emoji(toks): res.append(tok) return res + def lowercase_all(toks): "lowercase all English words" return [tok.lower() for tok in toks] @@ -112,17 +115,26 @@ def lowercase_all(toks): # Pretrained paths # TODO: Let the user decide if they like to download (at setup?) -_THWIKI_LSTM = dict(wgts_fname=_get_path(_MODEL_NAME_LSTM), itos_fname=_get_path(_ITOS_NAME_LSTM)) +_THWIKI_LSTM = dict( + wgts_fname=_get_path(_MODEL_NAME_LSTM), itos_fname=_get_path(_ITOS_NAME_LSTM) +) # Preprocessing rules for Thai text -pre_rules_th = [fix_html, replace_rep_after, normalize_char_order, - spec_add_spaces, rm_useless_spaces, rm_useless_newlines, rm_brackets] +pre_rules_th = [ + fix_html, + replace_rep_after, + normalize_char_order, + spec_add_spaces, + rm_useless_spaces, + rm_useless_newlines, + rm_brackets, +] post_rules_th = [replace_all_caps, ungroup_emoji, lowercase_all] _tokenizer = ThaiTokenizer() -def document_vector(text, learn, data, agg='mean'): +def document_vector(text, learn, data, agg="mean"): """ :meth: `document_vector` get document vector using fastai language model and data bunch :param str text: text to extract embeddings @@ -131,18 +143,18 @@ def document_vector(text, learn, data, agg='mean'): :param agg: how to aggregate embeddings :return: `numpy.array` of document vector sized 400 based on the encoder of the model """ - + s = _tokenizer.tokenizer(text) t = torch.tensor(data.vocab.numericalize(s), requires_grad=False).to(device) m = learn.model[0].encoder.to(device) res = m(t).cpu().detach().numpy() - if agg == 'mean': + if agg == "mean": res = res.mean(0) - elif agg == 'sum': + elif agg == "sum": res = res.sum(0) else: - raise ValueError('Aggregate by mean or sum') - return(res) + raise ValueError("Aggregate by mean or sum") + return res def merge_wgts(em_sz, wgts, itos_pre, itos_new): diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py index b7e194436..6a4ff0ce6 100644 --- a/pythainlp/util/__init__.py +++ b/pythainlp/util/__init__.py @@ -11,9 +11,9 @@ "digit_to_text", "eng_to_thai", "find_keyword", - "is_thai", - "is_thaichar", - "is_thaiword", + "countthai", + "isthai", + "isthaichar", "normalize", "now_reign_year", "num_to_thaiword", @@ -42,6 +42,6 @@ from .keywords import find_keyword, rank from .normalize import deletetone, normalize from .numtoword import bahttext, num_to_thaiword -from .thai import is_thai, is_thaichar, is_thaiword +from .thai import 
countthai, isthai, isthaichar +from .thaiwordcheck import thaicheck from .wordtonum import thaiword_to_num -from .thaiwordcheck import thaicheck \ No newline at end of file diff --git a/pythainlp/util/collate.py b/pythainlp/util/collate.py index bc35c2fe9..ffaff4998 100644 --- a/pythainlp/util/collate.py +++ b/pythainlp/util/collate.py @@ -4,25 +4,27 @@ Simple implementation using regular expressions """ import re +from typing import Iterable, List _RE_TONE = re.compile(r"[āđ‡-āđŒ]") _RE_LV_C = re.compile(r"([āđ€-āđ„])([āļ-āļŪ])") -def _thkey(word): +def _thkey(word: str) -> str: cv = _RE_TONE.sub("", word) # remove tone cv = _RE_LV_C.sub("\\2\\1", cv) # switch lead vowel tone = _RE_TONE.sub(" ", word) # just tone return cv + tone -def collate(data): +def collate(data: Iterable, reverse: bool = False) -> List[str]: """ - :param list data: a list of strings + :param list data: a list of strings to be sorted + :param bool reverse: reverse flag, set to get the result in descending order :return: a list of strings, sorted alphabetically, according to Thai rules **Example**:: >>> from pythainlp.util import * >>> collate(['āđ„āļāđˆ', 'āđ€āļ›āđ‡āļ”', 'āļŦāļĄāļđ', 'āļ§āļąāļ§']) ['āđ„āļāđˆ', 'āđ€āļ›āđ‡āļ”', 'āļ§āļąāļ§', 'āļŦāļĄāļđ'] """ - return sorted(data, key=_thkey) + return sorted(data, key=_thkey, reverse=reverse) diff --git a/pythainlp/util/date.py b/pythainlp/util/date.py index f2d2ee15b..903e42fd4 100644 --- a/pythainlp/util/date.py +++ b/pythainlp/util/date.py @@ -63,7 +63,7 @@ # Conversion support for thai_strftime() -def _thai_strftime(datetime, fmt_c): +def _thai_strftime(datetime, fmt_c: str) -> str: text = "" if fmt_c == "a": # abbreviated weekday text = thai_abbr_weekdays[datetime.weekday()] @@ -73,7 +73,7 @@ def _thai_strftime(datetime, fmt_c): text = thai_abbr_months[datetime.month - 1] elif fmt_c == "B": # full month text = thai_full_months[datetime.month - 1] - elif fmt_c == "y": # # year without century + elif fmt_c == "y": # year without century text = str(datetime.year + 543)[2:4] elif fmt_c == "Y": # year with century text = str(datetime.year + 543) @@ -97,7 +97,7 @@ def _thai_strftime(datetime, fmt_c): return text -def thai_strftime(datetime, fmt, thaidigit=False): +def thai_strftime(datetime, fmt: str, thaidigit=False) -> str: """ Thai date and time string formatter @@ -126,7 +126,7 @@ def thai_strftime(datetime, fmt, thaidigit=False): If supported, we can just locale.setlocale(locale.LC_TIME, "th_TH") and then use native datetime.strftime(). - :return: Date and time spelled out in text, with month in Thai name and year in Thai Buddhist Era (BE). + :return: Date and time spelled out, with day and month names in Thai and year in Thai Buddhist Era (BE). 
""" thaidate_parts = [] diff --git a/pythainlp/util/digitconv.py b/pythainlp/util/digitconv.py index 16e634833..3982168d6 100644 --- a/pythainlp/util/digitconv.py +++ b/pythainlp/util/digitconv.py @@ -56,7 +56,7 @@ } -def thai_digit_to_arabic_digit(text): +def thai_digit_to_arabic_digit(text: str) -> str: """ :param str text: Text with Thai digits such as 'āđ‘', 'āđ’', 'āđ“' :return: Text with Thai digits being converted to Arabic digits such as '1', '2', '3' @@ -74,7 +74,7 @@ def thai_digit_to_arabic_digit(text): return "".join(newtext) -def arabic_digit_to_thai_digit(text): +def arabic_digit_to_thai_digit(text: str) -> str: """ :param str text: Text with Arabic digits such as '1', '2', '3' :return: Text with Arabic digits being converted to Thai digits such as 'āđ‘', 'āđ’', 'āđ“' @@ -92,7 +92,7 @@ def arabic_digit_to_thai_digit(text): return "".join(newtext) -def digit_to_text(text): +def digit_to_text(text: str) -> str: """ :param str text: Text with digits such as '1', '2', 'āđ“', 'āđ”' :return: Text with digits being spelled out in Thai @@ -113,7 +113,7 @@ def digit_to_text(text): return "".join(newtext) -def text_to_arabic_digit(text): +def text_to_arabic_digit(text: str) -> str: """ :param text: A digit spelled out in Thai :return: An Arabic digit such as '1', '2', '3' @@ -124,7 +124,7 @@ def text_to_arabic_digit(text): return _spell_digit[text] -def text_to_thai_digit(text): +def text_to_thai_digit(text: str) -> str: """ :param text: A digit spelled out in Thai :return: A Thai digit such as 'āđ‘', 'āđ’', 'āđ“' diff --git a/pythainlp/util/keyboard.py b/pythainlp/util/keyboard.py index 8fb4abc6e..ad156715d 100644 --- a/pythainlp/util/keyboard.py +++ b/pythainlp/util/keyboard.py @@ -101,7 +101,7 @@ TH_EN_KEYB_PAIRS = {v: k for k, v in EN_TH_KEYB_PAIRS.items()} -def eng_to_thai(text): +def eng_to_thai(text: str) -> str: """ Correct text in one language that is incorrectly-typed with a keyboard layout in another language. (type Thai with English keyboard) @@ -113,7 +113,7 @@ def eng_to_thai(text): ) -def thai_to_eng(text): +def thai_to_eng(text: str) -> str: """ Correct text in one language that is incorrectly-typed with a keyboard layout in another language. (type Thai with English keyboard) diff --git a/pythainlp/util/normalize.py b/pythainlp/util/normalize.py index 66c179fb9..3e05a2c69 100644 --- a/pythainlp/util/normalize.py +++ b/pythainlp/util/normalize.py @@ -44,7 +44,7 @@ ] # āđ€āļāđ‡āļšāļžāļ§āļ āļžāļīāļĄāļžāđŒāļĨāļģāļ”āļąāļšāļœāļīāļ”āļŦāļĢāļ·āļ­āļœāļīāļ”āđāļ›āđ‰āļ™āđāļ•āđˆāļāļĨāļąāļšāđāļŠāļ”āļ‡āļœāļĨāļ–āļđāļāļ•āđ‰āļ­āļ‡ āđƒāļŦāđ‰āđ„āļ›āđ€āļ›āđ‡āļ™āđāļ›āđ‰āļ™āļ—āļĩāđˆāļ–āļđāļāļ•āđ‰āļ­āļ‡ āđ€āļŠāđˆāļ™ āđ€ + āđ€ āđ„āļ›āđ€āļ›āđ‡āļ™ āđ -def normalize(text): +def normalize(text: str) -> str: """ Thai text normalize @@ -61,7 +61,7 @@ def normalize(text): return text -def deletetone(text): +def deletetone(text: str) -> str: """ Remove tonemarks diff --git a/pythainlp/util/numtoword.py b/pythainlp/util/numtoword.py index 394984d70..68519cb79 100644 --- a/pythainlp/util/numtoword.py +++ b/pythainlp/util/numtoword.py @@ -10,7 +10,7 @@ __all__ = ["bahttext", "num_to_thaiword"] -def bahttext(number): +def bahttext(number: float) -> str: """ Converts a number to Thai text and adds a suffix of "Baht" currency. Precision will be fixed at two decimal places (0.00) to fits "Satang" unit. 
@@ -41,9 +41,9 @@ def bahttext(number): return ret -def num_to_thaiword(number): +def num_to_thaiword(number: int) -> str: """ - :param float number: a float number (with decimals) indicating a quantity + :param int number: a float number (with decimals) indicating a quantity :return: a text that indicates the full amount in word form, properly ending each digit with the right term. """ ret = "" diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py index f6b8f3d58..70e5a9d15 100644 --- a/pythainlp/util/thai.py +++ b/pythainlp/util/thai.py @@ -2,11 +2,15 @@ """ Check if it is Thai text """ +import string +_DEFAULT_IGNORE_CHARS = string.whitespace + string.digits + string.punctuation -def is_thaichar(ch): # āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ + +def isthaichar(ch: str) -> bool: """ - Check if character is Thai + Check if a character is Thai + āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ :param str ch: input character :return: True or False @@ -17,45 +21,44 @@ def is_thaichar(ch): # āđ€āļ›āđ‡āļ™āļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄ return False -def is_thaiword(word): # āđ€āļ›āđ‡āļ™āļ„āļģāļ—āļĩāđˆāļĄāļĩāđāļ•āđˆāļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ +def isthai(word: str, ignore_chars: str = ".") -> bool: """ Check if all character is Thai + āđ€āļ›āđ‡āļ™āļ„āļģāļ—āļĩāđˆāļĄāļĩāđāļ•āđˆāļ­āļąāļāļĐāļĢāđ„āļ—āļĒāļŦāļĢāļ·āļ­āđ„āļĄāđˆ :param str word: input text + :param str ignore_chars: characters to be ignored (i.e. will be considered as Thai) :return: True or False """ + if not ignore_chars: + ignore_chars = "" + for ch in word: - if ch != "." and not is_thaichar(ch): + if ch not in ignore_chars and not isthaichar(ch): return False return True -def is_thai(text, check_all=False): +def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float: """ - :param str text: input string or list of strings - :param bool check_all: checks all character or not - - :return: A dictionary with the first value as proportional of text that is Thai, and the second value being a tuple of all characters, along with true or false. 
+ :param str text: input text + :return: float, proportion of characters in the text that is Thai character """ - isthais = [] - num_isthai = 0 + if not text: + return 0 + + if not ignore_chars: + ignore_chars = "" + + num_thai = 0 + num_ignore = 0 for ch in text: - ch_val = ord(ch) - if ch_val >= 3584 and ch_val <= 3711: - num_isthai += 1 - if check_all: - isthais.append(True) - else: - if check_all: - isthais.append(False) - thai_percent = (num_isthai / len(text)) * 100 - - if check_all: - chars = list(text) - isthai_pairs = tuple(zip(chars, isthais)) - data = {"thai": thai_percent, "check_all": isthai_pairs} - else: - data = {"thai": thai_percent} - - return data + if ch in ignore_chars: + num_ignore += 1 + elif isthaichar(ch): + num_thai += 1 + + num_count = len(text) - num_ignore + + return (num_thai / num_count) * 100 diff --git a/pythainlp/util/thaiwordcheck.py b/pythainlp/util/thaiwordcheck.py index d2a036370..7237d2db6 100644 --- a/pythainlp/util/thaiwordcheck.py +++ b/pythainlp/util/thaiwordcheck.py @@ -1,51 +1,76 @@ # -*- coding: utf-8 -*- -''' -From https://github.com/wannaphongcom/open-thai-nlp-document/blob/master/check_thai_word.md -''' +""" +From +https://github.com/wannaphongcom/open-thai-nlp-document/blob/master/check_thai_word.md +""" import re -def _check1(word): # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļŠāļ°āļāļ”āļ§āđˆāļēāļ•āļĢāļ‡āļ•āļēāļĄāļĄāļēāļ•āļĢāļēāđ„āļŦāļĄ - if word in ['āļ','āļ”','āļš','āļ™','āļ‡','āļĄ','āļĒ','āļ§']: - return True - else: - return False -def _check2(word): # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļāļēāļĢāļąāļ™āļ•āđŒ āļ–āđ‰āļēāļĄāļĩ āđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - if 'āđŒ' in word: - return False - else: - return True -def _check3(word): - if word in list("āļ†āļ“āļŒāļŽāļāļāļ‘āļ’āļ˜āļĻāļĐāļŽ"): # āļ–āđ‰āļēāļĄāļĩ āđāļŠāļ”āļ‡āļ§āđˆāļēāđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - return False - else: - return True -def thaicheck(word): - """ - Check is Thai Word - - :param str word: word - :return: True or False - """ - pattern = re.compile(r"[āļ-āļŽāļŪ]",re.U) # āļŠāļģāļŦāļĢāļąāļšāļ•āļĢāļ§āļˆāļŠāļ­āļšāļžāļĒāļąāļāļŠāļ™āļ° - res = re.findall(pattern,word) # āļ”āļķāļ‡āļžāļĒāļąāļāļŠāļ™āļ°āļ—āļąāļąāđ‰āļ‡āļŦāļĄāļ”āļ­āļ­āļāļĄāļē - if res==[]: - return False - elif _check1(res[len(res)-1]) or len(res)==1: - if _check2(word): - word2=list(word) - i=0 - thai=True - if word in ['āļ†āđˆāļē','āđ€āļ†āļĩāđˆāļĒāļ™','āļĻāļķāļ','āļĻāļ­āļ','āđ€āļĻāļīāļ','āđ€āļĻāļĢāđ‰āļē','āļ˜','āļ“','āļŊāļžāļ“āļŊ','āđƒāļŦāļāđˆ','āļŦāļāđ‰āļē','āļ„āļ§āļēāļĒ','āļ„āļ§āļēāļĄ','āļāļĢāļīāđˆāļ‡āđ€āļāļĢāļ‡','āļœāļĨāļī']: # āļ‚āđ‰āļ­āļĒāļāđ€āļ§āđ‰āļ™ āļ„āļģāđ€āļŦāļĨāđˆāļēāļ™āļĩāđ‰āđ€āļ›āđ‡āļ™āļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ - return True - while i bool: # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļŠāļ°āļāļ”āļ§āđˆāļēāļ•āļĢāļ‡āļ•āļēāļĄāļĄāļēāļ•āļĢāļēāđ„āļŦāļĄ + if word in ["āļ", "āļ”", "āļš", "āļ™", "āļ‡", "āļĄ", "āļĒ", "āļ§"]: + return True + return False + + +def _check2(word: str) -> bool: # āđ€āļŠāđ‡āļ„āļ•āļąāļ§āļāļēāļĢāļąāļ™āļ•āđŒ āļ–āđ‰āļēāļĄāļĩ āđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + if "āđŒ" in word: + return False + return True + + +def _check3(word: str) -> bool: + if word in list("āļ†āļ“āļŒāļŽāļāļāļ‘āļ’āļ˜āļĻāļĐāļŽ"): # āļ–āđ‰āļēāļĄāļĩ āđāļŠāļ”āļ‡āļ§āđˆāļēāđ„āļĄāđˆāđƒāļŠāđˆāļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + return False + return True + + +def thaicheck(word: str) -> bool: + """ + Check if a word is an "authentic Thai word" + + :param str word: word + :return: True or False + """ + pattern = re.compile(r"[āļ-āļŽāļŪ]", re.U) # āļŠāļģāļŦāļĢāļąāļšāļ•āļĢāļ§āļˆāļŠāļ­āļšāļžāļĒāļąāļāļŠāļ™āļ° + res = re.findall(pattern, 
word) # āļ”āļķāļ‡āļžāļĒāļąāļāļŠāļ™āļ°āļ—āļąāļąāđ‰āļ‡āļŦāļĄāļ”āļ­āļ­āļāļĄāļē + + if res == []: + return False + + if _check1(res[len(res) - 1]) or len(res) == 1: + if _check2(word): + word2 = list(word) + i = 0 + thai = True + if word in [ + "āļ†āđˆāļē", + "āđ€āļ†āļĩāđˆāļĒāļ™", + "āļĻāļķāļ", + "āļĻāļ­āļ", + "āđ€āļĻāļīāļ", + "āđ€āļĻāļĢāđ‰āļē", + "āļ˜", + "āļ“", + "āļŊāļžāļ“āļŊ", + "āđƒāļŦāļāđˆ", + "āļŦāļāđ‰āļē", + "āļ„āļ§āļēāļĒ", + "āļ„āļ§āļēāļĄ", + "āļāļĢāļīāđˆāļ‡āđ€āļāļĢāļ‡", + "āļœāļĨāļī", + ]: # āļ‚āđ‰āļ­āļĒāļāđ€āļ§āđ‰āļ™ āļ„āļģāđ€āļŦāļĨāđˆāļēāļ™āļĩāđ‰āđ€āļ›āđ‡āļ™āļ„āļģāđ„āļ—āļĒāđāļ—āđ‰ + return True + + while i < len(word2) and thai: + thai = _check3(word2[i]) + if not thai: + return False + i += 1 + return True + + return False + + if word in ["āļāļ°", "āļāļĢāļ°", "āļ›āļ°", "āļ›āļĢāļ°"]: + return True + + return False diff --git a/pythainlp/util/wordtonum.py b/pythainlp/util/wordtonum.py index 7521ec156..43305d329 100644 --- a/pythainlp/util/wordtonum.py +++ b/pythainlp/util/wordtonum.py @@ -6,6 +6,7 @@ https://colab.research.google.com/drive/148WNIeclf0kOU6QxKd6pcfwpSs8l-VKD#scrollTo=EuVDd0nNuI8Q """ import re +from typing import Iterable, List from pythainlp.tokenize import Tokenizer @@ -39,7 +40,7 @@ _TOKENIZER = Tokenizer(custom_dict=_THAIWORD_NUMS_UNITS) -def _thaiword_to_num(tokens): +def _thaiword_to_num(tokens: List[str]) -> int: if not tokens: return None @@ -65,21 +66,21 @@ def _thaiword_to_num(tokens): return _THAI_INT_MAP[a] * _THAI_INT_MAP[b] + _thaiword_to_num(tokens[2:]) -def thaiword_to_num(thaiword): +def thaiword_to_num(word: str) -> int: """ - Converts a thai word to number + Converts a Thai number spellout word to actual number value - :param str thaiword: input thai word + :param str word: a Thai number spellout :return: number """ - if not thaiword: + if not word: return None tokens = [] - if isinstance(thaiword,str): - tokens = _TOKENIZER.word_tokenize(thaiword) - elif isinstance(thaiword,list) or isinstance(thaiword,tuple) or isinstance(thaiword,set) or isinstance(thaiword,frozenset): - for w in thaiword: + if isinstance(word, str): + tokens = _TOKENIZER.word_tokenize(word) + elif isinstance(word, Iterable): + for w in word: tokens.extend(_TOKENIZER.word_tokenize(w)) res = [] diff --git a/pythainlp/word_vector/__init__.py b/pythainlp/word_vector/__init__.py index d035e5395..d1da4a2e3 100644 --- a/pythainlp/word_vector/__init__.py +++ b/pythainlp/word_vector/__init__.py @@ -4,6 +4,8 @@ thai2fit - Thai word vector Code by https://github.com/cstorm125/thai2fit """ +from typing import List + import numpy as np from gensim.models import KeyedVectors from pythainlp.corpus import download as download_data @@ -13,7 +15,7 @@ WV_DIM = 300 -def _download(): +def _download() -> str: path = get_corpus_path("thai2fit_wv") if not path: download_data("thai2fit_wv") @@ -33,7 +35,7 @@ def get_model(): _MODEL = get_model() -def most_similar_cosmul(positive: list, negative: list): +def most_similar_cosmul(positive: List[str], negative: List[str]): """ Word arithmetic operations If a word is not in the vocabulary, KeyError will be raised. @@ -47,18 +49,18 @@ def most_similar_cosmul(positive: list, negative: list): return _MODEL.most_similar_cosmul(positive=positive, negative=negative) -def doesnt_match(listdata): +def doesnt_match(words: List[str]) -> str: """ Pick one word that doesn't match other words in the list If a word is not in the vocabulary, KeyError will be raised. 
- :param list listdata: a list of words + :param list words: a list of words :return: word that doesn't match """ - return _MODEL.doesnt_match(listdata) + return _MODEL.doesnt_match(words) -def similarity(word1, word2): +def similarity(word1: str, word2: str) -> float: """ Get cosine similarity between two words. If a word is not in the vocabulary, KeyError will be raised. @@ -70,7 +72,7 @@ def similarity(word1, word2): return _MODEL.similarity(word1, word2) -def sentence_vectorizer(text, use_mean=True): +def sentence_vectorizer(text: str, use_mean: bool = True): """ Get sentence vector from text If a word is not in the vocabulary, KeyError will be raised. diff --git a/requirements.txt b/requirements.txt index 3159b92b1..7fd66ad78 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,5 @@ -marisa-trie -nltk>=3.2.2 dill -marisa_trie +marisa-trie nltk>=3.2.2 pytz requests diff --git a/setup.cfg b/setup.cfg index 350779304..bb022e678 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0 +current_version = 2.0.1 commit = True tag = True diff --git a/setup.py b/setup.py index a47948438..2fe1ac65f 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ "ipa": ["epitran"], "ml": ["fastai>=1.0.38", "keras", "numpy", "torch"], "ner": ["sklearn-crfsuite"], - "thai2fit": ["gensim", "numpy","emoji"], + "thai2fit": ["emoji", "gensim", "numpy"], "thai2rom": ["keras", "numpy"], "full": [ "artagger", @@ -34,7 +34,7 @@ setup( name="pythainlp", - version="2.0", + version="2.0.1", description="Thai Natural Language Processing library", long_description=readme, long_description_content_type="text/markdown", @@ -54,16 +54,12 @@ "stopwords_th.txt", "syllables_th.txt", "tha-wn.db", - "new-thaidict.txt", - "negation.txt", - "provinces.csv", - "pt_tagger_1.dill", - "ud_thai-pud_pt_tagger.dill", - "ud_thai-pud_unigram_tagger.dill", - "unigram_tagger.dill", - "words_th.txt", + "thailand_provinces_th.txt", + "tnc_freq.txt", + "ud_thai_pud_pt_tagger.dill", + "ud_thai_pud_unigram_tagger.dill", "words_th_frozen_201810.txt", - "tnc_freq.txt" + "words_th.txt", ], }, include_package_data=True, @@ -77,6 +73,8 @@ "natural language processing", "text analytics", "ThaiNLP", + "text processing", + "localization", ], classifiers=[ "Development Status :: 5 - Production/Stable", diff --git a/tests/__init__.py b/tests/__init__.py index 6ba23adda..e569951cd 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -20,11 +20,10 @@ tnc, ttc, wordnet, - download + download, ) from pythainlp.soundex import lk82, metasound, soundex, udom83 -from pythainlp.spell import correct, spell -from pythainlp.spell.pn import NorvigSpellChecker, dictionary, known, prob +from pythainlp.spell import correct, spell, NorvigSpellChecker from pythainlp.summarize import summarize from pythainlp.tag import perceptron, pos_tag, pos_tag_sents, unigram from pythainlp.tag.locations import tag_provinces @@ -37,7 +36,7 @@ multi_cut, newmm, dict_trie, - Tokenizer + Tokenizer, ) from pythainlp.tokenize import pyicu as tokenize_pyicu from pythainlp.tokenize import ( @@ -58,9 +57,9 @@ digit_to_text, eng_to_thai, find_keyword, - is_thai, - is_thaichar, - is_thaiword, + countthai, + isthai, + isthaichar, normalize, now_reign_year, num_to_thaiword, @@ -72,9 +71,9 @@ thai_strftime, thai_to_eng, thaiword_to_num, - thaicheck + thaicheck, ) -#from pythainlp.ulmfit import rm_brackets + class TestUM(unittest.TestCase): """ @@ -177,10 +176,6 @@ def test_spell(self): self.assertEqual(correct(""), "") 
self.assertIsNotNone(correct("āļ—āļ”āļŠāļ­āļ‡")) - self.assertIsNotNone(dictionary()) - self.assertGreaterEqual(prob("āļĄāļĩ"), 0) - self.assertIsNotNone(known(["āđ€āļāļīāļ”", "abc", ""])) - checker = NorvigSpellChecker(dict_filter="") self.assertIsNotNone(checker.dictionary()) self.assertGreaterEqual(checker.prob("āļĄāļĩ"), 0) @@ -262,7 +257,13 @@ def test_ner(self): self.assertEqual(ner.get_ner(""), []) self.assertIsNotNone(ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē")) self.assertIsNotNone(ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē", pos=False)) - self.assertIsNotNone(ner.get_ner("āļ„āļ“āļ°āļ§āļīāļ—āļĒāļēāļĻāļēāļŠāļ•āļĢāđŒāļ›āļĢāļ°āļĒāļļāļāļ•āđŒāđāļĨāļ°āļ§āļīāļĻāļ§āļāļĢāļĢāļĄāļĻāļēāļŠāļ•āļĢāđŒ āļ—āļĩāđˆāļ­āļĒāļđāđˆ āļĄāļŦāļēāļ§āļīāļ—āļĒāļēāļĨāļąāļĒāļ‚āļ­āļ™āđāļāđˆāļ™ āļ§āļīāļ—āļĒāļēāđ€āļ‚āļ•āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 112 āļŦāļĄāļđāđˆ 7 āļšāđ‰āļēāļ™āļŦāļ™āļ­āļ‡āđ€āļ”āļīāđˆāļ™ āļ•āļģāļšāļĨāļŦāļ™āļ­āļ‡āļāļ­āļĄāđ€āļāļēāļ° āļ­āļģāđ€āļ āļ­āđ€āļĄāļ·āļ­āļ‡ āļˆāļąāļ‡āļŦāļ§āļąāļ”āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 43000")) + self.assertIsNotNone( + ner.get_ner( + """āļ„āļ“āļ°āļ§āļīāļ—āļĒāļēāļĻāļēāļŠāļ•āļĢāđŒāļ›āļĢāļ°āļĒāļļāļāļ•āđŒāđāļĨāļ°āļ§āļīāļĻāļ§āļāļĢāļĢāļĄāļĻāļēāļŠāļ•āļĢāđŒ āļĄāļŦāļēāļ§āļīāļ—āļĒāļēāļĨāļąāļĒāļ‚āļ­āļ™āđāļāđˆāļ™ + āļ§āļīāļ—āļĒāļēāđ€āļ‚āļ•āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 112 āļŦāļĄāļđāđˆ 7 āļšāđ‰āļēāļ™āļŦāļ™āļ­āļ‡āđ€āļ”āļīāđˆāļ™ āļ•āļģāļšāļĨāļŦāļ™āļ­āļ‡āļāļ­āļĄāđ€āļāļēāļ° āļ­āļģāđ€āļ āļ­āđ€āļĄāļ·āļ­āļ‡ + āļˆāļąāļ‡āļŦāļ§āļąāļ”āļŦāļ™āļ­āļ‡āļ„āļēāļĒ 43000""" + ) + ) # self.assertEqual( # ner.get_ner("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē"), # [ @@ -339,8 +340,9 @@ def test_word_tokenize(self): self.assertIsNotNone(word_tokenize("āļ—āļ”āļŠāļ­āļš", engine="XX")) self.assertIsNotNone(word_tokenize("āļ—āļ”āļŠāļ­āļš", engine="deepcut")) self.assertIsNotNone(word_tokenize("", engine="deepcut")) + def test_Tokenizer(self): - t_test=Tokenizer() + t_test = Tokenizer() self.assertEqual(t_test.word_tokenize(""), []) def test_word_tokenize_icu(self): @@ -399,7 +401,8 @@ def test_sent_tokenize(self): self.assertEqual(sent_tokenize(None), []) self.assertEqual(sent_tokenize(""), []) self.assertEqual( - sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē ", engine="whitespace"), ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē", ""] + sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē ", engine="whitespace"), + ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē", ""], ) self.assertEqual(sent_tokenize("āļĢāļąāļāļ™āđ‰āļģ āļĢāļąāļāļ›āļĨāļē "), ["āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē"]) @@ -416,9 +419,9 @@ def test_syllable_tokenize(self): ) def test_tcc(self): - self.assertEqual(tcc.tcc(None), "") - self.assertEqual(tcc.tcc(""), "") - self.assertEqual(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), "āļ›/āļĢāļ°/āđ€āļ—/āļĻ/āđ„āļ—/āļĒ") + self.assertEqual(tcc.tcc(None), []) + self.assertEqual(tcc.tcc(""), []) + self.assertEqual(tcc.tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), ["āļ›", "āļĢāļ°", "āđ€āļ—", "āļĻ", "āđ„āļ—", "āļĒ"]) self.assertEqual(list(tcc.tcc_gen("")), []) self.assertEqual(tcc.tcc_pos(""), set()) @@ -558,20 +561,24 @@ def test_normalize(self): # ### pythainlp.util.thai - def test_is_thai(self): - self.assertEqual(is_thai("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), {"thai": 100.0}) - self.assertIsNotNone(is_thai("āđ€āļœāļ·āļ­āļ", check_all=True)) - self.assertIsNotNone(is_thai("āđ€āļœāļ·āļ­āļabc", check_all=True)) - - def test_is_thaichar(self): - self.assertEqual(is_thaichar("āļ"), True) - self.assertEqual(is_thaichar("a"), False) - 
self.assertEqual(is_thaichar("0"), False) - - def test_is_thaiword(self): - self.assertEqual(is_thaiword("āđ„āļ—āļĒ"), True) - self.assertEqual(is_thaiword("āļ•.āļ„."), True) - self.assertEqual(is_thaiword("āđ„āļ—āļĒ0"), False) + def test_countthai(self): + self.assertEqual(countthai(""), 0) + self.assertEqual(countthai("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"), 100.0) + self.assertEqual(countthai("(āļāļāļ•.)", ".()"), 100.0) + self.assertEqual(countthai("(āļāļāļ•.)", None), 50.0) + + def test_isthaichar(self): + self.assertEqual(isthaichar("āļ"), True) + self.assertEqual(isthaichar("a"), False) + self.assertEqual(isthaichar("0"), False) + + def test_isthai(self): + self.assertEqual(isthai("āđ„āļ—āļĒ"), True) + self.assertEqual(isthai("āđ„āļ—āļĒ0"), False) + self.assertEqual(isthai("āļ•.āļ„."), True) + self.assertEqual(isthai("(āļ•.āļ„.)"), False) + self.assertEqual(isthai("āļ•.āļ„.", ignore_chars=None), False) + self.assertEqual(isthai("(āļ•.āļ„.)", ignore_chars=".()"), True) def test_is_thaicheck(self): self.assertEqual(thaicheck("āļ•āļē"), True) @@ -608,5 +615,6 @@ def test_thai2vec(self): word_vector.doesnt_match(["āļāļĩāđˆāļ›āļļāđˆāļ™", "āļžāļĄāđˆāļē", "āđ„āļ­āļ•āļīāļĄ"]), "āđ„āļ­āļ•āļīāļĄ" ) + if __name__ == "__main__": unittest.main()
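The hunks above rework several public APIs. The short Python sketches that follow are illustrative usage notes only, not part of the patch; they use only names and signatures visible in the hunks, with sample strings borrowed from tests/__init__.py unless marked hypothetical. First, spell checking: the module-level dictionary()/known()/prob()/spell()/correct() helpers in pythainlp.spell.pn are removed, and the same behaviour is reached through a NorvigSpellChecker instance (DEFAULT_SPELL_CHECKER backs the package-level spell() and correct() shortcuts).

# Hedged sketch of the reworked spell-checking API (signatures from the hunks above)
from pythainlp.spell import NorvigSpellChecker, correct, spell

print(spell("āļ—āļ”āļŠāļ­āļ‡"))     # candidate corrections, sorted by corpus frequency
print(correct("āļ—āļ”āļŠāļ­āļ‡"))   # single most probable correction

checker = NorvigSpellChecker()            # word frequencies from the Thai National Corpus
print(checker.known(["āđ€āļāļīāļ”", "abc"]))      # subset found in the spelling dictionary
print(checker.prob("āļĄāļĩ"))                  # unigram probability of a word
print(len(checker.dictionary()))          # number of (word, frequency) entries in use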
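The character checks in pythainlp.util are renamed: is_thaichar becomes isthaichar, is_thaiword becomes isthai (gaining an ignore_chars parameter), and is_thai becomes countthai, which now returns the percentage of Thai characters as a float. A sketch mirroring the assertions in tests/__init__.py:

from pythainlp.util import countthai, isthai, isthaichar

print(isthaichar("āļ"))                           # True
print(isthai("āļ•.āļ„."))                            # True  ("." is ignored by default)
print(isthai("(āļ•.āļ„.)"))                          # False (brackets are not ignored)
print(isthai("(āļ•.āļ„.)", ignore_chars=".()"))      # True
print(countthai("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"))                    # 100.0
print(countthai("(āļāļāļ•.)", ignore_chars=".()"))    # 100.0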
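pos_tag() keeps its perceptron/unigram/artagger engines but gains an "orchid_ud" corpus value: words are tagged against ORCHID and the tags are then mapped to Universal Dependencies labels through _TAG_MAP_UD (with the āļāļēāļĢ/āļ„āļ§āļēāļĄ exception handled by _UD_Exception), which is also what ThaiNameTagger now uses internally. A minimal sketch, assuming the ORCHID perceptron model and corpus data are installed:

from pythainlp.tag import pos_tag, pos_tag_sents
from pythainlp.tokenize import word_tokenize

words = word_tokenize("āđāļĄāļ§āļ—āļģāļ­āļ°āđ„āļĢāļ•āļ­āļ™āļŦāđ‰āļēāđ‚āļĄāļ‡āđ€āļŠāđ‰āļē")
# ORCHID tags mapped to Universal Dependencies labels
print(pos_tag(words, engine="perceptron", corpus="orchid_ud"))
# the same tagging applied to a batch of tokenized sentences
print(pos_tag_sents([words], engine="perceptron", corpus="orchid_ud"))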
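dict_trie() now accepts any iterable of words (or a path to a word-list file), and the Tokenizer class stores a trie together with an engine name so the engine can be swapped per instance. A sketch with a hypothetical two-word custom dictionary (the vocabulary and input string are made up for illustration):

from pythainlp.tokenize import Tokenizer, dict_trie, dict_word_tokenize

custom_words = {"āļĢāļąāļāļ™āđ‰āļģ", "āļĢāļąāļāļ›āļĨāļē"}    # hypothetical custom vocabulary
trie = dict_trie(custom_words)         # any iterable of words now works

# one-off tokenization against the custom trie
print(dict_word_tokenize("āļĢāļąāļāļ™āđ‰āļģāļĢāļąāļāļ›āļĨāļē", custom_dict=trie, engine="newmm"))

# reusable tokenizer object holding the dictionary and the engine name
t = Tokenizer(custom_dict=custom_words, tokenize_engine="newmm")
print(t.word_tokenize("āļĢāļąāļāļ™āđ‰āļģāļĢāļąāļāļ›āļĨāļē"))
t.set_tokenize_engine("longest")       # switch the engine for subsequent calls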
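Finally, tcc() now returns a list of Thai Character Cluster strings instead of a "/"-joined string; callers that relied on the old output can join the list themselves. The expected list below matches the updated test in tests/__init__.py:

from pythainlp.tokenize import subword_tokenize
from pythainlp.tokenize.tcc import tcc

print(tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ"))                               # ['āļ›', 'āļĢāļ°', 'āđ€āļ—', 'āļĻ', 'āđ„āļ—', 'āļĒ']
print("/".join(tcc("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ")))                      # reproduces the old "/"-joined form
print(subword_tokenize("āļ›āļĢāļ°āđ€āļ—āļĻāđ„āļ—āļĒ", engine="tcc"))     # subword_tokenize uses the same TCC rule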