diff --git a/README-pypi.md b/README-pypi.md
index 2a441baa4..51ad0707b 100644
--- a/README-pypi.md
+++ b/README-pypi.md
@@ -1,6 +1,6 @@
 ![PyThaiNLP Logo](https://avatars0.githubusercontent.com/u/32934255?s=200&v=4)
 
-# PyThaiNLP 2.0.3
+# PyThaiNLP
 
 PyThaiNLP is a Python library for natural language processing (NLP) of Thai language.
 
@@ -8,21 +8,20 @@ PyThaiNLP includes Thai word tokenizers, transliterators, soundex converters, pa
 
 📫 follow us on Facebook [PyThaiNLP](https://www.facebook.com/pythainlp/)
 
-## What's new in version 2.0 ?
+## What's new in 2.0 ?
 
-- New NorvigSpellChecker spell checker class, which can be initialized with custom dictionary.
 - Terminate Python 2 support. Remove all Python 2 compatibility code.
-- Remove old, obsolated, deprecated, and experimental code.
-- Thai2fit (Upgrade ULMFiT-related codes to fastai 1.0)
-- ThaiNER 1.0
-- Remove sentiment analysis
-- Improved word_tokenize (newmm, mm) and dict_word_tokenize
-- Improved POS-tagging
-- See examples in [Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb)
+- Improved `word_tokenize` ("newmm" and "mm" engines); a `custom_dict` dictionary can be provided
+- Improved `pos_tag` Part-Of-Speech tagging
+- New `NorvigSpellChecker` spell checker class, which can be initialized with a custom dictionary.
+- New `thai2fit` (replacing `thai2vec`, upgrade ULMFiT-related code to fastai 1.0)
+- Updated ThaiNER to 1.0
+  - You may need to [update your existing ThaiNER models from PyThaiNLP 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0)
+- Remove old, obsolete, deprecated, duplicated, and experimental code.
+  - Sentiment analysis is no longer part of the library, but rather [a text classification example](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/sentiment_analysis.ipynb).
+- See more examples in [Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb)
 - [Full change log](https://github.com/PyThaiNLP/pythainlp/issues/118)
 - [Upgrading from 1.7](https://thainlp.org/pythainlp/docs/2.0/notes/pythainlp-1_7-2_0.html)
-- [Upgrade ThaiNER from 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0)
-
 
 ## Install
 
diff --git a/README.md b/README.md
index 9ff268b21..287470aee 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ PyThaiNLP is a Python package for text processing and linguistic analysis, simil
 
 **This is a document for development branch (post 2.0). Things will break.**
 
-- The latest stable release is [2.0.3](https://github.com/PyThaiNLP/pythainlp/tree/master)
+- The latest stable release is [2.0.4](https://github.com/PyThaiNLP/pythainlp/tree/master)
 - PyThaiNLP 2 supports Python 3.6+. Some functions may work with older version of Python 3, but it is not well-tested and will not be supported. See [change log](https://github.com/PyThaiNLP/pythainlp/issues/118).
   - [Upgrading from 1.7](https://thainlp.org/pythainlp/docs/2.0/notes/pythainlp-1_7-2_0.html)
   - [Upgrade ThaiNER from 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0)
@@ -26,15 +26,15 @@ PyThaiNLP is a Python package for text processing and linguistic analysis, simil
 
 ## Capabilities
 
-- Convenient character and word classes, like Thai consonants (```pythainlp.thai_consonants```), vowels (```pythainlp.thai_vowels```), digits (```pythainlp.thai_digits```), and stop words (```pythainlp.corpus.thai_stopwords```) -- comparable to constants like ```string.letters```, ```string.digits```, and ```string.punctuation```
-- Thai word segmentation (```word_tokenize```), including subword segmentation based on Thai Character Cluster (```tcc```) and ETCC (```etcc```)
-- Thai romanization and transliteration (```romanize```, ```transliterate```)
-- Thai part-of-speech taggers (```pos_tag```)
-- Read out number to Thai words (```bahttext```, ```num_to_thaiword```)
-- Thai collation (sort by dictionoary order) (```collate```)
-- Thai-English keyboard misswitched fix (```eng_to_thai```, ```thai_to_eng```)
-- Thai misspellings detection and spelling correction (```spell```)
-- Thai soundex (```lk82```, ```udom83```, ```metasound```)
+- Convenient character and word classes, like Thai consonants (`pythainlp.thai_consonants`), vowels (`pythainlp.thai_vowels`), digits (`pythainlp.thai_digits`), and stop words (`pythainlp.corpus.thai_stopwords`) -- comparable to constants like `string.ascii_letters`, `string.digits`, and `string.punctuation`
+- Thai word segmentation (`word_tokenize`), including subword segmentation based on Thai Character Cluster (`subword_tokenize`)
+- Thai transliteration (`transliterate`)
+- Thai part-of-speech taggers (`pos_tag`)
+- Read out number to Thai words (`bahttext`, `num_to_thaiword`)
+- Thai collation (sort by dictionary order) (`collate`)
+- Thai-English keyboard misswitched fix (`eng_to_thai`, `thai_to_eng`)
+- Thai spelling suggestion and correction (`spell` and `correct`)
+- Thai soundex (`soundex`) with three engines (`lk82`, `udom83`, `metasound`)
 - Thai WordNet wrapper
 - and much more - see examples in [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb).
 
@@ -62,20 +62,20 @@ For some advanced functionalities, like word vector, extra packages may be neede
 $ pip install pythainlp[extra1,extra2,...]
 ```
 
-where ```extras``` can be
-  - ```artagger``` (to support artagger part-of-speech tagger)*
-  - ```deepcut``` (to support deepcut machine-learnt tokenizer)
-  - ```icu``` (for ICU support in transliteration and tokenization)
-  - ```ipa``` (for International Phonetic Alphabet support in transliteration)
-  - ```ml``` (to support fastai 1.0.22 ULMFiT models)
-  - ```ner``` (for named-entity recognizer)
-  - ```thai2fit``` (for Thai word vector)
-  - ```thai2rom``` (for machine-learnt romanization)
-  - ```full``` (install everything)
+where `extras` can be
+  - `artagger` (to support artagger part-of-speech tagger)*
+  - `deepcut` (to support deepcut machine-learnt tokenizer)
+  - `icu` (for ICU, International Components for Unicode, support in transliteration and tokenization)
+  - `ipa` (for IPA, International Phonetic Alphabet, support in transliteration)
+  - `ml` (to support fastai 1.0.22 ULMFiT models)
+  - `ner` (for named-entity recognizer)
+  - `thai2fit` (for Thai word vector)
+  - `thai2rom` (for machine-learnt romanization)
+  - `full` (install everything)
 
-* Note: standard ```artagger``` package from PyPI will not work on Windows, please ```pip install https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger``` instead.
+* Note: standard `artagger` package from PyPI will not work on Windows, please ```pip install https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger``` instead.
 
-** see ```extras``` and ```extras_require``` in [```setup.py```](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py) for package details.
+** see `extras` and `extras_require` in [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py) for package details.
 
 ## Documentation
 
@@ -106,7 +106,7 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนเพื่
 
 **เอกสารนี้สำหรับรุ่นพัฒนา อาจมีการเปลี่ยนแปลงได้ตลอด**
 
-- รุ่นเสถียรล่าสุดคือรุ่น [2.0.3](https://github.com/PyThaiNLP/pythainlp/tree/master)
+- รุ่นเสถียรล่าสุดคือรุ่น [2.0.4](https://github.com/PyThaiNLP/pythainlp/tree/master)
 - PyThaiNLP 2 รองรับ Python 3.6 ขึ้นไป
 - ผู้ใช้ Python 2.7+ ยังสามารถใช้ PyThaiNLP 1.6 ได้
 
@@ -114,16 +114,15 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนเพื่
 
 ## ความสามารถ
 
-- ชุดค่าคงที่ตัวอักษระและคำไทยที่เรียกใช้ได้สะดวก เช่น พยัญชนะ (```pythainlp.thai_consonants```), สระ (```pythainlp.thai_vowels```), ตัวเลขไทย (```pythainlp.thai_digits```), และ stop word (```pythainlp.corpus.thai_stopwords```) -- เหมือนกับค่าคงที่อย่าง ```string.letters```, ```string.digits```, และ ```string.punctuation```
-- Thai word segmentation (```word_tokenize```), including subword segmentation based on Thai Character Cluster (```tcc```) and ETCC (```etcc```)
-- ตัดคำภาษาไทย (```word_tokenize```) และรองรับ Thai Character Clusters (```tcc```) และ ETCC (```etcc```)
-- ถอดเสียงภาษาไทยเป็นอักษรละตินและสัทอักษร (```romanize```, ```transliterate```)
-- ระบุชนิดคำ (part-of-speech) ภาษาไทย (```pos_tag```)
-- อ่านตัวเลขเป็นข้อความภาษาไทย (```bahttext```, ```num_to_thaiword```)
-- เรียงลำดับคำตามพจนานุกรมไทย (```collate```)
-- แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา (```eng_to_thai```, ```thai_to_eng```)
-- ตรวจคำสะกดผิดในภาษาไทย (```spell```)
-- soundex ภาษาไทย (```lk82```, ```udom83```, ```metasound```)
+- ชุดค่าคงที่ตัวอักษรและคำไทยที่เรียกใช้ได้สะดวก เช่น พยัญชนะ (`pythainlp.thai_consonants`), สระ (`pythainlp.thai_vowels`), ตัวเลขไทย (`pythainlp.thai_digits`), และ stop word (`pythainlp.corpus.thai_stopwords`) -- เหมือนกับค่าคงที่อย่าง `string.ascii_letters`, `string.digits`, และ `string.punctuation`
+- ตัดคำภาษาไทย (`word_tokenize`) และรองรับการตัดระดับต่ำกว่าคำโดยใช้ Thai Character Clusters (`subword_tokenize`)
+- ถอดเสียงภาษาไทยเป็นอักษรละตินและสัทอักษร (`transliterate`)
+- ระบุชนิดคำ (part-of-speech) ภาษาไทย (`pos_tag`)
+- อ่านตัวเลขเป็นข้อความภาษาไทย (`bahttext`, `num_to_thaiword`)
+- เรียงลำดับคำตามพจนานุกรมไทย (`collate`)
+- แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา (`eng_to_thai`, `thai_to_eng`)
+- ตรวจคำสะกดผิดในภาษาไทย (`spell`, `correct`)
+- soundex ภาษาไทย (`soundex`) 3 วิธีการ (`lk82`, `udom83`, `metasound`)
 - Thai WordNet wrapper
 - และอื่น ๆ ดูตัวอย่างได้ใน [PyThaiNLP Get Started notebook](https://github.com/PyThaiNLP/pythainlp/blob/dev/notebooks/pythainlp-get-started.ipynb)
 
@@ -147,20 +146,20 @@ $ pip install https://github.com/PyThaiNLP/pythainlp/archive/dev.zip
 $ pip install pythainlp[extra1,extra2,...]
 ```
 
-โดยที่ ```extras``` คือ
-  - ```artagger``` (สำหรับตัวติดป้ายกำกับชนิดคำ artagger)*
-  - ```deepcut``` (สำหรับตัวตัดคำ deepcut)
-  - ```icu``` (สำหรับการถอดตัวสะกดเป็นสัทอักษรและการตัดคำด้วย ICU)
-  - ```ipa``` (สำหรับการถอดตัวสะกดเป็นสัทอักษรสากล (IPA))
-  - ```ml``` (สำหรับการรองรับโมเดล ULMFiT)
-  - ```ner``` (สำหรับการติดป้ายชื่อเฉพาะ (named-entity))
-  - ```thai2fit``` (สำหรับ word vector)
-  - ```thai2rom``` (สำหรับการถอดตัวสะกดเป็นอักษรละติน)
-  - ```full``` (ติดตั้งทุกอย่าง)
+โดยที่ `extras` คือ
+  - `artagger` (สำหรับตัวติดป้ายกำกับชนิดคำ artagger)*
+  - `deepcut` (สำหรับตัวตัดคำ deepcut)
+  - `icu` (สำหรับการถอดตัวสะกดเป็นสัทอักษรและการตัดคำด้วย ICU)
+  - `ipa` (สำหรับการถอดตัวสะกดเป็นสัทอักษรสากล (IPA))
+  - `ml` (สำหรับการรองรับโมเดล ULMFiT)
+  - `ner` (สำหรับการติดป้ายชื่อเฉพาะ (named-entity))
+  - `thai2fit` (สำหรับ word vector)
+  - `thai2rom` (สำหรับการถอดตัวสะกดเป็นอักษรละติน)
+  - `full` (ติดตั้งทุกอย่าง)
 
-* หมายเหตุ: แพคเกจ ```artagger``` มาตรฐานจาก PyPI อาจมีปัญหาการถอดรหัสข้อความบน Windows กรุณาติดตั้ง artagger รุ่นแก้ไขด้วยคำสั่ง ```pip install https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger``` แทน ก่อนจะติดตั้ง PyThaiNLP
+* หมายเหตุ: แพคเกจ `artagger` มาตรฐานจาก PyPI อาจมีปัญหาการถอดรหัสข้อความบน Windows กรุณาติดตั้ง artagger รุ่นแก้ไขด้วยคำสั่ง ```pip install https://github.com/wannaphongcom/artagger/tarball/master#egg=artagger``` แทน ก่อนจะติดตั้ง PyThaiNLP
 
-** นักพัฒนาสามารถดู ```extras``` และ ```extras_require``` ใน [```setup.py```](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py) สำหรับรายละเอียดแพคเกจของเสริม
+** สามารถดู `extras` และ `extras_require` ใน [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py) สำหรับรายละเอียดแพคเกจของเสริม
 
 ## เอกสารการใช้งาน
 
diff --git a/bin/pythainlp b/bin/pythainlp
index 71d2a1686..d49f9fd3d 100644
--- a/bin/pythainlp
+++ b/bin/pythainlp
@@ -1,7 +1,7 @@
 #!python3
 # -*- coding: utf-8 -*-
 
-_VERSION = "2.0.3"
+_VERSION = "2.0.4"
 
 import argparse
 
diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
index 7d493cd50..71dd45e27 100644
--- a/conda.recipe/meta.yaml
+++ b/conda.recipe/meta.yaml
@@ -1,4 +1,4 @@
-{% set version = "2.0.3" %}
+{% set version = "2.0.4" %}
 
 package:
   name: pythainlp
diff --git a/docs/api/tokenize.rst b/docs/api/tokenize.rst
index dee8cda94..93cb11d97 100644
--- a/docs/api/tokenize.rst
+++ b/docs/api/tokenize.rst
@@ -8,10 +8,10 @@ The :class:`pythainlp.tokenize` contains multiple functions for tokenizing a chu
 Modules
 -------
 
+.. autofunction:: sent_tokenize
 .. autofunction:: word_tokenize
-.. autofunction:: dict_word_tokenize
+.. autofunction:: syllable_tokenize
 .. autofunction:: subword_tokenize
-.. autofunction:: sent_tokenize
 .. autofunction:: dict_trie
 .. autoclass:: Tokenizer
    :members: word_tokenize, set_tokenize_engine
diff --git a/docs/archive/pythainlp-1-3-thai.md b/docs/archive/pythainlp-1-3-thai.md
deleted file mode 100644
index cd694c64f..000000000
--- a/docs/archive/pythainlp-1-3-thai.md
+++ /dev/null
@@ -1,215 +0,0 @@
-# คู่มือการใช้งาน PyThaiNLP 1.3
-
-รองรับเฉพาะ Python 3.4 ขึ้นไปเท่านั้น
-
-ติดตั้งใช้คำสั่ง
-
-```
-pip install pythainlp
-```
-
-**วิธีติดตั้งสำหรับ Windows**
-
-ให้ทำการติดตั้ง pyicu โดยใช้ไฟล์ .whl จาก [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) 
-
-หากใช้ python 3.5 64 bit ให้โหลด PyICU‑1.9.7‑cp35‑cp35m‑win_amd64.whl แล้วเปิด cmd ใช้คำสั่ง
-
-```
-pip install PyICU‑1.9.7‑cp35‑cp35m‑win_amd64.whl
-```
-
-แล้วจึงใช้ 
-
-```
-pip install pythainlp
-```
-
-**ติดตั้งบน Mac**
-
-```sh
-$ brew install icu4c --force
-$ brew link --force icu4c
-$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
-```
-
-ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
-
-## API
-
-### ตัดคำไทย
-
-สำหรับการตัดคำไทยนั้น ใน PyThaiNLP 1.3 ได้ทำเปลี่ยน API ใหม่ ยกเลิก pythainlp.segment ให้ทำการเปลี่ยนไปใช้ API ชุดใหม่
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-text คือ ข้อความในรูปแบบสตริง str เท่านั้น
-
-engine คือ ระบบตัดคำไทย ปัจจุบันนี้ PyThaiNLP ได้พัฒนามี 3 engine ให้ใช้งานกันดังนี้
-
-1. icu -  engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ) และเป็นค่าเริ่มต้น
-2. dict - เป็นการตัดคำโดยใช้พจานุกรมจาก thaiword.txt ใน corpus  (ความแม่นยำปานกลาง)
-3. mm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย
-
-คืนค่าเป็น ''list'' เช่น ['แมว','กิน']
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-### Postaggers ภาษาไทย
-
-ตั้งแต่ PyThaiNLP 1.3 เป็นต้นไป ได้ทำการยกเลิก pythainlp.postaggers เดิม เปลี่ยนไปใช้ API ชุดใหม่ดังนี้
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
-
-engine คือ ชุดเครื่องมือในการ postaggers มี 2 ตัวดังนี้
-
-1. old เป็น UnigramTagger (ค่าเริ่มต้น)
-2. artagger เป็น RDR POS Tagger ละเอียดยิ่งกว่าเดิม รองรับเฉพาะ Python 3 เท่านั้น
-
-### แปลงข้อความเป็น Latin
-
-```python
-from pythainlp.romanization import romanization
-romanization(str)
-```
-**ตัวอย่าง**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### เช็คคำผิด * 
-
-*ความสามารถนี้รองรับเฉพาะ Python 3
-
-ก่อนใช้งานความสามารถนี้ ให้ทำการติดตั้ง hunspell และ hunspell-th ก่อน
-
-**วิธีติดตั้ง** สำหรับบน Debian , Ubuntu
-
-```
-sudo apt-get install hunspell hunspell-th
-```
-
-บน Mac OS ติดตั้งตามนี้ [http://pankdm.github.io/hunspell.html](http://pankdm.github.io/hunspell.html)
-
-ให้ใช้ pythainlp.spell ตามตัวอย่างนี้
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม', 'เสียเหลี่ยม', 'เหลี่ยม']
-```
-### pythainlp.number
-
-```python
-from pythainlp.number import *
-```
-จัดการกับตัวเลข โดยมีดังนี้
-
-- nttn(str)  - เป็นการแปลงเลขไทยสู่เลข
-- nttt(str) - เลขไทยสู่ข้อความ
-- ntnt(str) - เลขสู่เลขไทย
-- ntt(str) - เลขสู่ข้อความ
-- ttn(str) - ข้อความสู่เลข
-- numtowords(float) -  อ่านจำนวนตัวเลขภาษาไทย (บาท) รับค่าเป็น ''float'' คืนค่าเป็น  'str'
-
-### เรียงลำดับข้อมูลภาษาไทยใน List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-รับ list คืนค่า list
-
-### รับเวลาปัจจุบันเป็นภาษาไทย
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### WordNet ภาษาไทย
-
-เรียกใช้งาน
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**รับ Synset**
-
-```python
-wordnet.getSynset(คำ)
-```
-
-เป็นคำสั่ง ใช้รับ Synset รับค่า str ส่งออกเป็น tuple ('Synset', 'synset li')
-
-**รับคำจาก id**
-
-```python
-wordnet.getWords()
-```
-
-เป็นคำสั่ง ใช้รับคำจาก ID รับค่า str ส่งออกเป็น tuple ('Word', 'synsetid li')
-
-### stopword ภาษาไทย
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-### หาคำที่มีจำนวนการใช้งานมากที่สุด
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-คืนค่าออกมาเป็น dict
-
-**ตัวอย่างการใช้งาน**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
-
-```python
-from pythainlp.change import *
-```
-
-มีคำสั่งดังนี้
-
-- texttothai(str) แปลงแป้นตัวอักษรภาษาอังกฤษเป็นภาษาไทย
-- texttoeng(str) แปลงแป้นตัวอักษรภาษาไทยเป็นภาษาอังกฤษ
-
-คืนค่าออกมาเป็น str
-
-### Sentiment analysis ภาษาไทย
-
-ใช้ข้อมูลจาก https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-รับค่า str ส่งออกเป็น pos , neg หรือ neutral
\ No newline at end of file
diff --git a/docs/archive/pythainlp-1-4-eng.md b/docs/archive/pythainlp-1-4-eng.md
deleted file mode 100644
index 6955dc8e6..000000000
--- a/docs/archive/pythainlp-1-4-eng.md
+++ /dev/null
@@ -1,311 +0,0 @@
-# User manual PyThaiNLP 1.4
-
-[TOC]
-
-## API
-
-### Thai segment
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-**text** refers to an input text string in Thai.
-
-**engine** refers to a thai word segmentation system; There are 6 systems to choose from.
-
-1. icu (default) - pyicu has a very poor performance. 
-2. dict - dictionary-based tokenizer. It returns False if the message can not be wrapped.
-3. mm - Maximum Matching algorithm for Thai word segmentation.
-4. newmm - Maximum Matching algorithm for Thai word segmatation. Developed by Korakot Chaovavanich (https://www.facebook.com/groups/408004796247683/permalink/431283740586455/)
-5. pylexto - LexTo.
-6. deepcut - Deep Learning based Thai word segmentation (https://github.com/rkcosmos/deepcut)
-
-
-Output: ''list'' ex. ['แมว','กิน']
-
-**Example**
-
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-### Thai postaggers
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-engine
-
-1. old is the UnigramTagger (default)
-2. artagger is the RDR POS Tagger.
-
-### Thai romanization
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='pyicu')
-```
-There are 2 engines
-
-- pyicu
-- royin
-
-data :
-
-input ''str''
-
-returns ''str'' 
-
-**Example**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### Spell Check 
-
-Before using this module,  please install hunspell and hunspell-th.
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม', 'เสียเหลี่ยม', 'เหลี่ยม']
-```
-### pythainlp.number
-
-```python
-from pythainlp.number import *
-```
-- nttn(str)  - convert thai numbers to numbers.
-- nttt(str) - Thai Numbers to text.
-- ntnt(str) - numbers to thai numbers.
-- ntt(str) -  numbers to text.
-- ttn(str) - text to  numbers.
-- numtowords(float) -  Read thai numbers (Baht) input ''float'' returns  'str'
-
-### Sort Thai text into List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-input list 
-
-returns list
-
-### Get current time in Thai
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### Thai WordNet
-
-import
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**Use**
-
-It's like nltk.
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**Example**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-### Find the most frequent words.
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-returns dict
-
-**Example**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### Incorrect input language correction
-
-```python
-from pythainlp.change import *
-```
-
-- texttothai(str) - eng to thai.
-- texttoeng(str) - thai to eng.
-
-### Thai Character Clusters (TCC)
-
-TCC : Mr.Jakkrit TeCho
-
-grammar :  Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-Code :  Korakot Chaovavanich 
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-### Enhanced Thai Character Cluster (ETCC)
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-### Thai Soundex
-
-credit Korakot Chaovavanich (from https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-- LK82
-- Udom83
-
-**Example**
-
-```python
->>> from pythainlp.soundex import LK82
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Thai meta sound
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**Example**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### Thai sentiment analysis
-
-using data from [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-input str returns pos , neg or neutral
-
-### Util
-
-using
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-for building ngrams 
-
-```python
-ngrams(token,num)
-```
-
-- token - list
-- num - ngrams
-
-### Corpus
-
-#### Thai stopword
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### Thai country name
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### Tone in Thai
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### Consonant in thai
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### Word list in thai
-
-```python
-from pythainlp.corpus.thaiword import get_data # old data
-get_data()
-from pythainlp.corpus.newthaiword import get_data # new data
-get_data()
-```
diff --git a/docs/archive/pythainlp-1-4-eng.pdf b/docs/archive/pythainlp-1-4-eng.pdf
deleted file mode 100644
index 9af1abbc0..000000000
Binary files a/docs/archive/pythainlp-1-4-eng.pdf and /dev/null differ
diff --git a/docs/archive/pythainlp-1-4-thai.md b/docs/archive/pythainlp-1-4-thai.md
deleted file mode 100644
index 6dd4abc33..000000000
--- a/docs/archive/pythainlp-1-4-thai.md
+++ /dev/null
@@ -1,375 +0,0 @@
-# คู่มือการใช้งาน PyThaiNLP 1.4
-
-[TOC]
-
-Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน !**
-
-> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
-
-รองรับเฉพาะ Python 3.4 ขึ้นไปเท่านั้น
-
-ติดตั้งใช้คำสั่ง
-
-```
-pip install pythainlp
-```
-
-**วิธีติดตั้งสำหรับ Windows**
-
-ให้ทำการติดตั้ง pyicu โดยใช้ไฟล์ .whl จาก [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) 
-
-หากใช้ python 3.5 64 bit ให้โหลด PyICU‑1.9.7‑cp35‑cp35m‑win_amd64.whl แล้วเปิด cmd ใช้คำสั่ง
-
-```
-pip install PyICU‑1.9.7‑cp35‑cp35m‑win_amd64.whl
-```
-
-แล้วจึงใช้ 
-
-```
-pip install pythainlp
-```
-
-**ติดตั้งบน Mac**
-
-```sh
-$ brew install icu4c --force
-$ brew link --force icu4c
-$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
-```
-
-ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
-
-## API
-
-### ตัดคำไทย
-
-สำหรับการตัดคำไทยนั้น ใช้ API ดังต่อไปนี้
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-text คือ ข้อความในรูปแบบสตริง str เท่านั้น
-
-engine คือ ระบบตัดคำไทย ปัจจุบันนี้ PyThaiNLP ได้พัฒนามี 6 engine ให้ใช้งานกันดังนี้
-
-1. icu -  engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ) และเป็นค่าเริ่มต้น
-2. dict - เป็นการตัดคำโดยใช้พจานุกรมจาก thaiword.txt ใน corpus  (ความแม่นยำปานกลาง) จะคืนค่า False หากข้อความนั้นไม่สามารถตัดคำได้
-3. mm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย - API ชุดเก่า
-4. newmm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ โดยใช้โค้ดคุณ Korakot Chaovavanich  จาก https://www.facebook.com/groups/408004796247683/permalink/431283740586455/ มาพัฒนาต่อ
-5. pylexto ใช้ LexTo ในการตัดคำ
-6. deepcut ใช้ deepcut จาก https://github.com/rkcosmos/deepcut ในการตัดคำภาษาไทย
-
-คืนค่าเป็น ''list'' เช่น ['แมว','กิน']
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-### Postaggers ภาษาไทย
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
-
-engine คือ ชุดเครื่องมือในการ postaggers มี 2 ตัวดังนี้
-
-1. old เป็น UnigramTagger (ค่าเริ่มต้น)
-2. artagger เป็น RDR POS Tagger ละเอียดยิ่งกว่าเดิม รองรับเฉพาะ Python 3 เท่านั้น
-
-### แปลงข้อความเป็น Latin
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='pyicu')
-```
-มี 2 engine ดังนี้
-
-- pyicu ส่งค่า Latin
-- royin ใช้หลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน (**หากมีข้อผิดพลาด ให้ใช้คำอ่าน เนื่องจากตัว royin ไม่มีตัวแปลงคำเป็นคำอ่าน**)
-
-data :
-
-รับค่า ''str'' ข้อความ 
-
-คืนค่าเป็น ''str'' ข้อความ
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### เช็คคำผิด 
-
-ก่อนใช้งานความสามารถนี้ ให้ทำการติดตั้ง hunspell และ hunspell-th ก่อน
-
-**วิธีติดตั้ง** สำหรับบน Debian , Ubuntu
-
-```
-sudo apt-get install hunspell hunspell-th
-```
-
-บน Mac OS ติดตั้งตามนี้ [http://pankdm.github.io/hunspell.html](http://pankdm.github.io/hunspell.html)
-
-ให้ใช้ pythainlp.spell ตามตัวอย่างนี้
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม', 'เสียเหลี่ยม', 'เหลี่ยม']
-```
-### pythainlp.number
-
-```python
-from pythainlp.number import *
-```
-จัดการกับตัวเลข โดยมีดังนี้
-
-- nttn(str)  - เป็นการแปลงเลขไทยสู่เลข
-- nttt(str) - เลขไทยสู่ข้อความ
-- ntnt(str) - เลขสู่เลขไทย
-- ntt(str) - เลขสู่ข้อความ
-- ttn(str) - ข้อความสู่เลข
-- numtowords(float) -  อ่านจำนวนตัวเลขภาษาไทย (บาท) รับค่าเป็น ''float'' คืนค่าเป็น  'str'
-
-### เรียงลำดับข้อมูลภาษาไทยใน List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-รับ list คืนค่า list
-
-### รับเวลาปัจจุบันเป็นภาษาไทย
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### WordNet ภาษาไทย
-
-เรียกใช้งาน
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**การใช้งาน**
-
-API เหมือนกับ NLTK โดยรองรับ API ดังนี้
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**ตัวอย่าง**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-### หาคำที่มีจำนวนการใช้งานมากที่สุด
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-คืนค่าออกมาเป็น dict
-
-**ตัวอย่างการใช้งาน**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
-
-```python
-from pythainlp.change import *
-```
-
-มีคำสั่งดังนี้
-
-- texttothai(str) แปลงแป้นตัวอักษรภาษาอังกฤษเป็นภาษาไทย
-- texttoeng(str) แปลงแป้นตัวอักษรภาษาไทยเป็นภาษาอังกฤษ
-
-คืนค่าออกมาเป็น str
-
-### Thai Character Clusters (TCC)
-
-PyThaiNLP 1.4 รองรับ Thai Character Clusters (TCC) โดยจะแบ่งกลุ่มด้วย /
-
-**เดติด**
-
-TCC : Mr.Jakkrit TeCho
-
-grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-โค้ด : คุณ Korakot Chaovavanich 
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-### Enhanced Thai Character Cluster (ETCC)
-
-นอกจาก TCC แล้ว PyThaiNLP 1.4 ยังรองรับ Enhanced Thai Character Cluster (ETCC) โดยแบ่งกลุ่มด้วย /
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-### Thai Soundex ภาษาไทย
-
-เดติด คุณ Korakot Chaovavanich (จาก https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-กฎที่รองรับในเวชั่น 1.4
-
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ  วิชิตหล่อจีระชุณห์กุล  และ  เจริญ  คุวินทร์พันธุ์ - LK82
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ วรรณี อุดมพาณิชย์ - Udom83
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.soundex import LK82
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Meta Sound ภาษาไทย
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### Sentiment analysis ภาษาไทย
-
-ใช้ข้อมูลจาก [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-รับค่า str ส่งออกเป็น pos , neg หรือ neutral
-
-### Util
-
-การใช้งาน
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-สำหรับสร้าง ngrams 
-
-```python
-ngrams(token,num)
-```
-
-- token คือ list
-- num คือ จำนวน ngrams
-
-### Corpus
-
-#### stopword ภาษาไทย
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### ชื่อประเทศ ภาษาไทย
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### ตัววรรณยุกต์ในภาษาไทย
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### ตัวพยัญชนะในภาษาไทย
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### รายการคำในภาษาไทย
-
-```python
-from pythainlp.corpus.thaiword import get_data # ข้อมูลเก่า
-get_data()
-from pythainlp.corpus.newthaiword import get_data # ข้อมูลใหม่
-get_data()
-```
-
-เขียนโดย นาย วรรณพงษ์  ภัททิยไพบูลย์
\ No newline at end of file
diff --git a/docs/archive/pythainlp-1-4-thai.pdf b/docs/archive/pythainlp-1-4-thai.pdf
deleted file mode 100644
index 6af963a3c..000000000
Binary files a/docs/archive/pythainlp-1-4-thai.pdf and /dev/null differ
diff --git a/docs/archive/pythainlp-1-5-eng.md b/docs/archive/pythainlp-1-5-eng.md
deleted file mode 100644
index 227bcbb73..000000000
--- a/docs/archive/pythainlp-1-5-eng.md
+++ /dev/null
@@ -1,471 +0,0 @@
-# User manual PyThaiNLP 1.5
-
-[TOC]
-
-## API
-
-### tokenize
-
-#### word_tokenize
-
-word_tokenize is thai word segmatation.
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-**text** refers to an input text string in Thai.
-
-**engine** refers to a thai word segmentation system; There are 6 systems to choose from.
-
-1. icu (default) - pyicu has a very poor performance. 
-2. dict - dictionary-based tokenizer. It returns False if the message can not be wrapped.
-3. mm - Maximum Matching algorithm for Thai word segmentation.
-4. newmm - Maximum Matching algorithm for Thai word segmatation. Developed by Korakot Chaovavanich (https://www.facebook.com/groups/408004796247683/permalink/431283740586455/)
-5. pylexto - LexTo.
-6. deepcut - Deep Learning based Thai word segmentation (https://github.com/rkcosmos/deepcut)
-
-
-Output: ''list'' ex. ['แมว','กิน']
-
-**Example**
-
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-#### dict_word_tokenize
-
-```python
-from pythainlp.tokenize import dict_word_tokenize
-dict_word_tokenize(text,file,engine)
-```
-
-A command for tokenize by using user-defined information.
-
-text : str
-
-file : name file data using in tokenize.
-
-engine
-
-- newmm
-- wordcutpy : using wordcutpy (https://github.com/veer66/wordcutpy)
-- mm
-- longest-matching
-
-Example https://gist.github.com/wannaphongcom/1e862583051bf0464b6ef4ed592f739c
-
-#### sent_tokenize
-
-Thai Sentence Tokenizer
-
-```python
-sent_tokenize(text,engine='whitespace+newline')
-```
-
-engine :
-
-- whitespace - tokenizer from whitespace
-- whitespace+newline - tokenizer from whitespace and newline.
-
-#### Thai Character Clusters (TCC)
-
-TCC : Mr.Jakkrit TeCho
-
-grammar :  Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-Code :  Korakot Chaovavanich 
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-#### Enhanced Thai Character Cluster (ETCC)
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-#### WhitespaceTokenizer
-
-Tokenizer by using spaces
-
-```python
->>> from pythainlp.tokenize import WhitespaceTokenizer
->>> WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")
-['ทดสอบ', 'ตัดคำช่องว่าง']
-```
-#### isthai
-
-check
-
-### Thai postaggers
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-engine
-
-1. old is the UnigramTagger (default)
-2. artagger is the RDR POS Tagger.
-
-### romanization
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='pyicu')
-```
-There are 2 engines
-
-- pyicu
-- royin
-
-data :
-
-input ''str''
-
-returns ''str'' 
-
-**Example**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### keywords
-
-#### find_keyword
-
-find keywords from thai text in list.
-
-```python
-find_keyword(list,lentext=3)
-```
-lentext is minimum number of keywords words.
-
-return dict {words: number of keywords}
-
-### Spell Check 
-
-```python
-spell(word,engine='pn')
-```
-engine
-
-- 'pn' code from Peter Norvig
-- 'hunspell' using hunspell
-
-Before using this module,  please install hunspell and hunspell-th.
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-#### pn
-
-```python
-correction(word)
-```
-
-Show word possible
-
-**Sample usage**
-
-```python
-from pythainlp.spell.pn import correction
-a=correction("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-### number
-
-```python
-from pythainlp.number import *
-```
-- nttn(str)  - convert thai numbers to numbers.
-- nttt(str) - Thai Numbers to text.
-- ntnt(str) - numbers to thai numbers.
-- ntt(str) -  numbers to text.
-- ttn(str) - text to  numbers.
-- numtowords(float) -  Read thai numbers (Baht) input ''float'' returns  'str'
-
-### Sort Thai text into List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-input list 
-
-returns list
-
-### Get current time in Thai
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### Find the most frequent words.
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-returns dict
-
-**Example**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### Incorrect input language correction
-
-```python
-from pythainlp.change import *
-```
-
-- texttothai(str) - eng to thai.
-- texttoeng(str) - thai to eng.
-
-### Thai Soundex
-
-credit Korakot Chaovavanich (from https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-- LK82
-- Udom83
-
-**Example**
-
-```python
->>> from pythainlp.soundex import LK82,Udom83
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Thai meta sound
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**Example**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### Thai sentiment analysis
-
-using data from [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-input str returns pos , neg or neutral
-
-### Util
-
-using
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-for building ngrams 
-
-```python
-ngrams(token,num)
-```
-
-- token - list
-- num - ngrams
-
-#### bigrams
-
-for building bigrams
-
-```python
-bigrams(token)
-```
-
-- token - list
-
-#### trigram
-
-for building trigram
-
-```python
-trigram(token)
-```
-
-- token - list
-
-#### normalize
-
-fix thai text
-
-```python
-normalize(text)
-```
-
-**Example**
-
-```python
->>> print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก
-True
-```
-
-### Corpus
-
-#### Thai stopword
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### Thai country name
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### Tone in Thai
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### Consonant in thai
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### Word list in thai
-
-```python
-from pythainlp.corpus.thaiword import get_data # old data
-get_data()
-from pythainlp.corpus.newthaiword import get_data # new data
-get_data()
-```
-
-#### ConceptNet
-
-Thai tool for ConceptNet.
-
-**find edges**
-
-```python
-edges(word,lang='th')
-```
-
-return dict
-
-#### Thai WordNet
-
-import
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**Use**
-
-It's like nltk.
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**Example**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-#### TNC
-
-Tool for Thai National Corpus (http://www.arts.chula.ac.th/~ling/TNC/index.php)
-
-##### word_frequency
-
-find word frequency
-
-```python
-word_frequency(word,domain='all')
-```
-domain
-
-- all
-- imaginative
-- natural-pure-science
-- applied-science
-- social-science
-- world-affairs-history
-- commerce-finance
-- arts
-- belief-thought
-- leisure
-- others
\ No newline at end of file
diff --git a/docs/archive/pythainlp-1-5-thai.md b/docs/archive/pythainlp-1-5-thai.md
deleted file mode 100644
index ee03c26dd..000000000
--- a/docs/archive/pythainlp-1-5-thai.md
+++ /dev/null
@@ -1,609 +0,0 @@
-# คู่มือการใช้งาน PyThaiNLP 1.5
-
-[TOC]
-
-Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน !**
-
-> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
-
-รองรับ Python 2.7 และ Python 3.4 ขึ้นไปเท่านั้น
-
-ติดตั้งใช้คำสั่ง
-
-```
-pip install pythainlp
-```
-
-**วิธีติดตั้งสำหรับ Windows**
-
-การติดตั้ง pythainlp บน windows ต้องติดตั้ง pyicu ซึ่งทำได้ยากมาก
-วิธีที่ง่ายที่สุดคือใช้ wheel
-
-1. [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) แล้ว download wheel ตาม python ตัวเองเช่น
-  ผมใช้ python x64 3.6.1 บน Windows ก็ให้ใช้ PyICU‑1.9.7‑cp36‑cp36m‑win_amd64.whl
-
-2. `pip install PyICU‑1.9.7‑cp36‑cp36m‑win_amd64.whl`
-
-3. `pip install pythainlp`
-
-**ติดตั้งบน Mac**
-
-** แนะนำให้ใช้ icu 58.2 เนื่องจาก icu 59.1 มาปัญหากับ PyICU **
-
-```sh
-$ brew install icu4c --force
-$ brew link --force icu4c
-$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
-```
-
-ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
-
-## API
-
-### tokenize
-
-#### word_tokenize
-
-สำหรับการตัดคำไทยนั้น ใช้ API ดังต่อไปนี้
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-text คือ ข้อความในรูปแบบสตริง str เท่านั้น
-
-engine คือ ระบบตัดคำไทย ปัจจุบันนี้ PyThaiNLP ได้พัฒนามี 6 engine ให้ใช้งานกันดังนี้
-
-1. icu -  engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ) และเป็นค่าเริ่มต้น
-2. dict - เป็นการตัดคำโดยใช้พจานุกรมจาก thaiword.txt ใน corpus  (ความแม่นยำปานกลาง) **จะคืนค่า False หากข้อความนั้นไม่สามารถตัดคำได้**
-3. longest-matching ใช้ Longest matching ในการตัดคำ
-4. mm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย - API ชุดเก่า
-5. newmm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ โดยใช้โค้ดคุณ Korakot Chaovavanich  จาก https://www.facebook.com/groups/408004796247683/permalink/431283740586455/ มาพัฒนาต่อ
-6. pylexto ใช้ LexTo ในการตัดคำ โดยเป็น Longest matching
-7. deepcut ใช้ deepcut จาก https://github.com/rkcosmos/deepcut ในการตัดคำภาษาไทย
-8. wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ
-
-คืนค่าเป็น ''list'' เช่น ['แมว','กิน']
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-g=word_tokenize(text,engine='wordcutpy') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-#### dict_word_tokenize
-
-```python
-from pythainlp.tokenize import dict_word_tokenize
-dict_word_tokenize(text,file,engine)
-```
-
-เป็นคำสั่งสำหรับตัดคำโดยใช้ข้อมูลที่ผู้ใช้กำหนด
-
-text คือ ข้อความที่ต้องการตัดคำ
-
-file คือ ที่ตั้งไฟล์ที่ต้องการมาเป็นฐานข้อมูลตัดคำ
-
-engine คือ เครื่องมือตัดคำ
-
-- newmm ตัดคำด้วย newmm
-- wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ
-- mm ตัดคำด้วย mm
-- longest-matching ตัดคำโดยใช้ longest matching
-
-ตัวอย่างการใช้งาน https://gist.github.com/wannaphongcom/1e862583051bf0464b6ef4ed592f739c
-
-#### sent_tokenize
-
-ใช้ตัดประโยคภาษาไทย
-
-```python
-sent_tokenize(text,engine='whitespace+newline')
-```
-
-text คือ ข้อความในรูปแบบสตริง
-
-engine คือ เครื่องมือสำหรับใช้ตัดประโยค
-
-- whitespace ตัดประโยคจากช่องว่าง
-- whitespace+newline ตัดประโยคจากช่องว่างและตัดจากการขึ้นบรรทัดใหม่
-
-คืนค่า ออกมาเป็น list
-
-#### WhitespaceTokenizer
-
-ใช้ตัดคำ/ประโยคจากช่องว่างในสตริง
-
-```python
->>> from pythainlp.tokenize import WhitespaceTokenizer
->>> WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")
-['ทดสอบ', 'ตัดคำช่องว่าง']
-```
-
-#### isthai
-
-ใช้เช็คข้อความว่าเป็นภาษาไทยทั้งหมดกี่ %
-
-```python
-isthai(text,check_all=False)
-```
-
-text คือ ข้อความหรือ list ตัวอักษร
-
-check_all สำหรับส่งคืนค่า True หรือ False เช็คทุกตัวอักษร
-
-**การส่งคืนค่า**
-
-```python
-{'thai':% อักษรภาษาไทย,'check_all':tuple โดยจะเป็น (ตัวอักษร,True หรือ False)}
-```
-
-#### Thai Character Clusters (TCC)
-
-PyThaiNLP 1.4 รองรับ Thai Character Clusters (TCC) โดยจะแบ่งกลุ่มด้วย /
-
-**เดติด**
-
-TCC : Mr.Jakkrit TeCho
-
-grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-โค้ด : คุณ Korakot Chaovavanich 
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-#### Enhanced Thai Character Cluster (ETCC)
-
-นอกจาก TCC แล้ว PyThaiNLP 1.4 ยังรองรับ Enhanced Thai Character Cluster (ETCC) โดยแบ่งกลุ่มด้วย /
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-### keywords
-
-ใช้หา keywords จากข้อความภาษาไทย
-
-#### find_keyword
-
-การทำงาน หาคำที่ถูกใช้งานมากกว่าค่าขั้นต่ำที่กำหนดได้ โดยจะลบ stopword ออกไป
-
-```python
-find_keyword(word_list,lentext=3)
-```
-
-word_list คือ list ของข้อความที่ผ่านการตัดคำแล้ว
-
-lentext คือ จำนวนคำขั้นต่ำที่ต้องการหา keyword
-
-คืนค่าออกมาเป็น dict
-
-### tag
-
-เป็น Part-of-speech tagging ภาษาไทย
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
-
-engine คือ ชุดเครื่องมือในการ postaggers มี 2 ตัวดังนี้
-
-1. old เป็น UnigramTagger (ค่าเริ่มต้น)
-2. artagger เป็น RDR POS Tagger ละเอียดยิ่งกว่าเดิม รองรับเฉพาะ Python 3 เท่านั้น
-
-### romanization
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='pyicu')
-```
-มี 2 engine ดังนี้
-
-- pyicu ส่งค่า Latin
-- royin ใช้หลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน (**หากมีข้อผิดพลาด ให้ใช้คำอ่าน เนื่องจากตัว royin ไม่มีตัวแปลงคำเป็นคำอ่าน**)
-
-data :
-
-รับค่า ''str'' ข้อความ 
-
-คืนค่าเป็น ''str'' ข้อความ
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### spell 
-
-เป็น API สำหรับเช็คคำผิดในภาษาไทย 
-
-```python
-spell(word,engine='pn')
-```
-
-engine ที่รองรับ
-
-- pn พัฒนามาจาก Peter Norvig (ค่าเริ่มต้น)
-- hunspell ใช้ hunspell (ไม่รองรับ Python 2.7)
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-#### pn
-
-```python
-correction(word)
-```
-
-แสดงคำที่เป็นไปได้มากที่สุด
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell.pn import correction
-a=correction("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-
-ผลลัพธ์
-
-```
-สี่เหลี่ยม
-```
-
-### pythainlp.number
-
-```python
-from pythainlp.number import *
-```
-จัดการกับตัวเลข โดยมีดังนี้
-
-- thai_num_to_num(str)  - เป็นการแปลงเลขไทยสู่เลข
-- thai_num_to_text(str) - เลขไทยสู่ข้อความ
-- num_to_thai_num(str) - เลขสู่เลขไทย
-- num_to_text(str) - เลขสู่ข้อความ
-- text_to_num(str) - ข้อความสู่เลข
-- numtowords(float) -  อ่านจำนวนตัวเลขภาษาไทย (บาท) รับค่าเป็น ''float'' คืนค่าเป็น  'str'
-
-### collation
-
-ใช้ในการเรียงลำดับข้อมูลภาษาไทยใน List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-รับ list คืนค่า list
-
-### date
-
-#### now
-
-รับเวลาปัจจุบันเป็นภาษาไทย
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### rank
-
-#### rank
-
-หาคำที่มีจำนวนการใช้งานมากที่สุด
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-คืนค่าออกมาเป็น dict
-
-**ตัวอย่างการใช้งาน**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### change
-
-#### แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
-
-```python
-from pythainlp.change import *
-```
-
-มีคำสั่งดังนี้
-
-- texttothai(str) แปลงแป้นตัวอักษรภาษาอังกฤษเป็นภาษาไทย
-- texttoeng(str) แปลงแป้นตัวอักษรภาษาไทยเป็นภาษาอังกฤษ
-
-คืนค่าออกมาเป็น str
-
-### soundex
-
-เดติด คุณ Korakot Chaovavanich (จาก https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-กฎที่รองรับในเวชั่น 1.4
-
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ  วิชิตหล่อจีระชุณห์กุล  และ  เจริญ  คุวินทร์พันธุ์ - LK82
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ วรรณี อุดมพาณิชย์ - Udom83
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.soundex import LK82,Udom83
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Meta Sound ภาษาไทย
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### sentiment
-
-เป็น Sentiment analysis ภาษาไทย ใช้ข้อมูลจาก [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-รับค่า str ส่งออกเป็น pos , neg
-
-### Util
-
-การใช้งาน
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-สำหรับสร้าง n-grams 
-
-```python
-ngrams(token,num)
-```
-
-- token คือ list
-- num คือ จำนวน ngrams
-
-#### bigrams
-
-สำหรับสร้าง bigrams
-
-```python
-bigrams(token)
-```
-
-- token คือ list
-
-#### trigram
-
-สำหรับสร้าง trigram
-
-```python
-trigram(token)
-```
-
-- token คือ list
-
-#### normalize
-
-ซ่อมข้อความภาษาไทย เช่น กี่่่ ไปเป็น กี่
-
-```python
-normalize(text)
-```
-
-**ตัวอย่าง**
-
-```python
->>> print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก
-True
-```
-
-### Corpus
-
-#### WordNet ภาษาไทย
-
-เรียกใช้งาน
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**การใช้งาน**
-
-API เหมือนกับ NLTK โดยรองรับ API ดังนี้
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**ตัวอย่าง**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-#### stopword ภาษาไทย
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### ชื่อประเทศ ภาษาไทย
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### ตัววรรณยุกต์ในภาษาไทย
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### ตัวพยัญชนะในภาษาไทย
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### รายการคำในภาษาไทย
-
-```python
-from pythainlp.corpus.thaiword import get_data # ข้อมูลเก่า
-get_data()
-from pythainlp.corpus.newthaiword import get_data # ข้อมูลใหม่
-get_data()
-```
-
-#### provinces
-
-สำหรับจัดการชื่อจังหวัดในประเทศไทย
-
-##### get_data
-
-รับข้อมูลชื่อจังหวัดในประเทศไทบ
-
-```python
-get_data()
-```
-
-คือค่าออกมาเป็น list
-
-##### parsed_docs
-
-สำหรับใช้ Tag ชื่อจังหวัดในประเทศไทย
-
-```python
-parsed_docs(text_list)
-```
-
-text_list คือ ข้อความภาษาไทยที่อยู่ใน list โดยผ่านการตัดคำมาแล้ว
-
-**ตัวอย่าง**
-
-```python
->>> d=['หนองคาย', 'เป็น', 'เมือง', 'น่าอยู่', 'อันดับ', 'ต้น', 'ๆ', 'ของ', 'โลก', 'นอกจากนี้', 'ยัง', 'มี', 'เชียงใหม่']
->>> parsed_docs(d)
-["[LOC : 'หนองคาย']", 'เป็น', 'เมือง', 'น่าอยู่', 'อันดับ', 'ต้น', 'ๆ', 'ของ', 'โลก', 'นอกจากนี้', 'ยัง', 'มี', "[LOC : 'เชียงใหม่']"]
-```
-
-#### ConceptNet
-
-เครื่องมือสำหรับ ConceptNet.
-
-**ค้นหา edges**
-
-```python
-edges(word,lang='th')
-```
-
-return dict
-
-#### TNC
-
-สำหรับใช้จัดการกับ Thai National Corpus (http://www.arts.chula.ac.th/~ling/TNC/index.php)
-
-##### word_frequency
-
-ใช้วัดความถี่ของคำ
-
-```python
-word_frequency(word,domain='all')
-```
-
-word คือ คำ
-
-domain คือ หมวดหมู่ของคำ
-
-มีหมวดหมู่ดังนี้
-
-- all
-- imaginative
-- natural-pure-science
-- applied-science
-- social-science
-- world-affairs-history
-- commerce-finance
-- arts
-- belief-thought
-- leisure
-- others
-
-เขียนโดย นาย วรรณพงษ์  ภัททิยไพบูลย์
diff --git a/docs/archive/pythainlp-1-6-eng.md b/docs/archive/pythainlp-1-6-eng.md
deleted file mode 100644
index df1c9f16e..000000000
--- a/docs/archive/pythainlp-1-6-eng.md
+++ /dev/null
@@ -1,502 +0,0 @@
-# User manual PyThaiNLP 1.6
-
-[TOC]
-
-## API
-
-### tokenize
-
-#### word_tokenize
-
-word_tokenize is thai word segmatation.
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-**text** refers to an input text string in Thai.
-
-**engine** refers to a thai word segmentation system; There are 6 systems to choose from.
-
-1. newmm  (default) - Maximum Matching algorithm for Thai word segmatation. Developed by Korakot Chaovavanich (https://www.facebook.com/groups/408004796247683/permalink/431283740586455/)
-2. icu - pyicu has a very poor performance. 
-3. dict - dictionary-based tokenizer. It returns False if the message can not be wrapped.
-4. longest-matching - using Longest matching algorithm for Thai word segmentation.
-5. mm - Maximum Matching algorithm for Thai word segmentation.
-6. pylexto - LexTo.
-7. deepcut - Deep Learning based Thai word segmentation (https://github.com/rkcosmos/deepcut)
-8. wordcutpy - use wordcutpy (https://github.com/veer66/wordcutpy)  for Thai word segmentation.
-
-
-Output: ''list'' ex. ['แมว','กิน']
-
-**Example**
-
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-g=word_tokenize(text,engine='wordcutpy') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-#### dict_word_tokenize
-
-```python
-from pythainlp.tokenize import dict_word_tokenize
-dict_word_tokenize(text,file,engine)
-```
-
-A command for tokenize by using user-defined information.
-
-text : str
-
-file : name file data using in tokenize.
-
-engine
-
-- newmm
-- wordcutpy : using wordcutpy (https://github.com/veer66/wordcutpy)
-- mm
-- longest-matching
-
-Example https://gist.github.com/wannaphongcom/1e862583051bf0464b6ef4ed592f739c
-
-#### sent_tokenize
-
-Thai Sentence Tokenizer
-
-```python
-sent_tokenize(text,engine='whitespace+newline')
-```
-
-engine :
-
-- whitespace - tokenizer from whitespace
-- whitespace+newline - tokenizer from whitespace and newline.
-
-#### Thai Character Clusters (TCC)
-
-TCC : Mr.Jakkrit TeCho
-
-grammar :  Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-Code :  Korakot Chaovavanich 
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-#### Enhanced Thai Character Cluster (ETCC)
-
-**Example**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-#### WhitespaceTokenizer
-
-Tokenizer by using spaces
-
-```python
->>> from pythainlp.tokenize import WhitespaceTokenizer
->>> WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")
-['ทดสอบ', 'ตัดคำช่องว่าง']
-```
-#### isthai
-
-check
-
-### Thai postaggers
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-engine
-
-1. old is the UnigramTagger (default)
-2. artagger is the RDR POS Tagger.
-
-### romanization
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='pyicu')
-```
-There are 2 engines
-
-- pyicu
-- royin
-
-data :
-
-input ''str''
-
-returns ''str'' 
-
-**Example**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'mæw'
-```
-
-### keywords
-
-#### find_keyword
-
-find keywords from thai text in list.
-
-```python
-find_keyword(list,lentext=3)
-```
-lentext is minimum number of keywords words.
-
-return dict {words: number of keywords}
-
-### Spell Check 
-
-```python
-spell(word,engine='pn')
-```
-engine
-
-- 'pn' code from Peter Norvig
-- 'hunspell' using hunspell
-
-Before using this module,  please install hunspell and hunspell-th.
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-#### pn
-
-```python
-correction(word)
-```
-
-Show word possible
-
-**Sample usage**
-
-```python
-from pythainlp.spell.pn import correction
-a=correction("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-### number
-
-```python
-from pythainlp.number import *
-```
-- nttn(str)  - convert thai numbers to numbers.
-- nttt(str) - Thai Numbers to text.
-- ntnt(str) - numbers to thai numbers.
-- ntt(str) -  numbers to text.
-- ttn(str) - text to  numbers.
-- numtowords(float) -  Read thai numbers (Baht) input ''float'' returns  'str'
-
-### Sort Thai text into List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-input list 
-
-returns list
-
-### Get current time in Thai
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### Find the most frequent words.
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-returns dict
-
-**Example**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### Incorrect input language correction
-
-```python
-from pythainlp.change import *
-```
-
-- texttothai(str) - eng to thai.
-- texttoeng(str) - thai to eng.
-
-### word_vector
-
-```python
-from pythainlp.word_vector import thai2vec
-```
-
-word_vector is word vector in PyThaiNLP
-
-It's work using thai2vec (https://github.com/cstorm125/thai2vec)
-
-thai2vec developed by Charin Polpanumas
-
-#### thai2vec
-
-requirements
-
-- gensim
-- numpy
-
-##### API
-
-- get_model() - get gensim model
-- most_similar_cosmul(positive,negative)
-- doesnt_match(listdata)
-- similarity(word1,word2)
-- sentence_vectorizer(ss,dim=300,use_mean=False)
-- about()
-
-### Thai Soundex
-
-credit Korakot Chaovavanich (from https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-- LK82
-- Udom83
-
-**Example**
-
-```python
->>> from pythainlp.soundex import LK82,Udom83
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Thai meta sound
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**Example**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### Thai sentiment analysis
-
-using data from [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-input str returns pos , neg or neutral
-
-### Util
-
-using
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-for building ngrams 
-
-```python
-ngrams(token,num)
-```
-
-- token - list
-- num - ngrams
-
-#### bigrams
-
-for building bigrams
-
-```python
-bigrams(token)
-```
-
-- token - list
-
-#### trigram
-
-for building trigram
-
-```python
-trigram(token)
-```
-
-- token - list
-
-#### normalize
-
-fix thai text
-
-```python
-normalize(text)
-```
-
-**Example**
-
-```python
->>> print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก
-True
-```
-
-### Corpus
-
-#### Thai stopword
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### Thai country name
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### Tone in Thai
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### Consonant in thai
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### Word list in thai
-
-```python
-from pythainlp.corpus.thaiword import get_data # old data
-get_data()
-from pythainlp.corpus.newthaiword import get_data # new data
-get_data()
-```
-
-#### ConceptNet
-
-Thai tool for ConceptNet.
-
-**find edges**
-
-```python
-edges(word,lang='th')
-```
-
-return dict
-
-#### Thai WordNet
-
-import
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**Use**
-
-It's like nltk.
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**Example**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-#### TNC
-
-Tool for Thai National Corpus (http://www.arts.chula.ac.th/~ling/TNC/index.php)
-
-##### word_frequency
-
-find word frequency
-
-```python
-word_frequency(word,domain='all')
-```
-domain
-
-- all
-- imaginative
-- natural-pure-science
-- applied-science
-- social-science
-- world-affairs-history
-- commerce-finance
-- arts
-- belief-thought
-- leisure
-- others
\ No newline at end of file
diff --git a/docs/archive/pythainlp-1-6-thai.md b/docs/archive/pythainlp-1-6-thai.md
deleted file mode 100644
index b7e5b67a9..000000000
--- a/docs/archive/pythainlp-1-6-thai.md
+++ /dev/null
@@ -1,683 +0,0 @@
-# คู่มือการใช้งาน PyThaiNLP 1.6
-
-[TOC]
-
-**เอกสารใหม่ย้ายไปที่ https://thainlp.org/pythainlp/docs/1.7/***
-
-Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน !**
-
-> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
-
-รองรับ Python 2.7 และ Python 3.4 ขึ้นไปเท่านั้น
-
-ติดตั้งใช้คำสั่ง
-
-```
-pip install pythainlp
-```
-
-ปัจจุบันนี้ PyThaiNLP ไม่ต้องการ PyICU ในการใช้งาน API อีกแล้ว แต่หากท่านต้องการใช้ API ที่มี PyICU ให้ทำตามคำแนะนำข้างล่างนี้
-
-**วิธีติดตั้งสำหรับ Windows**
-
-การติดตั้ง pythainlp บน windows ต้องติดตั้ง pyicu ซึ่งทำได้ยากมาก
-วิธีที่ง่ายที่สุดคือใช้ wheel
-
-1. [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) แล้ว download wheel ตาม python ตัวเองเช่น
-    ผมใช้ python x64 3.6.1 บน Windows ก็ให้ใช้ PyICU‑1.9.7‑cp36‑cp36m‑win_amd64.whl
-
-2. `pip install PyICU‑1.9.7‑cp36‑cp36m‑win_amd64.whl`
-
-3. `pip install pythainlp`
-
-**ติดตั้งบน Mac**
-
-** แนะนำให้ใช้ icu 58.2 เนื่องจาก icu 59.1 มาปัญหากับ PyICU **
-
-```sh
-$ brew install icu4c --force
-$ brew link --force icu4c
-$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
-```
-
-ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
-
-## API
-
-### tokenize
-
-#### word_tokenize
-
-สำหรับการตัดคำไทยนั้น ใช้ API ดังต่อไปนี้
-
-```python
-from pythainlp.tokenize import word_tokenize
-word_tokenize(text,engine)
-```
-text คือ ข้อความในรูปแบบสตริง str เท่านั้น
-
-engine คือ ระบบตัดคำไทย ปัจจุบันนี้ PyThaiNLP ได้พัฒนามี 6 engine ให้ใช้งานกันดังนี้
-
-1. newmm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ โดยใช้โค้ดคุณ Korakot Chaovavanich  จาก https://www.facebook.com/groups/408004796247683/permalink/431283740586455/ มาพัฒนาต่อ (ค่าเริ่มต้น)
-2. icu -  engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ)
-3. dict - เป็นการตัดคำโดยใช้พจานุกรมจาก thaiword.txt ใน corpus  (ความแม่นยำปานกลาง) **จะคืนค่า False หากข้อความนั้นไม่สามารถตัดคำได้**
-4. longest-matching ใช้ Longest matching ในการตัดคำ
-5. mm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย - API ชุดเก่า **อยู่ในหมวดบำรุงรักษาเท่านั้น**
-6. pylexto ใช้ LexTo ในการตัดคำ โดยเป็น Longest matching
-7. deepcut ใช้ deepcut จาก https://github.com/rkcosmos/deepcut ในการตัดคำภาษาไทย
-8. wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ
-
-คืนค่าเป็น ''list'' เช่น ['แมว','กิน']
-
-**ตัวอย่าง**
-
-```
-สำหรับผู้ใช้งาน Python 2.7 ให้ทำการ encode ให้เป็น UTF-8 ก่อนใช้งานโมดูล PyThaiNLP
-
-เช่น text=u'ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-```
-
-**การใช้งาน**
-
-```python
-from pythainlp.tokenize import word_tokenize
-text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
-b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
-g=word_tokenize(text,engine='wordcutpy') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้านเกิด']
-```
-
-#### dict_word_tokenize
-
-```python
-from pythainlp.tokenize import dict_word_tokenize
-dict_word_tokenize(text,file,engine)
-```
-
-เป็นคำสั่งสำหรับตัดคำโดยใช้ข้อมูลที่ผู้ใช้กำหนด
-
-text คือ ข้อความที่ต้องการตัดคำ
-
-file คือ ที่ตั้งไฟล์ที่ต้องการมาเป็นฐานข้อมูลตัดคำ
-
-engine คือ เครื่องมือตัดคำ
-
-- newmm ตัดคำด้วย newmm
-- wordcutpy ใช้ wordcutpy (https://github.com/veer66/wordcutpy) ในการตัดคำ
-- mm ตัดคำด้วย mm
-- longest-matching ตัดคำโดยใช้ longest matching
-
-ตัวอย่างการใช้งาน https://gist.github.com/wannaphongcom/1e862583051bf0464b6ef4ed592f739c
-
-```
-สำหรับผู้ใช้งาน Python 2.7 ให้ทำการ encode ให้เป็น UTF-8 ก่อนใช้งานโมดูล PyThaiNLP
-
-เช่น text=u'ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
-```
-
-#### sent_tokenize
-
-ใช้ตัดประโยคภาษาไทย
-
-```python
-sent_tokenize(text,engine='whitespace+newline')
-```
-
-text คือ ข้อความในรูปแบบสตริง
-
-engine คือ เครื่องมือสำหรับใช้ตัดประโยค
-
-- whitespace ตัดประโยคจากช่องว่าง
-- whitespace+newline ตัดประโยคจากช่องว่างและตัดจากการขึ้นบรรทัดใหม่
-
-คืนค่า ออกมาเป็น list
-
-#### WhitespaceTokenizer
-
-ใช้ตัดคำ/ประโยคจากช่องว่างในสตริง
-
-```python
->>> from pythainlp.tokenize import WhitespaceTokenizer
->>> WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")
-['ทดสอบ', 'ตัดคำช่องว่าง']
-```
-
-```
-สำหรับผู้ใช้งาน Python 2.7 ให้ทำการ encode ให้เป็น UTF-8 ก่อนใช้งานโมดูล PyThaiNLP
-
-เช่น WhitespaceTokenizer(u"ทดสอบ ตัดคำช่องว่าง")
-```
-
-
-
-#### isthai
-
-ใช้เช็คข้อความว่าเป็นภาษาไทยทั้งหมดกี่ %
-
-```python
-isthai(text,check_all=False)
-```
-
-text คือ ข้อความหรือ list ตัวอักษร
-
-check_all สำหรับส่งคืนค่า True หรือ False เช็คทุกตัวอักษร
-
-**การส่งคืนค่า**
-
-```python
-{'thai':% อักษรภาษาไทย,'check_all':tuple โดยจะเป็น (ตัวอักษร,True หรือ False)}
-```
-
-#### Thai Character Clusters (TCC)
-
-รองรับ Thai Character Clusters (TCC) โดยจะแบ่งกลุ่มด้วย /
-
-**เครดิต**
-
-TCC : Mr.Jakkrit TeCho
-
-grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-
-โค้ด : คุณ Korakot Chaovavanich 
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import tcc
->>> tcc.tcc('ประเทศไทย')
-'ป/ระ/เท/ศ/ไท/ย'
-```
-
-#### Enhanced Thai Character Cluster (ETCC)
-
-นอกจาก TCC แล้ว PyThaiNLP 1.4 ยังรองรับ Enhanced Thai Character Cluster (ETCC) โดยแบ่งกลุ่มด้วย /
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.tokenize import etcc
->>> etcc.etcc('คืนความสุข')
-'/คืน/ความสุข'
-```
-
-### tag
-
-เป็น Part-of-speech tagging ภาษาไทย
-
-```python
-from pythainlp.tag import pos_tag
-pos_tag(list,engine='old')
-```
-
-list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
-
-engine คือ ชุดเครื่องมือในการ postaggers มี 2 ตัวดังนี้
-
-1. old เป็น UnigramTagger (ค่าเริ่มต้น)
-2. artagger เป็น RDR POS Tagger ละเอียดยิ่งกว่าเดิม รองรับเฉพาะ Python 3 เท่านั้น
-
-### summarize
-
-เป็นระบบสรุปเอกสารภาษาไทยแบบง่าย ๆ
-
-summarize_text(text,n,engine='frequency')
-
-    text เป็นข้อความ
-    n คือ จำนวนประโยคสรุป
-    engine ที่รองรับ
-    - frequency
-**การใช้งาน**
-
-```python
->>> from pythainlp.summarize import summarize_text
->>> summarize_text(text="อาหาร หมายถึง ของแข็งหรือของเหลว ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว จะทำให้เกิดพลังงานและความร้อนยเจริญเติบโต ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ อาหารจะต้องงกาย",n=1,engine='frequency')
-['อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย']
-```
-
-### word_vector
-
-```python
-from pythainlp.word_vector import thai2vec
-```
-
-word_vector เป็นระบบ word vector ใน PyThaiNLP
-
-ปัจจุบันนี้รองรับเฉพาะ thai2vec (https://github.com/cstorm125/thai2vec)
-
-thai2vec พัฒนาโดยคุณ Charin Polpanumas
-
-#### thai2vec
-
-ความต้องการโมดูล
-
-- gensim
-- numpy
-
-##### API
-
-- get_model() - รับข้อมูล model ในรูปแบบของ gensim
-- most_similar_cosmul(positive,negative)
-- doesnt_match(listdata)
-- similarity(word1,word2) - หาค่าความคล้ายกันระหว่าง 2 คำ โดยทั้งคู่เป็น str
-- sentence_vectorizer(ss,dim=300,use_mean=False)
-- about() - รายละเอียด thai2vec
-
-
-
-### keywords
-
-ใช้หา keywords จากข้อความภาษาไทย
-
-#### find_keyword
-
-การทำงาน หาคำที่ถูกใช้งานมากกว่าค่าขั้นต่ำที่กำหนดได้ โดยจะลบ stopword ออกไป
-
-```python
-find_keyword(word_list,lentext=3)
-```
-
-word_list คือ list ของข้อความที่ผ่านการตัดคำแล้ว
-
-lentext คือ จำนวนคำขั้นต่ำที่ต้องการหา keyword
-
-คืนค่าออกมาเป็น dict
-
-### romanization
-
-```python
-from pythainlp.romanization import romanization
-romanization(str,engine='royin')
-```
-มี 2 engine ดังนี้
-
-- pyicu ส่งค่า Latin
-- royin ใช้หลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน (**หากมีข้อผิดพลาด ให้ใช้คำอ่าน เนื่องจากตัว royin ไม่มีตัวแปลงคำเป็นคำอ่าน**) 
-
-data :
-
-รับค่า ''str'' ข้อความ 
-
-คืนค่าเป็น ''str'' ข้อความ
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.romanization import romanization
-romanization("แมว") # 'maew'
-```
-
-### spell 
-
-เป็น API สำหรับเช็คคำผิดในภาษาไทย 
-
-```python
-spell(word,engine='pn')
-```
-
-engine ที่รองรับ
-
-- pn พัฒนามาจาก Peter Norvig (ค่าเริ่มต้น)
-- hunspell ใช้ hunspell (ไม่รองรับ Python 2.7)
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell import *
-a=spell("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-#### pn
-
-```python
-correction(word)
-```
-
-แสดงคำที่เป็นไปได้มากที่สุด
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell.pn import correction
-a=correction("สี่เหลียม")
-print(a) # ['สี่เหลี่ยม']
-```
-
-ผลลัพธ์
-
-```
-สี่เหลี่ยม
-```
-
-### pythainlp.number
-
-```python
-from pythainlp.number import *
-```
-จัดการกับตัวเลข โดยมีดังนี้
-
-- thai_num_to_num(str)  - เป็นการแปลงเลขไทยสู่เลข
-- thai_num_to_text(str) - เลขไทยสู่ข้อความ
-- num_to_thai_num(str) - เลขสู่เลขไทย
-- num_to_text(str) - เลขสู่ข้อความ
-- text_to_num(str) - ข้อความสู่เลข
-- numtowords(float) -  อ่านจำนวนตัวเลขภาษาไทย (บาท) รับค่าเป็น ''float'' คืนค่าเป็น  'str'
-
-### collation
-
-ใช้ในการเรียงลำดับข้อมูลภาษาไทยใน List
-
-```python
-from pythainlp.collation import collation
-print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
-```
-
-รับ list คืนค่า list
-
-### date
-
-#### now
-
-รับเวลาปัจจุบันเป็นภาษาไทย
-
-```python
-from pythainlp.date import now
-now() # '30 พฤษภาคม 2560 18:45:24'
-```
-### rank
-
-#### rank
-
-หาคำที่มีจำนวนการใช้งานมากที่สุด
-
-```python
-from pythainlp.rank import rank
-rank(list)
-```
-
-คืนค่าออกมาเป็น dict
-
-**ตัวอย่างการใช้งาน**
-
-```python
->>> rank(['แมง','แมง','คน'])
-Counter({'แมง': 2, 'คน': 1})
-```
-
-### change
-
-#### แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
-
-```python
-from pythainlp.change import *
-```
-
-มีคำสั่งดังนี้
-
-- texttothai(str) แปลงแป้นตัวอักษรภาษาอังกฤษเป็นภาษาไทย
-- texttoeng(str) แปลงแป้นตัวอักษรภาษาไทยเป็นภาษาอังกฤษ
-
-คืนค่าออกมาเป็น str
-
-### soundex
-
-เดติด คุณ Korakot Chaovavanich (จาก https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
-
-กฎที่รองรับในเวชั่น 1.4
-
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ  วิชิตหล่อจีระชุณห์กุล  และ  เจริญ  คุวินทร์พันธุ์ - LK82
-- กฎการเข้ารหัสซาวน์เด็กซ์ของ วรรณี อุดมพาณิชย์ - Udom83
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.soundex import LK82,Udom83
->>> print(LK82('รถ'))
-ร3000
->>> print(LK82('รด'))
-ร3000
->>> print(LK82('จัน'))
-จ4000
->>> print(LK82('จันทร์'))
-จ4000
->>> print(Udom83('รถ'))
-ร800000
-```
-
-### Meta Sound ภาษาไทย
-
-```
-Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
-```
-
-**การใช้งาน**
-
-```python
->>> from pythainlp.MetaSound import *
->>> MetaSound('คน')
-'15'
-```
-
-### sentiment
-
-เป็น Sentiment analysis ภาษาไทย ใช้ข้อมูลจาก [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
-
-```python
-from pythainlp.sentiment import sentiment
-sentiment(str)
-```
-
-รับค่า str ส่งออกเป็น pos , neg
-
-### Util
-
-การใช้งาน
-
-```python
-from pythainlp.util import *
-```
-
-#### ngrams
-
-สำหรับสร้าง n-grams 
-
-```python
-ngrams(token,num)
-```
-
-- token คือ list
-- num คือ จำนวน ngrams
-
-#### bigrams
-
-สำหรับสร้าง bigrams
-
-```python
-bigrams(token)
-```
-
-- token คือ list
-
-#### trigram
-
-สำหรับสร้าง trigram
-
-```python
-trigram(token)
-```
-
-- token คือ list
-
-#### normalize
-
-ซ่อมข้อความภาษาไทย เช่น กี่่่ ไปเป็น กี่
-
-```python
-normalize(text)
-```
-
-**ตัวอย่าง**
-
-```python
->>> print(normalize("เเปลก")=="แปลก") # เ เ ป ล ก กับ แปลก
-True
-```
-
-### Corpus
-
-#### WordNet ภาษาไทย
-
-เรียกใช้งาน
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**การใช้งาน**
-
-API เหมือนกับ NLTK โดยรองรับ API ดังนี้
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**ตัวอย่าง**
-
-```python
->>> from pythainlp.corpus import wordnet
->>> print(wordnet.synsets('หนึ่ง'))
-[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
->>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
-[]
->>> print(wordnet.synset('one.s.05'))
-Synset('one.s.05')
->>> print(wordnet.synset('spy.n.01').lemmas())
-[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
->>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
-['สปาย', 'สายลับ']
-```
-
-#### stopword ภาษาไทย
-
-```python
-from pythainlp.corpus import stopwords
-stopwords = stopwords.words('thai')
-```
-
-#### ชื่อประเทศ ภาษาไทย
-
-```python
-from pythainlp.corpus import country
-country.get_data()
-```
-
-#### ตัววรรณยุกต์ในภาษาไทย
-
-```python
-from pythainlp.corpus import tone
-tone.get_data()
-```
-
-#### ตัวพยัญชนะในภาษาไทย
-
-```python
-from pythainlp.corpus import alphabet
-alphabet.get_data()
-```
-
-#### รายการคำในภาษาไทย
-
-```python
-from pythainlp.corpus.thaiword import get_data # ข้อมูลเก่า
-get_data()
-from pythainlp.corpus.newthaiword import get_data # ข้อมูลใหม่
-get_data()
-```
-
-#### provinces
-
-สำหรับจัดการชื่อจังหวัดในประเทศไทย
-
-##### get_data
-
-รับข้อมูลชื่อจังหวัดในประเทศไทบ
-
-```python
-get_data()
-```
-
-คือค่าออกมาเป็น list
-
-##### parsed_docs
-
-สำหรับใช้ Tag ชื่อจังหวัดในประเทศไทย
-
-```python
-parsed_docs(text_list)
-```
-
-text_list คือ ข้อความภาษาไทยที่อยู่ใน list โดยผ่านการตัดคำมาแล้ว
-
-**ตัวอย่าง**
-
-```python
->>> d=['หนองคาย', 'เป็น', 'เมือง', 'น่าอยู่', 'อันดับ', 'ต้น', 'ๆ', 'ของ', 'โลก', 'นอกจากนี้', 'ยัง', 'มี', 'เชียงใหม่']
->>> parsed_docs(d)
-["[LOC : 'หนองคาย']", 'เป็น', 'เมือง', 'น่าอยู่', 'อันดับ', 'ต้น', 'ๆ', 'ของ', 'โลก', 'นอกจากนี้', 'ยัง', 'มี', "[LOC : 'เชียงใหม่']"]
-```
-
-#### ConceptNet
-
-เครื่องมือสำหรับ ConceptNet
-
-**ค้นหา edges**
-
-```python
-edges(word,lang='th')
-```
-
-return dict
-
-#### TNC
-
-สำหรับใช้จัดการกับ Thai National Corpus (http://www.arts.chula.ac.th/~ling/TNC/index.php)
-
-##### word_frequency
-
-ใช้วัดความถี่ของคำ
-
-```python
-word_frequency(word,domain='all')
-```
-
-word คือ คำ
-
-domain คือ หมวดหมู่ของคำ
-
-มีหมวดหมู่ดังนี้
-
-- all
-- imaginative
-- natural-pure-science
-- applied-science
-- social-science
-- world-affairs-history
-- commerce-finance
-- arts
-- belief-thought
-- leisure
-- others
-
-เขียนโดย PyThaiNLP
diff --git a/docs/archive/pythainlp-1-7.md b/docs/archive/pythainlp-1-7.md
deleted file mode 100644
index 09a8b76a0..000000000
--- a/docs/archive/pythainlp-1-7.md
+++ /dev/null
@@ -1 +0,0 @@
-See https://thainlp.org/pythainlp/docs/1.7/
diff --git a/docs/archive/pythainlp-dev-thai.md b/docs/archive/pythainlp-dev-thai.md
deleted file mode 100644
index 943b51d7f..000000000
--- a/docs/archive/pythainlp-dev-thai.md
+++ /dev/null
@@ -1,599 +0,0 @@
-# คู่มือการใช้งาน PyThaiNLP
-
-[TOC]
-
-โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อประมวลภาษาไทยด้วยภาษาโปรแกรม Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน!**
-
-> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
-
-รองรับ Python 3.4 ขึ้นไปเท่านั้น
-
-ติดตั้งโดยใช้คำสั่ง
-
-```
-pip install pythainlp
-```
-
-ปัจจุบัน PyThaiNLP ไม่จำเป็นต้องใช้ PyICU แล้ว แต่หากท่านต้องการใช้ API ที่มี PyICU ให้ทำตามคำแนะนำข้างล่างนี้
-
-**ติดตั้ง PyICU บน Windows**
-
-การติดตั้ง PyThaiNLP บน Windows ต้องติดตั้ง PyICU ก่อน วิธีที่ง่ายที่สุดคือใช้ wheel ที่ถูกสร้างมาก่อนแล้ว
-
-1. ดาวน์โหลด wheel ตามแพลตฟอร์มที่ต้องการจาก [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) เช่น Python x64 3.6 บน Windows ให้ใช้ PyICU‑2.x‑cp36‑cp36m‑win_amd64.whl
-
-2. `pip install PyICU‑2.1‑cp36‑cp36m‑win_amd64.whl`
-
-3. `pip install pythainlp`
-
-**ติดตั้ง PyICU บน macOS**
-
-```sh
-brew install icu4c --force
-brew link --force icu4c
-CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
-```
-
-ข้อมูลเพิ่มเติมที่ https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f
-
-## API
-
-### tokenize
-
-#### word_tokenize
-
-ตัดคำภาษาไทย
-
-```python
-from pythainlp.tokenize import word_tokenize
-
-word_tokenize(text, engine)
-```
-text คือ ข้อความในรูปแบบสตริง str เท่านั้น
-
-engine คือ ระบบตัดคำ ปัจจุบันมี engine ดังนี้
-
-- newmm (ค่าเริ่มต้น) - ใช้พจนานุกรม ด้วยวิธี Maximum Matching + Thai Character Cluster โค้ดชุดใหม่[โดยคุณ Korakot Chaovavanich](https://www.facebook.com/groups/408004796247683/permalink/431283740586455/)
-- longest - ใช้พจนานุกรม ด้วยวิธี Longest Matching
-- icu - เรียกใช้ตัวตัดคำจาก ICU ใช้พจนานุกรม (ความแม่นยำต่ำ)
-- deepcut - เรียกใช้ตัวตัดคำจาก [deepcut](https://github.com/rkcosmos/deepcut) ใช้การเรียนรู้ของเครื่อง
-
-คืนค่าเป็น ''list'' เช่น ['แมว', 'กิน']
-
-**การใช้งาน**
-
-```python
-from pythainlp.tokenize import word_tokenize
-
-text = "โอเคบ่เรารักภาษาถิ่น"
-word_tokenize(text, engine="newmm")  # ['โอเค', 'บ่', 'เรา', 'รัก', 'ภาษาถิ่น']
-word_tokenize(text, engine="icu")  # ['โอ', 'เค', 'บ่', 'เรา', 'รัก', 'ภาษา', 'ถิ่น']
-```
-
-#### dict_word_tokenize
-
-ตัดคำโดยใช้พจนานุกรมที่ผู้ใช้กำหนด
-
-```python
-from pythainlp.tokenize import dict_word_tokenize
-dict_word_tokenize(text, filename, engine)
-```
-
-text คือ ข้อความที่ต้องการตัดคำ
-
-filename คือ ที่ตั้งไฟล์ที่ต้องการมาเป็นฐานข้อมูลตัดคำ
-
-engine คือ ระบบตัดคำ (ดูรายละเอียดที่ word_tokenize)
-- newmm
-- longest
-
-ตัวอย่างการใช้งาน https://gist.github.com/wannaphongcom/1e862583051bf0464b6ef4ed592f739c
-
-
-#### sent_tokenize
-
-ตัดประโยคภาษาไทย
-
-```python
-sent_tokenize(text, engine="whitespace+newline")
-```
-
-text คือ ข้อความในรูปแบบสตริง
-
-engine คือ เครื่องมือสำหรับใช้ตัดประโยค
-
-- whitespace ตัดประโยคจากช่องว่าง
-- whitespace+newline ตัดประโยคจากช่องว่างและตัดจากการขึ้นบรรทัดใหม่
-
-คืนค่าเป็น list
-
-#### WhitespaceTokenizer
-
-ใช้ตัดคำ/ประโยคจากช่องว่างในสตริง
-
-```python
-from pythainlp.tokenize import WhitespaceTokenizer
-
-WhitespaceTokenizer("ทดสอบ ตัดคำช่องว่าง")  # ['ทดสอบ', 'ตัดคำช่องว่าง']
-```
-
-
-#### isthai
-
-ตรวจสอบข้อความว่ามีอักษรไทยร้อยละเท่าใด
-
-```python
-isthai(text, check_all=False)
-```
-
-text คือ ข้อความหรือ list ตัวอักษร
-
-check_all สำหรับส่งคืนค่า True หรือ False เช็คทุกตัวอักษร
-
-**การส่งคืนค่า**
-
-```python
-{'thai':% อักษรภาษาไทย,'check_all':tuple โดยจะเป็น (ตัวอักษร,True หรือ False)}
-```
-
-#### Thai Character Clusters (TCC)
-
-รองรับ Thai Character Clusters (TCC) โดยจะแบ่งกลุ่มด้วย /
-
-**เครดิต**
-
-- TCC: Jakkrit TeCho
-- Grammar: Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
-- Python code: Korakot Chaovavanich
-
-**การใช้งาน**
-
-```python
-from pythainlp.tokenize import tcc
-
-tcc.tcc("ประเทศไทย")  # 'ป/ระ/เท/ศ/ไท/ย'
-```
-
-#### Enhanced Thai Character Cluster (ETCC)
-
-นอกจาก TCC แล้ว PyThaiNLP ยังรองรับ Enhanced Thai Character Cluster (ETCC) โดยแบ่งกลุ่มด้วย /
-
-**การใช้งาน**
-
-```python
-from pythainlp.tokenize import etcc
-
-etcc.etcc('คืนความสุข')  # '/คืน/ความสุข'
-```
-
-### tag
-
-Part-of-speech tagging ภาษาไทย
-
-```python
-from pythainlp.tag import pos_tag
-
-pos_tag(text, engine="unigram", corpus="orchid")
-```
-
-list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
-
-engine คือ ตัวติดป้ายกำกับคำ (pos tagger) มีดังนี้
-- unigram (ค่าเริ่มต้น) - UnigramTagger
-- perceptron - PerceptronTagger
-- artagger - RDR POS Tagger ละเอียดยิ่งกว่าเดิม
-
-corpus ที่รองรับ
-- orchid ใช้ข้อมูลจากคลังคำ ORCHID โดยเนคเทค
-- pud ใช้ข้อมูล Parallel Universal Dependencies (PUD) treebanks
-
-### summarize
-
-สรุปเอกสารภาษาไทยแบบง่าย ๆ
-
-```python
-summarize(text, n, engine="frequency")
-```
-
-text เป็นข้อความ
-
-n คือ จำนวนประโยคสรุป
-
-engine ที่รองรับ
-- frequency
-
-**การใช้งาน**
-
-```python
-from pythainlp.summarize import summarize
-
-summarize(text="อาหาร หมายถึง ของแข็งหรือของเหลว ที่กินหรือดื่มเข้าสู่ร่างกายแล้ว จะทำให้เกิดพลังงานและความร้อนยเจริญเติบโต ซ่อมแซมส่วนที่สึกหรอ ควบคุมการเปลี่ยนแปลงต่างๆ ในร่างกาย ช่วยทำให้อวัยวะต่างๆ ทำงานได้อย่างปกติ อาหารจะต้องงกาย", n=1, engine="frequency")
-# ['อาหารจะต้องไม่มีพิษและไม่เกิดโทษต่อร่างกาย']
-```
-
-### word_vector
-
-สร้างเวกเตอร์คำ
-
-
-#### thai2fit
-
-ต้องการโมดูล
-- gensim
-- numpy
-
-##### API
-
-- get_model() - รับข้อมูล model ในรูปแบบของ gensim
-- most_similar_cosmul(positive, negative)
-- doesnt_match(listdata)
-- similarity(word1, word2) - หาค่าความคล้ายระหว่าง 2 คำ โดยทั้งคู่เป็น str
-- sentence_vectorizer(ss, dim=300, use_mean=False)
-
-### keywords
-
-หาคำสำคัญจากข้อความภาษาไทย
-
-#### find_keyword
-
-การทำงาน หาคำที่ถูกใช้งานมากกว่าค่าขั้นต่ำที่กำหนดได้ โดยจะลบ stopword ออก
-
-```python
-from pythainlp.util import find_keyword
-
-find_keyword(word_list, lentext=3)
-```
-
-word_list คือ list ของข้อความที่ตัดคำแล้ว
-
-lentext คือ จำนวนคำขั้นต่ำที่ต้องการหา keyword
-
-คืนค่าเป็น dict
-
-### transliteration
-
-```python
-from pythainlp.transliterate import romanize, transliterate
-
-romanize(str, engine="royin")
-transliterate(str, engine="pyicu")
-```
-
-มี engine ดังนี้
-- pyicu ส่งค่าสัทอักษร
-- royin ใช้หลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน (**หากมีข้อผิดพลาด ให้ใช้คำอ่าน เนื่องจากตัว royin ไม่มีตัวแปลงคำเป็นคำอ่าน**)
-
-รับค่า ''str'' ข้อความ
-
-คืนค่าเป็น ''str'' ข้อความ
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.transliterate import romanize, transliterate
-
-romanize("แมว")  # 'maew'
-transliterate("นก")
-```
-
-### spell
-
-ตรวจสอบคำผิดในภาษาไทย
-
-```python
-spell(word, engine="pn")
-```
-
-engine ที่รองรับ
-- pn (ค่าเริ่มต้น) พัฒนาจาก Peter Norvig
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell import spell
-
-a = spell("สี่เหลียม")
-print(a)  # ['สี่เหลี่ยม']
-```
-#### pn
-
-```python
-correct(word)
-```
-
-จะคืนค่าคำที่เป็นไปได้มากที่สุด
-
-**ตัวอย่างการใช้งาน**
-
-```python
-from pythainlp.spell.pn import correct
-
-a = correct("สี่เหลียม")
-print(a)  # ['สี่เหลี่ยม']
-```
-
-### pythainlp.number
-
-จัดการกับตัวเลข
-
-```python
-from pythainlp.number import *
-```
-
-มีฟังก์ชันดังนี้
-- thai_num_to_num(str) - แปลงเลขไทยสู่เลขอารบิก
-- thai_num_to_text(str) - เลขไทยสู่คำอ่านไทย
-- num_to_thai_num(str) - เลขอารบิกสู่เลขไทย
-- num_to_text(str) - เลขสู่ข้อความ
-- text_to_num(str) - ข้อความสู่เลข
-- bahttext(float) - อ่านจำนวนภาษาไทย (หน่วยเงินบาท) รับค่าเป็น ''float'' คืนค่าเป็น 'str'
-- num_to_thaiword(float) - อ่านจำนวนภาษาไทย รับค่าเป็น ''float'' คืนค่าเป็น 'str'
-- thaiword_to_num(List[str]) - แปลคำอ่านจำนวนไทยเป็นตัวเลขจำนวนเต็ม รับค่าเป็น ''List[str]'' คืนค่าเป็น int
-
-**ตัวอย่าง**
-
-```python
-thaiword_to_num(["หกหมื่น", "หกพัน", "หกร้อย", "หกสิบ", "หก"])  # 66666
-```
-
-### collation
-
-เรียงลำดับข้อมูลภาษาไทยใน List
-
-```python
-from pythainlp.util import collate
-print(collate(["ไก่", "ไข่", "กา", "ฮา"]))  # ['กา', 'ไก่', 'ไข่', 'ฮา']
-```
-
-รับ list คืนค่า list
-
-### date
-
-#### thai_strftime
-
-จัดรูปแบบข้อความบอกวันที่และเวลาเป็นภาษาไทยและปีพุทธศักราช
-
-```python
-import datetime
-from pythainlp.util import thai_strftime
-
-fmt = "%Aที่ %-d %B พ.ศ. %Y เวลา %H:%Mน. (%a %d-%b-%y)"
-date = datetime.datetime(1976, 10, 6, 1, 40)
-print(thai_strftime(date, fmt))
-# วันพุธที่ 6 ตุลาคม พ.ศ. 2519 เวลา 01:40น. (พ 06-ต.ค.-19)
-```
-### rank
-
-#### rank
-
-หาคำที่มีจำนวนการใช้งานมากที่สุด
-
-```python
-from pythainlp.util import rank
-
-rank(list)
-```
-
-คืนค่าออกมาเป็น dict
-
-**ตัวอย่างการใช้งาน**
-
-```python
-rank(["แมง", "แมง", "คน"])  # Counter({'แมง': 2, 'คน': 1})
-```
-
-### soundex
-
-กฎที่รองรับ
-- lk82 - กฎการเข้ารหัสซาวน์เด็กซ์ของ วิชิตหล่อจีระชุณห์กุล และ เจริญ คุวินทร์พันธุ์
-- udom83 - กฎการเข้ารหัสซาวน์เด็กซ์ของ วรรณี อุดมพาณิชย์
-- metasound - กฎการเข้ารหัส MetaSoound ของ Snae & Brückner (2009)
-
-เครดิต
-- โค้ด lk82 และ udom83 - Korakot Chaovavanich https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8
-- โค้ด metasound - Wannaphong Phattiyaphaibun, ปรับปรุงต่อโดย Arthit Suriyawongkul
-
-**การใช้งาน**
-
-```python
-from pythainlp.soundex import lk82, metasound, udom83
-
-print(lk82("รถ"))  # ร3000
-print(lk82("รด"))  # ร3000
-print(lk82("จัน"))  # จ4000
-print(lk82("จันทร์"))  # จ4000
-print(udom83("รถ"))  # ร800000
-print(metasound("รัก"))  # 'ร100'
-```
-
-### Util
-
-#### normalize
-
-ซ่อมข้อความภาษาไทย
-
-```python
-normalize(text)
-```
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.util import normalize
-
-# เ เ ป ล ก กับ แปลก
-normalize("เเปลก") == "แปลก"  # True
-```
-
-#### แก้ไขปัญหาการลืมเปลี่ยนภาษาแป้นพิมพ์
-
-```python
-from pythainlp.util import eng_to_thai, thai_to_eng
-```
-
-มีคำสั่งดังนี้
-
-- eng_to_thai(str) แปลงแป้นตัวอักษรอังกฤษเป็นไทย
-- thai_to_eng(str) แปลงแป้นตัวอักษรไทยเป็นอังกฤษ
-
-คืนค่าออกมาเป็น str
-
-### Corpus
-
-#### WordNet ภาษาไทย
-
-เรียกใช้งาน
-
-```python
-from pythainlp.corpus import wordnet
-```
-
-**การใช้งาน**
-
-API เหมือนกับ NLTK โดยรองรับ API ดังนี้
-
-- wordnet.synsets(word)
-- wordnet.synset(name_synsets)
-- wordnet.all_lemma_names(pos=None, lang="tha")
-- wordnet.all_synsets(pos=None)
-- wordnet.langs()
-- wordnet.lemmas(word,pos=None,lang="tha")
-- wordnet.lemma(name_synsets)
-- wordnet.lemma_from_key(key)
-- wordnet.path_similarity(synsets1,synsets2)
-- wordnet.lch_similarity(synsets1,synsets2)
-- wordnet.wup_similarity(synsets1,synsets2)
-- wordnet.morphy(form, pos=None)
-- wordnet.custom_lemmas(tab_file, lang)
-
-**ตัวอย่าง**
-
-```python
-from pythainlp.corpus import wordnet
-
-print(wordnet.synsets("หนึ่ง"))
-# [Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
-
-print(wordnet.synsets("หนึ่ง")[0].lemma_names("tha"))
-# []
-
-print(wordnet.synset("one.s.05"))
-# Synset('one.s.05')
-
-print(wordnet.synset("spy.n.01").lemmas())
-# [Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
-
-print(wordnet.synset("spy.n.01").lemma_names("tha"))
-# ['สปาย', 'สายลับ']
-```
-
-#### พยัญชนะในภาษาไทย
-
-```python
-from pythainlp import thai_consonants
-```
-
-จะได้ str ที่มีพยัญชนะในภาษาไทยทั้งหมด
-
-#### วรรณยุกต์ในภาษาไทย
-
-```python
-from pythainlp import thai_tonemarks
-```
-จะได้ str ที่มีวรรณยุกต์ในภาษาไทยทั้งหมด
-
-#### stopword ภาษาไทย
-
-```python
-from pythainlp.corpus import thai_stopwords
-
-stopwords = thai_stopwords()
-```
-
-#### รายการคำในภาษาไทย
-
-```python
-from pythainlp.corpus import thai_words
-
-words = thai_words()
-```
-
-#### ชื่อประเทศ ภาษาไทย
-
-```python
-from pythainlp.corpus import countries
-
-for country in countries():
-    print(country)
-```
-
-#### provinces
-
-ข้อมูลชื่อจังหวัดในประเทศไทย
-
-```python
-from pythainlp.corpus import provinces
-
-for province in provinces():
-    print(province)
-```
-
-##### tag_provinces
-
-สำหรับใช้ติดป้ายกำกับชื่อจังหวัดในประเทศไทย
-
-```python
-from pythainlp.tag.locations import tag_provinces
-
-tag_provinces(text_list)
-```
-
-text_list คือ ข้อความภาษาไทยที่อยู่ใน list โดยผ่านการตัดคำมาแล้ว
-
-**ตัวอย่าง**
-```python
-text = ['หนองคาย', 'น่าอยู่']
-tag_provinces(text)
-# [('หนองคาย', 'B-LOCATION'), ('น่าอยู่', 'O')]  
-```
-
-#### ConceptNet
-
-เครื่องมือสำหรับ ConceptNet
-
-**ค้นหา edges**
-
-```python
-edges(word, lang="th")
-```
-
-return dict
-
-#### TNC
-
-สำหรับใช้จัดการกับ Thai National Corpus (http://www.arts.chula.ac.th/~ling/TNC/index.php)
-
-##### word_freq
-
-ใช้วัดความถี่ของคำ
-
-```python
-word_freq(word, domain="all")
-```
-
-word คือ คำ
-
-domain คือ หมวดหมู่ของคำ
-
-มีหมวดหมู่ดังนี้
-- all
-- imaginative
-- natural-pure-science
-- applied-science
-- social-science
-- world-affairs-history
-- commerce-finance
-- arts
-- belief-thought
-- leisure
-- others
diff --git a/docs/conf.py b/docs/conf.py
index 5c2db572b..cdb7e7fe4 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,9 +1,6 @@
 # -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
-#
-# This file does only contain a selection of the most common options. For a
-# full list see the documentation:
 # http://www.sphinx-doc.org/en/master/config
 
 # -- Path setup --------------------------------------------------------------
@@ -27,9 +24,9 @@
 copyright = u'2017-%s, %s (Apache Software License 2.0)' % (curyear, project)
 
 # The short X.Y version
-version = ''
+version = '2.0'
 # The full version, including alpha/beta/rc tags
-release = '2.0'
+release = '2.0.3'
 
 
 # -- General configuration ---------------------------------------------------
diff --git a/meta.yaml b/meta.yaml
index 78d7a4794..52ebb1d37 100644
--- a/meta.yaml
+++ b/meta.yaml
@@ -1,4 +1,4 @@
-{% set version = "2.0.3" %}
+{% set version = "2.0.4" %}
 
 package:
   name: pythainlp
diff --git a/notebooks/pythainlp-get-started.ipynb b/notebooks/pythainlp-get-started.ipynb
index 00b804c45..bc131bd4f 100644
--- a/notebooks/pythainlp-get-started.ipynb
+++ b/notebooks/pythainlp-get-started.ipynb
@@ -308,7 +308,7 @@
     "\n",
     "print(\"sent_tokenize:\", sent_tokenize(text))\n",
     "print(\"word_tokenize:\", word_tokenize(text))\n",
-    "print(\"word_tokenize, without whitespace:\", word_tokenize(text, whitespaces=False))"
+    "print(\"word_tokenize, without whitespace:\", word_tokenize(text, keep_whitespace=False))"
    ]
   },
   {
@@ -382,15 +382,6 @@
     "print(\"custom:\", custom_tokenizer.word_tokenize(text))"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Thai Character Cluster (TCC)\n",
-    "\n",
-    "According to [Character Cluster Based Thai Information Retrieval](https://www.researchgate.net/publication/2853284_Character_Cluster_Based_Thai_Information_Retrieval) (Theeramunkong et al. 2004)."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 15,
@@ -399,7 +390,21 @@
     {
      "data": {
       "text/plain": [
-       "['ป', 'ระ', 'เท', 'ศ', 'ไท', 'ย']"
+       "['มี|ความ|เป็น|ไป|ได้|อย่าง|ไร|บ้าง|',\n",
+       " 'มี|ความ|เป็นไป|ได้|อย่าง|ไร|บ้าง|',\n",
+       " 'มี|ความ|เป็นไปได้|อย่าง|ไร|บ้าง|',\n",
+       " 'มี|ความเป็นไป|ได้|อย่าง|ไร|บ้าง|',\n",
+       " 'มี|ความเป็นไปได้|อย่าง|ไร|บ้าง|',\n",
+       " 'มี|ความ|เป็น|ไป|ได้|อย่างไร|บ้าง|',\n",
+       " 'มี|ความ|เป็นไป|ได้|อย่างไร|บ้าง|',\n",
+       " 'มี|ความ|เป็นไปได้|อย่างไร|บ้าง|',\n",
+       " 'มี|ความเป็นไป|ได้|อย่างไร|บ้าง|',\n",
+       " 'มี|ความเป็นไปได้|อย่างไร|บ้าง|',\n",
+       " 'มี|ความ|เป็น|ไป|ได้|อย่างไรบ้าง|',\n",
+       " 'มี|ความ|เป็นไป|ได้|อย่างไรบ้าง|',\n",
+       " 'มี|ความ|เป็นไปได้|อย่างไรบ้าง|',\n",
+       " 'มี|ความเป็นไป|ได้|อย่างไรบ้าง|',\n",
+       " 'มี|ความเป็นไปได้|อย่างไรบ้าง|']"
       ]
      },
      "execution_count": 15,
@@ -408,9 +413,18 @@
     }
    ],
    "source": [
-    "from pythainlp.tokenize import subword_tokenize\n",
+    "from pythainlp.tokenize.multi_cut import find_all_segment, mmcut, segment\n",
     "\n",
-    "subword_tokenize(\"ประเทศไทย\")"
+    "find_all_segment(\"มีความเป็นไปได้อย่างไรบ้าง\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Subword and Thai Character Cluster (TCC)\n",
+    "\n",
+    "Thai Character Clusters (TCC) are described in [Character Cluster Based Thai Information Retrieval](https://www.researchgate.net/publication/2853284_Character_Cluster_Based_Thai_Information_Retrieval) (Theeramunkong et al. 2004)."
    ]
   },
   {
@@ -421,7 +435,7 @@
     {
      "data": {
       "text/plain": [
-       "False"
+       "['ป', 'ระ', 'เท', 'ศ', 'ไท', 'ย']"
       ]
      },
      "execution_count": 16,
@@ -430,7 +444,9 @@
     }
    ],
    "source": [
-    "isinstance(subword_tokenize(\"ประเทศไทย\", engine=\"etcc\"), str)"
+    "from pythainlp import subword_tokenize\n",
+    "\n",
+    "subword_tokenize(\"ประเทศไทย\")"
    ]
   },
   {
@@ -457,7 +473,7 @@
     }
    ],
    "source": [
-    "from pythainlp import tcc\n",
+    "from pythainlp.tokenize import tcc\n",
     "\n",
     "tcc.segment(\"ประเทศไทย\")"
    ]
@@ -535,17 +551,30 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "mɛːw\n"
-     ]
+     "data": {
+      "text/plain": [
+       "'mɛːw'"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "from pythainlp.transliterate import transliterate\n",
     "\n",
-    "print(transliterate(\"แมว\"))"
+    "transliterate(\"แมว\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#!pip3 install pythainlp[icu]\n",
+    "#transliterate(\"แมว\", engine=\"icu\")"
    ]
   },
   {
@@ -557,7 +586,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -566,7 +595,7 @@
        "True"
       ]
      },
-     "execution_count": 22,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -588,7 +617,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [
     {
@@ -612,7 +641,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -652,7 +681,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
@@ -661,7 +690,7 @@
        "['เหลียม', 'เหลือม']"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -675,7 +704,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [
     {
@@ -684,7 +713,7 @@
        "'เหลียม'"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -707,7 +736,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -730,24 +759,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[('งวงช้าง', 12),\n",
-       " ('เทิบทาบ', 7),\n",
-       " ('กริน', 3),\n",
-       " ('นาภี', 2),\n",
-       " ('แด่วๆ', 3),\n",
-       " ('คู่ใจ', 7),\n",
-       " ('คุณพ่อ', 732),\n",
-       " ('สิ้น', 755),\n",
-       " ('เยาะ', 150)]"
+       "[('แสดงทรรศนะ', 2),\n",
+       " ('เจ้าอธิการ', 4),\n",
+       " ('วินิจฉัย', 133),\n",
+       " ('อ่อนหวาน', 90),\n",
+       " ('ไตรตรา', 3),\n",
+       " ('คำๆ', 15),\n",
+       " ('ปริ่ม', 13),\n",
+       " ('มนุ', 3),\n",
+       " ('ส้าง', 5)]"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -765,7 +794,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
@@ -774,7 +803,7 @@
        "39977"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -786,7 +815,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
@@ -795,7 +824,7 @@
        "30379"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -807,7 +836,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -816,7 +845,7 @@
        "76706"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -828,7 +857,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 33,
    "metadata": {},
    "outputs": [
     {
@@ -837,7 +866,7 @@
        "76700"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -859,7 +888,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 34,
    "metadata": {},
    "outputs": [
     {
@@ -868,7 +897,7 @@
        "[('การ', 'FIXN'), ('เดินทาง', 'VACT')]"
       ]
      },
-     "execution_count": 33,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -881,15 +910,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[[('ราชกิจจานุเบกษา', 'NCMN'),\n",
-       "  ('เผยแพร่', 'VACT'),\n",
-       "  ('ประกาศสำนักนายกฯ', 'NCMN'),\n",
+       "[[('ประกาศสำนักนายกฯ', 'NCMN'),\n",
        "  (' ', 'PUNC'),\n",
        "  ('ให้', 'JSBR'),\n",
        "  (' ', 'PUNC'),\n",
@@ -905,25 +932,19 @@
        "  ('แต่งตั้ง', 'VACT'),\n",
        "  ('ให้', 'JSBR'),\n",
        "  ('เป็น', 'VSTA'),\n",
-       "  ('ข้าราชการ', 'NCMN'),\n",
-       "  ('พลเรือน', 'NCMN'),\n",
-       "  ('สามัญ', 'NCMN'),\n",
-       "  ('ตำแหน่ง', 'NCMN'),\n",
-       "  (' ', 'PUNC'),\n",
        "  (\"'อธิบดีกรมประชาสัมพันธ์'\", 'NCMN')]]"
       ]
      },
-     "execution_count": 34,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "sents = [[\"ราชกิจจานุเบกษา\", \"เผยแพร่\", \"ประกาศสำนักนายกฯ\", \" \", \"ให้\",\n",
+    "sents = [[\"ประกาศสำนักนายกฯ\", \" \", \"ให้\",\n",
     "    \" \", \"'พล.ท.สรรเสริญ แก้วกำเนิด'\", \" \", \"พ้นจากตำแหน่ง\",\n",
     "    \" \", \"ผู้ทรงคุณวุฒิพิเศษ\", \"กองทัพบก\", \" \", \"กระทรวงกลาโหม\"],\n",
-    "    [\"และ\",\"แต่งตั้ง\",\"ให้\", \"เป็น\", \"ข้าราชการ\", \"พลเรือน\", \"สามัญ\",\n",
-    "    \"ตำแหน่ง\", \" \", \"'อธิบดีกรมประชาสัมพันธ์'\"]]\n",
+    "    [\"และ\", \"แต่งตั้ง\", \"ให้\", \"เป็น\", \"'อธิบดีกรมประชาสัมพันธ์'\"]]\n",
     "\n",
     "pos_tag_sents(sents)"
    ]
@@ -942,15 +963,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 36,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[('วันที่', 'NOUN', 'O'),\n",
-       " (' ', 'PUNCT', 'O'),\n",
-       " ('15', 'NUM', 'B-DATE'),\n",
+       "[('15', 'NUM', 'B-DATE'),\n",
        " (' ', 'PUNCT', 'I-DATE'),\n",
        " ('ก.ย.', 'NOUN', 'I-DATE'),\n",
        " (' ', 'PUNCT', 'I-DATE'),\n",
@@ -964,10 +983,24 @@
        " (':', 'PUNCT', 'I-TIME'),\n",
        " ('49', 'NUM', 'I-TIME'),\n",
        " (' ', 'PUNCT', 'I-TIME'),\n",
-       " ('น.', 'NOUN', 'I-TIME')]"
+       " ('น.', 'NOUN', 'I-TIME'),\n",
+       " (' ', 'PUNCT', 'O'),\n",
+       " ('เดินทาง', 'VERB', 'O'),\n",
+       " ('จาก', 'ADP', 'O'),\n",
+       " ('กทม.', 'NOUN', 'B-LOCATION'),\n",
+       " ('ไป', 'AUX', 'O'),\n",
+       " ('จังหวัด', 'NOUN', 'B-LOCATION'),\n",
+       " ('กำแพงเพชร', 'NOUN', 'I-LOCATION'),\n",
+       " (' ', 'PUNCT', 'I-MONEY'),\n",
+       " ('ตั๋ว', 'NOUN', 'I-MONEY'),\n",
+       " ('ราคา', 'NOUN', 'I-MONEY'),\n",
+       " (' ', 'PUNCT', 'I-MONEY'),\n",
+       " ('297', 'NUM', 'I-MONEY'),\n",
+       " (' ', 'PUNCT', 'I-MONEY'),\n",
+       " ('บาท', 'NOUN', 'I-MONEY')]"
       ]
      },
-     "execution_count": 35,
+     "execution_count": 36,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -976,7 +1009,7 @@
     "from pythainlp.tag.named_entity import ThaiNameTagger\n",
     "\n",
     "ner = ThaiNameTagger()\n",
-    "ner.get_ner(\"วันที่ 15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น.\")"
+    "ner.get_ner(\"15 ก.ย. 61 ทดสอบระบบเวลา 14:49 น. เดินทางจากกทม.ไปจังหวัดกำแพงเพชร ตั๋วราคา 297 บาท\")"
    ]
   },
   {
@@ -988,7 +1021,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 37,
    "metadata": {},
    "outputs": [
     {
@@ -1008,7 +1041,7 @@
        "0.99259853"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 37,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1021,7 +1054,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -1037,7 +1070,7 @@
        "'แมว'"
       ]
      },
-     "execution_count": 37,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1055,7 +1088,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [
     {
@@ -1064,7 +1097,7 @@
        "'หนึ่งล้านสองแสนสามหมื่นสี่พันห้าร้อยหกสิบเจ็ดล้านแปดแสนเก้าหมื่นหนึ่งร้อยยี่สิบสามบาทสี่สิบห้าสตางค์'"
       ]
      },
-     "execution_count": 38,
+     "execution_count": 39,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1077,7 +1110,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [
     {
@@ -1086,7 +1119,7 @@
        "'หนึ่งบาทเก้าสิบเอ็ดสตางค์'"
       ]
      },
-     "execution_count": 39,
+     "execution_count": 40,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/notebooks/wongnai_classification.ipynb b/notebooks/wongnai_classification.ipynb
index 70345b9e1..59d1ae760 100644
--- a/notebooks/wongnai_classification.ipynb
+++ b/notebooks/wongnai_classification.ipynb
@@ -355,7 +355,7 @@
     "def process_text(text):\n",
     "    nopunc = [char for char in text if char not in string.punctuation]\n",
     "    nopunc = ''.join(nopunc)\n",
-    "    return [word for word in word_tokenize(nopunc,'ulmfit') if word and not re.search(pattern=r\"\\s+\", string=word)]\n",
+    "    return [word for word in word_tokenize(nopunc, engine='ulmfit') if word and not re.search(pattern=r\"\\s+\", string=word)]\n",
     "def split_text(text):\n",
     "    return text.split()\n",
     "\n",
diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py
index 66326efe4..c316fa4e9 100644
--- a/pythainlp/__init__.py
+++ b/pythainlp/__init__.py
@@ -1,6 +1,6 @@
 ﻿# -*- coding: utf-8 -*-
 
-__version__ = "2.0.3"
+__version__ = "2.0.4"
 
 thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ"  # 44 chars
 thai_vowels = "ฤฦะ\u0e31าำ\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39เแโใไ\u0e45\u0e47"  # 19
@@ -27,6 +27,11 @@
 from pythainlp.soundex import soundex
 from pythainlp.spell import correct, spell
 from pythainlp.tag import pos_tag
-from pythainlp.tokenize import sent_tokenize, tcc, word_tokenize, Tokenizer
+from pythainlp.tokenize import (
+    Tokenizer,
+    sent_tokenize,
+    subword_tokenize,
+    word_tokenize,
+)
 from pythainlp.transliterate import romanize, transliterate
 from pythainlp.util import collate, thai_strftime
diff --git a/pythainlp/corpus/words_th.txt b/pythainlp/corpus/words_th.txt
index 4e6259387..0fa96af67 100755
--- a/pythainlp/corpus/words_th.txt
+++ b/pythainlp/corpus/words_th.txt
@@ -61186,7 +61186,6 @@
 แอกน้อย
 แอด ๆ
 แอบ ๆ
-๒,๕๔๐ รายการ
 โอ้กอ้าก
 โอฆ
 โอฆชล
diff --git a/pythainlp/soundex/__init__.py b/pythainlp/soundex/__init__.py
index fac5f978d..1320353d0 100644
--- a/pythainlp/soundex/__init__.py
+++ b/pythainlp/soundex/__init__.py
@@ -12,7 +12,7 @@
 # [KSS97] https://linux.thai.net/~thep/soundex/soundex.html
 
 
-def soundex(text: str, engine="udom83") -> str:
+def soundex(text: str, engine: str = "udom83") -> str:
     """
     Thai Soundex
 
@@ -24,9 +24,7 @@ def soundex(text: str, engine="udom83") -> str:
         * metasound
     :return: soundex code
     """
-    if engine == "udom83":
-        _soundex = udom83
-    elif engine == "lk82":
+    if engine == "lk82":
         _soundex = lk82
     elif engine == "metasound":
         _soundex = metasound
diff --git a/pythainlp/soundex/lk82.py b/pythainlp/soundex/lk82.py
index e0dee6d6b..a173cae5f 100644
--- a/pythainlp/soundex/lk82.py
+++ b/pythainlp/soundex/lk82.py
@@ -28,7 +28,7 @@ def lk82(text: str) -> str:
     :param str text: Thai word
     :return: LK82 soundex
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     text = _RE_1.sub("", text)  # 4.ลบวรรณยุกต์
diff --git a/pythainlp/soundex/metasound.py b/pythainlp/soundex/metasound.py
index 6998f81a9..d594c950e 100644
--- a/pythainlp/soundex/metasound.py
+++ b/pythainlp/soundex/metasound.py
@@ -38,7 +38,7 @@ def metasound(text: str, length: int = 4) -> str:
         >>> metasound("บูรณการ", 5))
         'บ5515'
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     # keep only consonants and thanthakhat
diff --git a/pythainlp/soundex/udom83.py b/pythainlp/soundex/udom83.py
index dce60feaa..1c1cd5149 100644
--- a/pythainlp/soundex/udom83.py
+++ b/pythainlp/soundex/udom83.py
@@ -37,7 +37,7 @@ def udom83(text: str) -> str:
     :return: Udom83 soundex
     """
 
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     text = _RE_1.sub("ัน\\1", text)
diff --git a/pythainlp/spell/__init__.py b/pythainlp/spell/__init__.py
index c4b654f53..ba73bffb5 100644
--- a/pythainlp/spell/__init__.py
+++ b/pythainlp/spell/__init__.py
@@ -10,7 +10,7 @@
 __all__ = ["DEFAULT_SPELL_CHECKER", "correct", "spell", "NorvigSpellChecker"]
 
 
-def spell(word: str, engine="pn") -> List[str]:
+def spell(word: str, engine: str = "pn") -> List[str]:
     """
     :param str word: word to check spelling
     :param str engine:
@@ -21,7 +21,7 @@ def spell(word: str, engine="pn") -> List[str]:
     return DEFAULT_SPELL_CHECKER.spell(word)
 
 
-def correct(word: str, engine="pn") -> str:
+def correct(word: str, engine: str = "pn") -> str:
     """
     :param str word: word to correct spelling
     :param str engine:
diff --git a/pythainlp/summarize/__init__.py b/pythainlp/summarize/__init__.py
index fda0346b5..e74e0e752 100644
--- a/pythainlp/summarize/__init__.py
+++ b/pythainlp/summarize/__init__.py
@@ -3,12 +3,16 @@
 Summarization
 """
 
+from typing import List
+
 from pythainlp.tokenize import sent_tokenize
 
 from .freq import FrequencySummarizer
 
 
-def summarize(text, n, engine="frequency", tokenizer="newmm"):
+def summarize(
+    text: str, n: int, engine: str = "frequency", tokenizer: str = "newmm"
+) -> List[str]:
     """
     Thai text summarization
 
diff --git a/pythainlp/summarize/freq.py b/pythainlp/summarize/freq.py
index 2dc7044fd..bc43ef437 100644
--- a/pythainlp/summarize/freq.py
+++ b/pythainlp/summarize/freq.py
@@ -5,6 +5,7 @@
 from collections import defaultdict
 from heapq import nlargest
 from string import punctuation
+from typing import List
 
 from pythainlp.corpus import thai_stopwords
 from pythainlp.tokenize import sent_tokenize, word_tokenize
@@ -36,9 +37,9 @@ def __compute_frequencies(self, word_tokenized_sents):
     def __rank(self, ranking, n: int):
         return nlargest(n, ranking, key=ranking.get)
 
-    def summarize(self, text: str, n: int, tokenizer: str):
+    def summarize(self, text: str, n: int, tokenizer: str = "newmm") -> List[str]:
         sents = sent_tokenize(text)
-        word_tokenized_sents = [word_tokenize(sent, tokenizer) for sent in sents]
+        word_tokenized_sents = [word_tokenize(sent, engine=tokenizer) for sent in sents]
         self.__freq = self.__compute_frequencies(word_tokenized_sents)
         ranking = defaultdict(int)
 
diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py
index 94e952fac..1709c9c3f 100644
--- a/pythainlp/tokenize/__init__.py
+++ b/pythainlp/tokenize/__init__.py
@@ -3,6 +3,7 @@
 Thai tokenizers
 """
 import re
+import sys
 from typing import Iterable, List, Union
 
 from pythainlp.corpus import get_corpus, thai_syllables, thai_words
@@ -14,20 +15,21 @@
 
 
 def word_tokenize(
-    text: str, engine: str = "newmm", whitespaces: bool = True
+    text: str, custom_dict: Trie = None, engine: str = "newmm", keep_whitespace: bool = True
 ) -> List[str]:
     """
     :param str text: text to be tokenized
     :param str engine: tokenizer to be used
-    :param bool whitespaces: True to output no whitespace, a common mark of end of phrase in Thai
+    :param dict custom_dict: a dictionary trie
+    :param bool keep_whitespace: True to keep whitespaces, a common mark for end of phrase in Thai
     :Parameters for engine:
         * newmm (default) - dictionary-based, Maximum Matching + Thai Character Cluster
         * longest - dictionary-based, Longest Matching
-        * icu - wrapper for ICU, dictionary-based
         * deepcut - wrapper for deepcut, language-model-based https://github.com/rkcosmos/deepcut
-        * ulmfit - use newmm engine with a specific dictionary for use with thai2vec
+        * icu - wrapper for ICU (International Components for Unicode, using PyICU), dictionary-based
+        * ulmfit - for thai2fit
+        * a custom_dict can be provided for newmm, longest, and deepcut
     :return: list of words, tokenized from the text
-
     **Example**::
         >>> from pythainlp.tokenize import word_tokenize
         >>> text = "โอเคบ่พวกเรารักภาษาบ้านเกิด"
@@ -36,64 +38,72 @@ def word_tokenize(
         >>> word_tokenize(text, engine="icu")
         ['โอ', 'เค', 'บ่', 'พวก', 'เรา', 'รัก', 'ภาษา', 'บ้าน', 'เกิด']
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
+    segments = []
     if engine == "newmm" or engine == "onecut":
         from .newmm import segment
-    elif engine == "longest" or engine == "longest-matching":
+
+        segments = segment(text, custom_dict)
+    elif engine == "longest":
         from .longest import segment
-    elif engine == "ulmfit":
-        from .newmm import segment as segment_
 
-        def segment(text):
-            return segment_(text, trie=FROZEN_DICT_TRIE)
+        segments = segment(text, custom_dict)
+    elif engine == "mm" or engine == "multi_cut":
+        from .multi_cut import segment
+
+        segments = segment(text, custom_dict)
+    elif engine == "deepcut":  # deepcut can optionally use dictionary
+        from .deepcut import segment
+
+        if custom_dict:
+            custom_dict = list(custom_dict)
+            segments = segment(text, custom_dict)
+        else:
+            segments = segment(text)
+    elif engine == "ulmfit":  # ulmfit has its own specific dictionary
+        from .newmm import segment
 
+        segments = segment(text, custom_dict=FROZEN_DICT_TRIE)
     elif engine == "icu":
         from .pyicu import segment
-    elif engine == "deepcut":
-        from .deepcut import segment
-    elif engine == "mm" or engine == "multi_cut":
-        from .multi_cut import segment
+
+        segments = segment(text)
     else:  # default, use "newmm" engine
         from .newmm import segment
 
-    if not whitespaces:
-        return [token.strip(" ") for token in segment(text) if token.strip(" ")]
+        if custom_dict:
+            custom_dict = dict_trie(custom_dict)
+        segments = segment(text, custom_dict)
+
+    if not keep_whitespace:
+        segments = [token.strip(" ") for token in segments if token.strip(" ")]
 
-    return segment(text)
+    return segments
 
 
 def dict_word_tokenize(
-    text: str, custom_dict: Trie, engine: str = "newmm"
+    text: str,
+    custom_dict: Trie = DEFAULT_DICT_TRIE,
+    engine: str = "newmm",
+    keep_whitespace: bool = True,
 ) -> List[str]:
     """
-    :meth:`dict_word_tokenize` tokenizes word based on the dictionary you provide. The format has to be in trie data structure.
+    :meth: DEPRECATED: Please use `word_tokenize()` with a `custom_dict` argument instead
     :param str text: text to be tokenized
-    :param dict custom_dict: a dictionary trie
-    :param str engine: choose between different options of engine to token (newmm, longest)
+    :param dict custom_dict: a dictionary trie, or an iterable of words, or a string of dictionary path
+    :param str engine: tokenization engine to use (newmm [default], mm, longest, and deepcut)
+    :param bool keep_whitespace: True to keep whitespaces, a common mark for end of phrase in Thai
     :return: list of words
-    **Example**::
-        >>> from pythainlp.tokenize import dict_word_tokenize, dict_trie
-        >>> words = ["แมว", "ดี"]
-        >>> trie = dict_trie(words)
-        >>> dict_word_tokenize("แมวดีดีแมว", trie)
-        ['แมว', 'ดี', 'ดี', 'แมว']
     """
-
-    if not text:
-        return []
-
-    if engine == "newmm" or engine == "onecut":
-        from .newmm import segment
-    elif engine == "longest" or engine == "longest-matching":
-        from .longest import segment
-    elif engine == "mm" or engine == "multi_cut":
-        from .multi_cut import segment
-    else:  # default, use "newmm" engine
-        from .newmm import segment
-
-    return segment(text, custom_dict)
+    print(
+        "Deprecated. Use word_tokenize() with a custom_dict argument instead.",
+        file=sys.stderr,
+    )
+    return word_tokenize(
+        text=text, custom_dict=custom_dict, engine=engine, keep_whitespace=keep_whitespace
+    )
 
 
 def sent_tokenize(text: str, engine: str = "whitespace+newline") -> List[str]:
@@ -106,7 +116,7 @@ def sent_tokenize(text: str, engine: str = "whitespace+newline") -> List[str]:
     :return: a list of text, split by whitespace or new line.
     """
 
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
     sentences = []
@@ -128,16 +138,13 @@ def subword_tokenize(text: str, engine: str = "tcc") -> List[str]:
         * etcc - Enhanced Thai Character Cluster (Inrut et al. 2001) [In development]
     :return: a list of tokenized strings.
     """
-    if not text:
-        return ""
+    if not text or not isinstance(text, str):
+        return []
 
     if engine == "etcc":
         from .etcc import segment
-
-        return segment(text)
-
-    # default is "tcc"
-    from .tcc import segment
+    else:  # default
+        from .tcc import segment
 
     return segment(text)
 
@@ -149,7 +156,7 @@ def syllable_tokenize(text: str) -> List[str]:
     :return: returns list of strings of syllables
     """
 
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
     tokens = []
@@ -157,12 +164,12 @@ def syllable_tokenize(text: str) -> List[str]:
         words = word_tokenize(text)
         trie = dict_trie(dict_source=thai_syllables())
         for word in words:
-            tokens.extend(dict_word_tokenize(text=word, custom_dict=trie))
+            tokens.extend(word_tokenize(text=word, custom_dict=trie))
 
     return tokens
 
 
-def dict_trie(dict_source: Union[str, Iterable]) -> Trie:
+def dict_trie(dict_source: Union[str, Iterable[str], Trie]) -> Trie:
     """
     Create a dict trie which will be used for word_tokenize() function.
     For more information on the trie data structure,
@@ -171,37 +178,42 @@ def dict_trie(dict_source: Union[str, Iterable]) -> Trie:
     :param string/list dict_source: a list of vocaburaries or a path to source file
     :return: a trie created from a dictionary input
     """
+    trie = None
 
     if type(dict_source) is str:
         # Receive a file path of the dict to read
         with open(dict_source, "r", encoding="utf8") as f:
             _vocabs = f.read().splitlines()
-            return Trie(_vocabs)
+            trie = Trie(_vocabs)
     elif isinstance(dict_source, Iterable):
         # Received a sequence type object of vocabs
-        return Trie(dict_source)
+        trie = Trie(dict_source)
+    elif isinstance(dict_source, Trie):
+        trie = dict_source
     else:
         raise TypeError(
-            "Type of dict_source must be either str (path to source file) or iterable"
+            "Type of dict_source must be marisa_trie.Trie, or Iterable[str], or str (path to source file)"
         )
 
+    return trie
+
 
 class Tokenizer:
     def __init__(
-        self, custom_dict: Union[str, Iterable] = None, tokenize_engine: str = "newmm"
+        self, custom_dict: Union[Trie, Iterable[str], str] = None, engine: str = "newmm"
     ):
         """
         Initialize tokenizer object
 
-        :param str custom_dict: a file path or a list of vocaburaies to be used to create a trie (default - original lexitron)
-        :param str tokenize_engine: choose between different options of engine to token (newmm, mm, longest)
+        :param str custom_dict: a file path or a list of vocabularies to be used to create a trie
+        :param str engine: tokenization engine to use (newmm, mm, longest)
         """
         self.__trie_dict = None
-        self.word_engine = tokenize_engine
+        self.__engine = engine
         if custom_dict:
             self.__trie_dict = dict_trie(custom_dict)
         else:
-            self.__trie_dict = dict_trie(thai_words())
+            self.__trie_dict = DEFAULT_DICT_TRIE
 
     def word_tokenize(self, text: str) -> List[str]:
         """
@@ -209,12 +221,10 @@ def word_tokenize(self, text: str) -> List[str]:
 
         :return: list of words, tokenized from the text
         """
-        return dict_word_tokenize(
-            text, custom_dict=self.__trie_dict, engine=self.word_engine
-        )
+        return word_tokenize(text, custom_dict=self.__trie_dict, engine=self.__engine)
 
-    def set_tokenize_engine(self, name_engine: str) -> None:
+    def set_tokenize_engine(self, engine: str) -> None:
         """
-        :param str name_engine: choose between different options of engine to token (newmm, mm, longest)
+        :param str engine: tokenization engine to use (newmm, mm, longest)
         """
-        self.word_engine = name_engine
+        self.__engine = engine
diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py
index a3844c2f3..39ae78209 100644
--- a/pythainlp/tokenize/deepcut.py
+++ b/pythainlp/tokenize/deepcut.py
@@ -3,10 +3,21 @@
 Wrapper for deepcut Thai word segmentation
 """
 
-from typing import List
+from typing import List, Union
 
 import deepcut
 
+from marisa_trie import Trie
+
+
+def segment(text: str, custom_dict: Union[Trie, List[str], str] = None) -> List[str]:
+    if not text or not isinstance(text, str):
+        return []
+
+    if custom_dict:
+        if isinstance(custom_dict, Trie):
+            custom_dict = list(custom_dict)
+
+        return deepcut.tokenize(text, custom_dict)
 
-def segment(text: str) -> List[str]:
     return deepcut.tokenize(text)
diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py
index 727e903ec..efb0d5c45 100644
--- a/pythainlp/tokenize/etcc.py
+++ b/pythainlp/tokenize/etcc.py
@@ -29,7 +29,7 @@ def segment(text: str) -> str:
     :return: etcc
     """
 
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     if re.search(r"[เแ]" + _C + r"[" + "".join(_UV) + r"]" + r"\w", text):
diff --git a/pythainlp/tokenize/longest.py b/pythainlp/tokenize/longest.py
index 83ce495a1..d248213e4 100644
--- a/pythainlp/tokenize/longest.py
+++ b/pythainlp/tokenize/longest.py
@@ -6,9 +6,12 @@
 https://github.com/patorn/thaitokenizer/blob/master/thaitokenizer/tokenizer.py
 """
 import re
+from typing import List
 
 from pythainlp.tokenize import DEFAULT_DICT_TRIE
 
+from marisa_trie import Trie
+
 _FRONT_DEP_CHAR = [
     "ะ",
     "ั",
@@ -36,7 +39,7 @@
 
 
 class LongestMatchTokenizer(object):
-    def __init__(self, trie):
+    def __init__(self, trie: Trie):
         self.__trie = trie
 
     def __search_nonthai(self, text: str):
@@ -130,14 +133,17 @@ def __segment_text(self, text: str):
 
         return tokens
 
-    def tokenize(self, text):
+    def tokenize(self, text: str) -> List[str]:
         tokens = self.__segment_text(text)
         return tokens
 
 
-def segment(text, trie=None):
+def segment(text: str, custom_dict: Trie = None) -> List[str]:
     """ตัดคำภาษาไทยด้วยวิธี longest matching"""
-    if not trie:
-        trie = DEFAULT_DICT_TRIE
+    if not text or not isinstance(text, str):
+        return []
+
+    if not custom_dict:
+        custom_dict = DEFAULT_DICT_TRIE
 
-    return LongestMatchTokenizer(trie).tokenize(text)
+    return LongestMatchTokenizer(custom_dict).tokenize(text)
diff --git a/pythainlp/tokenize/multi_cut.py b/pythainlp/tokenize/multi_cut.py
index d161bdf4e..20fddd7bd 100644
--- a/pythainlp/tokenize/multi_cut.py
+++ b/pythainlp/tokenize/multi_cut.py
@@ -8,9 +8,12 @@
 """
 import re
 from collections import defaultdict
+from typing import List
 
 from pythainlp.tokenize import DEFAULT_DICT_TRIE
 
+from marisa_trie import Trie
+
 
 class LatticeString(str):
     """
@@ -40,13 +43,14 @@ def __init__(self, value, multi=None, in_dict=True):
 _PAT_ENG = re.compile(_RE_ENG)
 
 
-def _multicut(text, trie=None):
+def _multicut(text: str, custom_dict: Trie = None):
     """
     ส่งคืน LatticeString คืนมาเป็นก้อนๆ
     """
+    if not custom_dict:
+        custom_dict = DEFAULT_DICT_TRIE
+
     len_text = len(text)
-    if not trie:
-        trie = DEFAULT_DICT_TRIE
     words_at = defaultdict(list)  # main data structure
 
     def serialize(p, p2):  # helper function
@@ -64,7 +68,7 @@ def serialize(p, p2):  # helper function
         p = min(q)
         q -= {p}  # q.pop, but for set
 
-        for w in trie.prefixes(text[p:]):
+        for w in custom_dict.prefixes(text[p:]):
             words_at[p].append(w)
             q.add(p + len(w))
 
@@ -80,7 +84,7 @@ def serialize(p, p2):  # helper function
                 i = p + m.span()[1]
             else:  # skip น้อยที่สุด ที่เป็นไปได้
                 for i in range(p, len_text):
-                    ww = trie.prefixes(text[i:])
+                    ww = custom_dict.prefixes(text[i:])
                     m = _PAT_ENG.match(text[i:])
                     if ww or m:
                         break
@@ -93,7 +97,7 @@ def serialize(p, p2):  # helper function
             q.add(i)
 
 
-def mmcut(text):
+def mmcut(text: str):
     res = []
     for w in _multicut(text):
         mm = min(w.multi, key=lambda x: x.count("/"))
@@ -101,7 +105,7 @@ def mmcut(text):
     return res
 
 
-def _combine(ww):
+def _combine(ww: str):
     if ww == []:
         yield ""
     else:
@@ -114,22 +118,26 @@ def _combine(ww):
                     yield m.replace("/", "|") + "|" + tail
 
 
-def segment(text, trie=None):
+def segment(text: str, custom_dict: Trie = None) -> List[str]:
     """
     ใช้ในการหา list ที่สามารถตัดคำได้ทั้งหมด
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
-    return list(_multicut(text, trie=trie))
+    return list(_multicut(text, custom_dict=custom_dict))
 
 
-def find_all_segment(text, trie=None):
+def find_all_segment(text: str, custom_dict: Trie = None) -> List[str]:
     """
-    ใช้ในการหา list ที่สามารถตัดคำได้ทั้งหมด
+    Get all possible segment variations
+
+    :param str text: input string to be tokenized
+    :return: returns list of segment variations
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
-    ww = list(_multicut(text, trie=trie))
+    ww = list(_multicut(text, custom_dict=custom_dict))
+
     return list(_combine(ww))
diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py
index 066ff1017..cd246fb8f 100644
--- a/pythainlp/tokenize/newmm.py
+++ b/pythainlp/tokenize/newmm.py
@@ -13,6 +13,8 @@
 
 from pythainlp.tokenize import DEFAULT_DICT_TRIE
 
+from marisa_trie import Trie
+
 from .tcc import tcc_pos
 
 # ช่วยตัดพวกภาษาอังกฤษ เป็นต้น
@@ -28,7 +30,7 @@
 _PAT_TWOCHARS = re.compile("[ก-ฮ]{,2}$")
 
 
-def bfs_paths_graph(graph, start, goal):
+def _bfs_paths_graph(graph, start, goal):
     queue = [(start, [start])]
     while queue:
         (vertex, path) = queue.pop(0)
@@ -39,16 +41,16 @@ def bfs_paths_graph(graph, start, goal):
                 queue.append((next, path + [next]))
 
 
-def onecut(text: str, trie):
+def _onecut(text: str, custom_dict: Trie):
     graph = defaultdict(list)  # main data structure
-    allow_pos = tcc_pos(text)  # ตำแหน่งที่ตัด ต้องตรงกับ tcc
+    allow_pos = tcc_pos(text)  # separating position should be aligned with TCC
 
     q = [0]  # min-heap queue
     last_p = 0  # last position for yield
     while q[0] < len(text):
         p = heappop(q)
 
-        for w in trie.prefixes(text[p:]):
+        for w in custom_dict.prefixes(text[p:]):
             p_ = p + len(w)
             if p_ in allow_pos:  # เลือกที่สอดคล้อง tcc
                 graph[p].append(p_)
@@ -57,7 +59,7 @@ def onecut(text: str, trie):
 
         # กรณี length 1 คือ ไม่กำกวมแล้ว ส่งผลลัพธ์ก่อนนี้คืนได้
         if len(q) == 1:
-            pp = next(bfs_paths_graph(graph, last_p, q[0]))
+            pp = next(_bfs_paths_graph(graph, last_p, q[0]))
             # เริ่มต้น last_p = pp[0] เอง
             for p in pp[1:]:
                 yield text[last_p:p]
@@ -74,7 +76,7 @@ def onecut(text: str, trie):
                     if i in allow_pos:  # ใช้ tcc ด้วย
                         ww = [
                             w
-                            for w in trie.prefixes(text[i:])
+                            for w in custom_dict.prefixes(text[i:])
                             if (i + len(w) in allow_pos)
                         ]
                         ww = [w for w in ww if not _PAT_TWOCHARS.match(w)]
@@ -90,12 +92,11 @@ def onecut(text: str, trie):
             heappush(q, i)
 
 
-# ช่วยให้ไม่ต้องพิมพ์ยาวๆ
-def segment(text: str, trie=None) -> List[str]:
-    if not text:
+def segment(text: str, custom_dict: Trie = None) -> List[str]:
+    if not text or not isinstance(text, str):
         return []
 
-    if not trie:
-        trie = DEFAULT_DICT_TRIE
+    if not custom_dict:
+        custom_dict = DEFAULT_DICT_TRIE
 
-    return list(onecut(text, trie))
+    return list(_onecut(text, custom_dict))
diff --git a/pythainlp/tokenize/pyicu.py b/pythainlp/tokenize/pyicu.py
index 33fc0aabc..9d1d37a6d 100644
--- a/pythainlp/tokenize/pyicu.py
+++ b/pythainlp/tokenize/pyicu.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """
-Wrapper for ICU word segmentation
+Wrapper for PyICU word segmentation
+https://github.com/ovalhub/pyicu
 """
 import re
 from typing import List
@@ -18,8 +19,9 @@ def _gen_words(text: str) -> str:
 
 
 def segment(text: str) -> List[str]:
-    if not text:
+    if not text or not isinstance(text, str):
         return []
 
     text = re.sub("([^\u0E00-\u0E7F\n ]+)", " \\1 ", text)
+
     return list(_gen_words(text))
diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py
index 47547ad4e..8949a04a8 100644
--- a/pythainlp/tokenize/tcc.py
+++ b/pythainlp/tokenize/tcc.py
@@ -49,23 +49,23 @@
 PAT_TCC = re.compile("|".join(RE_TCC))
 
 
-def tcc(w: str) -> str:
-    if not w:
+def tcc(text: str) -> str:
+    if not text or not isinstance(text, str):
         return ""
 
     p = 0
-    while p < len(w):
-        m = PAT_TCC.match(w[p:])
+    while p < len(text):
+        m = PAT_TCC.match(text[p:])
         if m:
             n = m.span()[1]
         else:
             n = 1
-        yield w[p : p + n]
+        yield text[p : p + n]
         p += n
 
 
 def tcc_pos(text: str) -> Set[int]:
-    if not text:
+    if not text or not isinstance(text, str):
         return set()
 
     p_set = set()
diff --git a/pythainlp/transliterate/__init__.py b/pythainlp/transliterate/__init__.py
index 91435cc54..f18af4afd 100644
--- a/pythainlp/transliterate/__init__.py
+++ b/pythainlp/transliterate/__init__.py
@@ -1,43 +1,40 @@
 # -*- coding: utf-8 -*-
 
-from pythainlp.tokenize import word_tokenize
-
 
 def romanize(text: str, engine: str = "royin") -> str:
     """
+    Rendering Thai words in the Latin alphabet or "romanization",
+    using the Royal Thai General System of Transcription (RTGS),
+    which is the official system published by the Royal Institute of Thailand.
     ถอดเสียงภาษาไทยเป็นอักษรละติน
     :param str text: Thai text to be romanized
-    :param str engine: 'royin' (default) or 'thai2rom'. 'royin' uses Thai Royal Institute standard. 'thai2rom' is deep learning Thai romanization (require keras).
-    :return: English (more or less) text that spells out how the Thai text should read.
+    :param str engine: 'royin' (default) or 'thai2rom'. 'royin' uses the Royal Thai General System of Transcription issued by Royal Institute of Thailand. 'thai2rom' is deep learning Thai romanization (require keras).
+    :return: A string of Thai words rendered in the Latin alphabet.
     """
 
-    if not isinstance(text, str) or not text:
+    if not text or not isinstance(text, str):
         return ""
 
     if engine == "thai2rom":
         from .thai2rom import romanize
-
-        return romanize(text)
     else:  # use default engine "royin"
         from .royin import romanize
 
-        words = word_tokenize(text)
-        romanized_words = [romanize(word) for word in words]
-
-        return "".join(romanized_words)
+    return romanize(text)
 
 
 def transliterate(text: str, engine: str = "ipa") -> str:
     """
+    Transliteration of Thai text
     :param str text: Thai text to be transliterated
-    :param str engine: 'ipa' (default) or 'pyicu'.
-    :return: A string of Internaitonal Phonetic Alphabets indicating how the text should read.
+    :param str engine: 'ipa' (International Phonetic Alphabet; default) or 'icu'.
+    :return: A string of International Phonetic Alphabets indicating how the text should be pronounced.
     """
 
-    if not isinstance(text, str) or not text:
+    if not text or not isinstance(text, str):
         return ""
 
-    if engine == "pyicu":
+    if engine == "icu" or engine == "pyicu":
         from .pyicu import transliterate
     else:
         from .ipa import transliterate
diff --git a/pythainlp/transliterate/ipa.py b/pythainlp/transliterate/ipa.py
index be7c1e1c6..b6b9f5833 100644
--- a/pythainlp/transliterate/ipa.py
+++ b/pythainlp/transliterate/ipa.py
@@ -1,6 +1,8 @@
 # -*- coding: utf-8 -*-
 """
 Transliterating text to International Phonetic Alphabet (IPA)
+Using epitran
+https://github.com/dmort27/epitran
 """
 import epitran
 
diff --git a/pythainlp/transliterate/pyicu.py b/pythainlp/transliterate/pyicu.py
index 5e4a755aa..e5850ac33 100644
--- a/pythainlp/transliterate/pyicu.py
+++ b/pythainlp/transliterate/pyicu.py
@@ -1,13 +1,20 @@
 # -*- coding: utf-8 -*-
+"""
+Transliterating text to International Phonetic Alphabet (IPA)
+Using International Components for Unicode (ICU)
+https://github.com/ovalhub/pyicu
+"""
 from icu import Transliterator
 
-
 _ICU_THAI_TO_LATIN = Transliterator.createInstance("Thai-Latin")
 
 
 # ถอดเสียงภาษาไทยเป็นอักษรละติน
 def transliterate(text: str) -> str:
     """
+    Use ICU (International Components for Unicode) for transliteration
     ถอดเสียงภาษาไทยเป็นอักษรละติน รับค่า ''str'' ข้อความ คืนค่า ''str'' อักษรละติน
+    :param str text: Thai text to be transliterated.
+    :return: A string of International Phonetic Alphabets indicating how the text should be pronounced.
     """
     return _ICU_THAI_TO_LATIN.transliterate(text)
diff --git a/pythainlp/transliterate/royin.py b/pythainlp/transliterate/royin.py
index d6f6f71c8..6a80e3230 100644
--- a/pythainlp/transliterate/royin.py
+++ b/pythainlp/transliterate/royin.py
@@ -1,7 +1,14 @@
 # -*- coding: utf-8 -*-
-
+"""
+The Royal Thai General System of Transcription (RTGS)
+is the official system for rendering Thai words in the Latin alphabet.
+It was published by the Royal Institute of Thailand.
+https://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription
+"""
 import re
 
+from pythainlp import word_tokenize
+
 # สระ
 _vowel_patterns = """เ*ียว,\\1iao
 แ*็ว,\\1aeo
@@ -18,6 +25,7 @@
 *าย,\\1ai
 ไ*ย,\\1ai
 *ัย,\\1ai
+ไ**,\\1\\2ai
 ไ*,\\1ai
 ใ*,\\1ai
 *ว*,\\1ua\\2
@@ -117,9 +125,12 @@
 )
 
 
-def _normalize(text: str) -> str:
-    """ตัดอักษรที่ไม่ออกเสียง (การันต์ ไปยาลน้อย ไม้ยมก*) และวรรณยุกต์ทิ้ง"""
-    return _RE_NORMALIZE.sub("", text)
+def _normalize(word: str) -> str:
+    """
+    Remove silent (unpronounced) characters and tone marks
+    ตัดอักษรที่ไม่ออกเสียง (การันต์ ไปยาลน้อย ไม้ยมก*) และวรรณยุกต์ทิ้ง
+    """
+    return _RE_NORMALIZE.sub("", word)
 
 
 def _replace_vowels(word: str) -> str:
@@ -162,19 +173,39 @@ def _replace_consonants(word: str, res: str) -> str:
     return word
 
 
-def romanize(word: str) -> str:
+# Support function for romanize()
+def _romanize(word: str) -> str:
+    """
+    :param str word: Thai word to be romanized, should have already been tokenized.
+    :return: Spells out how the Thai word should be pronounced.
+    """
     if not isinstance(word, str) or not word:
         return ""
 
-    word2 = _replace_vowels(_normalize(word))
-    res = _RE_CONSONANT.findall(word2)
+    word = _replace_vowels(_normalize(word))
+    res = _RE_CONSONANT.findall(word)
 
     # 2-character word, all consonants
-    if len(word2) == 2 and len(res) == 2:
-        word2 = list(word2)
-        word2.insert(1, "o")
-        word2 = "".join(word2)
-
-    word2 = _replace_consonants(word2, res)
-    
-    return word2
\ No newline at end of file
+    if len(word) == 2 and len(res) == 2:
+        word = list(word)
+        word.insert(1, "o")
+        word = "".join(word)
+
+    word = _replace_consonants(word, res)
+
+    return word
+
+
+def romanize(text: str) -> str:
+    """
+    Rendering Thai words in the Latin alphabet or "romanization",
+    using the Royal Thai General System of Transcription (RTGS),
+    which is the official system published by the Royal Institute of Thailand.
+    ถอดเสียงภาษาไทยเป็นอักษรละติน
+    :param str text: Thai text to be romanized
+    :return: A string of Thai words rendered in the Latin alphabet.
+    """
+    words = word_tokenize(text)
+    romanized_words = [_romanize(word) for word in words]
+
+    return "".join(romanized_words)
diff --git a/pythainlp/transliterate/thai2rom.py b/pythainlp/transliterate/thai2rom.py
index 1dc5a5267..41443d020 100644
--- a/pythainlp/transliterate/thai2rom.py
+++ b/pythainlp/transliterate/thai2rom.py
@@ -149,7 +149,7 @@ def __encode_input(self, name):
     def romanize(self, text):
         """
         :param str text: Thai text to be romanized
-        :return: English (more or less) text that spells out how the Thai text should read.
+        :return: English (more or less) text that spells out how the Thai text should be pronounced.
         """
         return self.__decode_sequence(self.__encode_input(text))
 
diff --git a/pythainlp/ulmfit/__init__.py b/pythainlp/ulmfit/__init__.py
index 00c9f8891..f2992d549 100644
--- a/pythainlp/ulmfit/__init__.py
+++ b/pythainlp/ulmfit/__init__.py
@@ -1,24 +1,26 @@
 # -*- coding: utf-8 -*-
-
 """
-Code by https://github.com/cstorm125/thai2fit/
+Code by Charin
+https://github.com/cstorm125/thai2fit/
 """
 import collections
 import re
-import emoji
 
+from typing import List
+
+import emoji
 import numpy as np
 import torch
-from fastai.text import TK_REP, BaseTokenizer, Tokenizer
+
+from fastai.text import TK_REP, BaseTokenizer
 from fastai.text.transform import (
-    deal_caps,
     fix_html,
     rm_useless_spaces,
     spec_add_spaces,
     replace_all_caps,
 )
+from pythainlp import word_tokenize
 from pythainlp.corpus import download, get_corpus_path
-from pythainlp.tokenize import word_tokenize
 from pythainlp.util import normalize as normalize_char_order
 
 __all__ = [
@@ -57,47 +59,51 @@ class ThaiTokenizer(BaseTokenizer):
     https://docs.fast.ai/text.transform#BaseTokenizer
     """
 
-    def __init__(self, lang="th"):
+    def __init__(self, lang: str = "th"):
         self.lang = lang
 
-    def tokenizer(self, t):
+    def tokenizer(self, text: str) -> List[str]:
         """
         :meth: tokenize text with a frozen newmm engine
-        :param str t: text to tokenize
+        :param str text: text to tokenize
         :return: tokenized text
         """
-        return word_tokenize(t, engine="ulmfit")
+        return word_tokenize(text, engine="ulmfit")
 
     def add_special_cases(self, toks):
         pass
 
 
-def replace_rep_after(t):
-    "Replace repetitions at the character level in `t` after the repetition"
+def replace_rep_after(text: str) -> str:
+    "Replace repetitions at the character level in `text` after the repetition"
 
     def _replace_rep(m):
         c, cc = m.groups()
         return f"{c}{TK_REP}{len(cc)+1}"
 
     re_rep = re.compile(r"(\S)(\1{2,})")
-    return re_rep.sub(_replace_rep, t)
+
+    return re_rep.sub(_replace_rep, text)
 
 
-def rm_useless_newlines(t):
-    "Remove multiple newlines in `t`."
-    return re.sub(r"[\n]{2,}", " ", t)
+def rm_useless_newlines(text: str) -> str:
+    "Remove multiple newlines in `text`."
 
+    return re.sub(r"[\n]{2,}", " ", text)
 
-def rm_brackets(t):
+
+def rm_brackets(text: str) -> str:
     "Remove all empty brackets from `t`."
-    new_line = re.sub(r"\(\)", "", t)
+    new_line = re.sub(r"\(\)", "", text)
     new_line = re.sub(r"\{\}", "", new_line)
     new_line = re.sub(r"\[\]", "", new_line)
+
     return new_line
 
 
 def ungroup_emoji(toks):
     "Ungroup emojis"
+
     res = []
     for tok in toks:
         if emoji.emoji_count(tok) == len(tok):
@@ -105,6 +111,7 @@ def ungroup_emoji(toks):
                 res.append(char)
         else:
             res.append(tok)
+
     return res
 
 
@@ -134,12 +141,12 @@ def lowercase_all(toks):
 _tokenizer = ThaiTokenizer()
 
 
-def document_vector(text, learn, data, agg="mean"):
+def document_vector(text: str, learn, data, agg: str = "mean"):
     """
     :meth: `document_vector` get document vector using fastai language model and data bunch
     :param str text: text to extract embeddings
     :param learn: fastai language model learner
-    :param data: fastai data bunch 
+    :param data: fastai data bunch
     :param agg: how to aggregate embeddings
     :return: `numpy.array` of document vector sized 400 based on the encoder of the model
     """
@@ -154,6 +161,7 @@ def document_vector(text, learn, data, agg="mean"):
         res = res.sum(0)
     else:
         raise ValueError("Aggregate by mean or sum")
+
     return res
 
 
diff --git a/pythainlp/util/digitconv.py b/pythainlp/util/digitconv.py
index 3982168d6..75717431a 100644
--- a/pythainlp/util/digitconv.py
+++ b/pythainlp/util/digitconv.py
@@ -61,7 +61,7 @@ def thai_digit_to_arabic_digit(text: str) -> str:
     :param str text: Text with Thai digits such as '๑', '๒', '๓'
     :return: Text with Thai digits being converted to Arabic digits such as '1', '2', '3'
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     newtext = []
@@ -79,7 +79,7 @@ def arabic_digit_to_thai_digit(text: str) -> str:
     :param str text: Text with Arabic digits such as '1', '2', '3'
     :return: Text with Arabic digits being converted to Thai digits such as '๑', '๒', '๓'
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     newtext = []
@@ -97,7 +97,7 @@ def digit_to_text(text: str) -> str:
     :param str text: Text with digits such as '1', '2', '๓', '๔'
     :return: Text with digits being spelled out in Thai
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return ""
 
     newtext = []
diff --git a/pythainlp/util/thai.py b/pythainlp/util/thai.py
index 70e5a9d15..53a8f4676 100644
--- a/pythainlp/util/thai.py
+++ b/pythainlp/util/thai.py
@@ -44,7 +44,7 @@ def countthai(text: str, ignore_chars: str = _DEFAULT_IGNORE_CHARS) -> float:
     :param str text: input text
     :return: float, proportion of characters in the text that is Thai character
     """
-    if not text:
+    if not text or not isinstance(text, str):
         return 0
 
     if not ignore_chars:
diff --git a/setup.cfg b/setup.cfg
index 785b90fc3..4a4459102 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 2.0.3
+current_version = 2.0.4
 commit = True
 tag = True
 
diff --git a/setup.py b/setup.py
index a6678cb86..7449ada24 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
 
 setup(
     name="pythainlp",
-    version="2.0.3",
+    version="2.0.4",
     description="Thai Natural Language Processing library",
     long_description=readme,
     long_description_content_type="text/markdown",
diff --git a/tests/__init__.py b/tests/__init__.py
index b2f7c711f..3e8782c3f 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -29,6 +29,7 @@
 from pythainlp.tag.locations import tag_provinces
 from pythainlp.tag.named_entity import ThaiNameTagger
 from pythainlp.tokenize import (
+    DEFAULT_DICT_TRIE,
     FROZEN_DICT_TRIE,
     Tokenizer,
     dict_trie,
@@ -43,6 +44,7 @@
     tcc,
     word_tokenize,
 )
+from pythainlp.tokenize import deepcut as tokenize_deepcut
 from pythainlp.tokenize import pyicu as tokenize_pyicu
 from pythainlp.transliterate import romanize, transliterate
 from pythainlp.transliterate.ipa import trans_list, xsampa_list
@@ -290,33 +292,7 @@ def test_ner(self):
     # ### pythainlp.tokenize
 
     def test_dict_word_tokenize(self):
-        self.assertEqual(dict_word_tokenize("", custom_dict=FROZEN_DICT_TRIE), [])
-        self.assertIsNotNone(
-            dict_word_tokenize("รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE)
-        )
-        self.assertIsNotNone(dict_trie(()))
-        self.assertIsNotNone(
-            dict_word_tokenize(
-                "รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="newmm"
-            )
-        )
-        self.assertIsNotNone(
-            dict_word_tokenize(
-                "รถไฟฟ้ากรุงเทพBTSหูว์ค์",
-                custom_dict=FROZEN_DICT_TRIE,
-                engine="longest",
-            )
-        )
-        self.assertIsNotNone(
-            dict_word_tokenize(
-                "รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="mm"
-            )
-        )
-        self.assertIsNotNone(
-            dict_word_tokenize(
-                "รถไฟฟ้ากรุงเทพBTSหูว์ค์", custom_dict=FROZEN_DICT_TRIE, engine="XX"
-            )
-        )
+        self.assertEqual(dict_word_tokenize(""), [])
 
     def test_etcc(self):
         self.assertEqual(etcc.segment(""), "")
@@ -334,14 +310,34 @@ def test_word_tokenize(self):
             word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย"),
             ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
         )
-        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="ulmfit"))
-        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="XX"))
-        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="deepcut"))
-        self.assertIsNotNone(word_tokenize("", engine="deepcut"))
+
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="newmm"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="mm"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="longest"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="ulmfit"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="icu"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="deepcut"))
+        self.assertIsNotNone(word_tokenize("หมอนทองตากลมหูว์MBK39", engine="XX"))
+
+        self.assertIsNotNone(dict_trie(()))
+        self.assertIsNotNone(dict_trie(("ทดสอบ", "สร้าง", "Trie")))
+        self.assertIsNotNone(dict_trie(["ทดสอบ", "สร้าง", "Trie"]))
+        self.assertIsNotNone(dict_trie(thai_words()))
+        self.assertIsNotNone(dict_trie(FROZEN_DICT_TRIE))
+
+        self.assertIsNotNone(word_tokenize("รถไฟฟ้าBTS", custom_dict=DEFAULT_DICT_TRIE))
+        self.assertIsNotNone(
+            word_tokenize("ทดสอบ", engine="deepcut", custom_dict=FROZEN_DICT_TRIE)
+        )
+        self.assertIsNotNone(
+            word_tokenize("ทดสอบ", engine="XX", custom_dict=FROZEN_DICT_TRIE)
+        )
 
     def test_Tokenizer(self):
-        t_test = Tokenizer()
+        t_test = Tokenizer(FROZEN_DICT_TRIE)
         self.assertEqual(t_test.word_tokenize(""), [])
+        t_test.set_tokenize_engine("longest")
+        self.assertEqual(t_test.word_tokenize(None), [])
 
     def test_word_tokenize_icu(self):
         self.assertEqual(tokenize_pyicu.segment(None), [])
@@ -351,14 +347,17 @@ def test_word_tokenize_icu(self):
             ["ฉัน", "รัก", "ภาษา", "ไทย", "เพราะ", "ฉัน", "เป็น", "คน", "ไทย"],
         )
 
-    # def test_word_tokenize_deepcut(self):
-    # self.assertEqual(deepcut.segment(None), [])
-    # self.assertEqual(deepcut.segment(""), [])
-    # self.assertIsNotNone(word_tokenize("ลึกลงไปลลลล", engine="deepcut"))
+    def test_word_tokenize_deepcut(self):
+        self.assertEqual(tokenize_deepcut.segment(None), [])
+        self.assertEqual(tokenize_deepcut.segment(""), [])
+        self.assertIsNotNone(tokenize_deepcut.segment("ทดสอบ", DEFAULT_DICT_TRIE))
+        self.assertIsNotNone(tokenize_deepcut.segment("ทดสอบ", ["ทด", "สอบ"]))
+        self.assertIsNotNone(word_tokenize("ทดสอบ", engine="deepcut"))
 
-    def test_word_tokenize_longest_matching(self):
+    def test_word_tokenize_longest(self):
         self.assertEqual(longest.segment(None), [])
         self.assertEqual(longest.segment(""), [])
+        self.assertIsNotNone(longest.segment("กรุงเทพฯมากๆเพราโพาง BKKฯ"))
         self.assertEqual(
             word_tokenize("ฉันรักภาษาไทยเพราะฉันเป็นคนไทย", engine="longest"),
             ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
@@ -373,7 +372,10 @@ def test_word_tokenize_mm(self):
             ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
         )
 
+        self.assertIsNotNone(multi_cut.mmcut("ทดสอบ"))
+
         self.assertIsNotNone(multi_cut.find_all_segment("รถไฟฟ้ากรุงเทพมหานครBTS"))
+        self.assertEqual(multi_cut.find_all_segment(None), [])
 
     def test_word_tokenize_newmm(self):
         self.assertEqual(newmm.segment(None), [])
@@ -383,7 +385,9 @@ def test_word_tokenize_newmm(self):
             ["ฉัน", "รัก", "ภาษาไทย", "เพราะ", "ฉัน", "เป็น", "คนไทย"],
         )
         self.assertEqual(
-            word_tokenize("สวัสดีครับ สบายดีไหมครับ", engine="newmm"),
+            word_tokenize(
+                "สวัสดีครับ สบายดีไหมครับ", engine="newmm", keep_whitespace=True
+            ),
             ["สวัสดี", "ครับ", " ", "สบายดี", "ไหม", "ครับ"],
         )
         self.assertEqual(
@@ -391,7 +395,7 @@ def test_word_tokenize_newmm(self):
         )
         self.assertEqual(word_tokenize("จุ๋มง่วง", engine="newmm"), ["จุ๋ม", "ง่วง"])
         self.assertEqual(
-            word_tokenize("จุ๋ม   ง่วง", engine="newmm", whitespaces=False),
+            word_tokenize("จุ๋ม   ง่วง", engine="newmm", keep_whitespace=False),
             ["จุ๋ม", "ง่วง"],
         )
 
@@ -405,9 +409,10 @@ def test_sent_tokenize(self):
         self.assertEqual(sent_tokenize("รักน้ำ  รักปลา  "), ["รักน้ำ", "รักปลา"])
 
     def test_subword_tokenize(self):
-        self.assertEqual(subword_tokenize(None), "")
-        self.assertEqual(subword_tokenize(""), "")
-        self.assertIsNotNone(subword_tokenize("สวัสดีดาวอังคาร"))
+        self.assertEqual(subword_tokenize(None), [])
+        self.assertEqual(subword_tokenize(""), [])
+        self.assertIsNotNone(subword_tokenize("สวัสดีดาวอังคาร", engine="tcc"))
+        self.assertIsNotNone(subword_tokenize("สวัสดีดาวอังคาร", engine="etcc"))
 
     def test_syllable_tokenize(self):
         self.assertEqual(syllable_tokenize(None), [])
@@ -434,7 +439,13 @@ def test_romanize(self):
         self.assertEqual(romanize_royin(None), "")
         self.assertEqual(romanize_royin(""), "")
         self.assertEqual(romanize_royin("หาย"), "hai")
-        self.assertEqual(romanize_royin("หยาก"), "yak")
+        self.assertEqual(romanize_royin("หมอก"), "mok")
+        # self.assertEqual(romanize_royin("มหา"), "maha")  # not pass
+        # self.assertEqual(romanize_royin("หยาก"), "yak")  # not pass
+        # self.assertEqual(romanize_royin("อยาก"), "yak")  # not pass
+        # self.assertEqual(romanize_royin("ยมก"), "yamok")  # not pass
+        # self.assertEqual(romanize_royin("กลัว"), "klua")  # not pass
+        # self.assertEqual(romanize_royin("กลัว"), "klua")  # not pass
 
         self.assertEqual(romanize("แมว", engine="royin"), "maeo")
         self.assertEqual(romanize("เดือน", engine="royin"), "duean")