diff --git a/.travis.yml b/.travis.yml
index 5edc832d1..a44fee55c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,17 +3,21 @@
 
 language: python
 python:
-  - "2.7"
   - "3.4"
   - "3.5"
   - "3.6"
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
-install: pip install -U tox
+install:
+  - pip install -r requirements-travis.txt
+  - pip install coveralls
 
 os:
   - linux
 # command to run tests, e.g. python setup.py test
-script: python setup.py test
+script:
+  coverage run --source=pythainlp setup.py test
+after_success:
+  coveralls
 
 # After you create the Github repo and add it to Travis, run the
 # travis_pypi_setup.py script to finish PyPI deployment setup
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 1f70fce22..842282fe7 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -7,6 +7,11 @@ Development Lead
 
 * Wannaphong Phatthiyaphaibun <wannaphong@yahoo.com>
 
+TCC & THAI SOUNDEX CODE
+-----------------------
+
+* Korakot Chaovavanich
+
 Contributors
 ------------
 
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
deleted file mode 100644
index 5ecdb094c..000000000
--- a/CONTRIBUTING.rst
+++ /dev/null
@@ -1,114 +0,0 @@
-.. highlight:: shell
-
-============
-Contributing
-============
-
-Contributions are welcome, and they are greatly appreciated! Every
-little bit helps, and credit will always be given.
-
-You can contribute in many ways:
-
-Types of Contributions
-----------------------
-
-Report Bugs
-~~~~~~~~~~~
-
-Report bugs at https://github.com/wannaphongcom/pythainlp/issues.
-
-If you are reporting a bug, please include:
-
-* Your operating system name and version.
-* Any details about your local setup that might be helpful in troubleshooting.
-* Detailed steps to reproduce the bug.
-
-Fix Bugs
-~~~~~~~~
-
-Look through the GitHub issues for bugs. Anything tagged with "bug"
-and "help wanted" is open to whoever wants to implement it.
-
-Implement Features
-~~~~~~~~~~~~~~~~~~
-
-Look through the GitHub issues for features. Anything tagged with "enhancement"
-and "help wanted" is open to whoever wants to implement it.
-
-Write Documentation
-~~~~~~~~~~~~~~~~~~~
-
-PyThai-NLP could always use more documentation, whether as part of the
-official PyThai-NLP docs, in docstrings, or even on the web in blog posts,
-articles, and such.
-
-Submit Feedback
-~~~~~~~~~~~~~~~
-
-The best way to send feedback is to file an issue at https://github.com/wannaphongcom/pythainlp/issues.
-
-If you are proposing a feature:
-
-* Explain in detail how it would work.
-* Keep the scope as narrow as possible, to make it easier to implement.
-* Remember that this is a volunteer-driven project, and that contributions
-  are welcome :)
-
-Get Started!
-------------
-
-Ready to contribute? Here's how to set up `pythainlp` for local development.
-
-1. Fork the `pythainlp` repo on GitHub.
-2. Clone your fork locally::
-
-    $ git clone git@github.com:your_name_here/pythainlp.git
-
-3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development::
-
-    $ mkvirtualenv pythainlp
-    $ cd pythainlp/
-    $ python setup.py develop
-
-4. Create a branch for local development::
-
-    $ git checkout -b name-of-your-bugfix-or-feature
-
-   Now you can make your changes locally.
-
-5. When you're done making changes, check that your changes pass flake8 and the tests, including testing other Python versions with tox::
-
-    $ flake8 pythainlp tests
-    $ python setup.py test or py.test
-    $ tox
-
-   To get flake8 and tox, just pip install them into your virtualenv.
-
-6. Commit your changes and push your branch to GitHub::
-
-    $ git add .
-    $ git commit -m "Your detailed description of your changes."
-    $ git push origin name-of-your-bugfix-or-feature
-
-7. Submit a pull request through the GitHub website.
-
-Pull Request Guidelines
------------------------
-
-Before you submit a pull request, check that it meets these guidelines:
-
-1. The pull request should include tests.
-2. If the pull request adds functionality, the docs should be updated. Put
-   your new functionality into a function with a docstring, and add the
-   feature to the list in README.rst.
-3. The pull request should work for Python 2.6, 2.7, 3.3, 3.4 and 3.5, and for PyPy. Check
-   https://travis-ci.org/wannaphongcom/pythainlp/pull_requests
-   and make sure that the tests pass for all supported Python versions.
-
-Tips
-----
-
-To run a subset of tests::
-
-
-    $ python -m unittest tests.test_pythainlp
diff --git a/README.md b/README.md
index 638cd5dd5..73ac0d84c 100644
--- a/README.md
+++ b/README.md
@@ -2,18 +2,85 @@
 [![PyPI Downloads](https://img.shields.io/pypi/dm/pythainlp.png)]
 [![pypi](https://img.shields.io/pypi/v/pythainlp.svg)](https://pypi.python.org/pypi/pythainlp)
 [![Build Status](https://travis-ci.org/wannaphongcom/pythainlp.svg?branch=develop)](https://travis-ci.org/wannaphongcom/pythainlp)
-[![Build status](https://ci.appveyor.com/api/projects/status/uxerymgggp1uch0p?svg=true)](https://ci.appveyor.com/project/wannaphongcom/pythainlp)
+[![Build status](https://ci.appveyor.com/api/projects/status/uxerymgggp1uch0p?svg=true)](https://ci.appveyor.com/project/wannaphongcom/pythainlp)[![Code Issues](https://www.quantifiedcode.com/api/v1/project/7f699ed4cad24be18d0d24ebd60d7543/badge.svg)](https://www.quantifiedcode.com/app/project/7f699ed4cad24be18d0d24ebd60d7543)[![Coverage Status](https://coveralls.io/repos/github/wannaphongcom/pythainlp/badge.svg?branch=pythainlp1.4)](https://coveralls.io/github/wannaphongcom/pythainlp?branch=pythainlp1.4)
 
+## English
 
-Homepages :[https://sites.google.com/view/pythainlp/home](https://sites.google.com/view/pythainlp/home)
+Thai natural language processing in Python.
+
+PyThaiNLP is a Python module similar to NLTK, but it works with the Thai language.
+
+It supports Python 3.4+.
+
+### Project status
+
+Developing
+
+### Version
+
+1.4
+
+### Capability
+
+- Thai segment
+- Thai wordnet
+- Thai Character Clusters (TCC) and ETCC
+- Thai stop word
+- Thai meta sound
+- Thai soundex
+- Thai postaggers
+- Thai romanization
+- Check the wrong words in Thai.
+
+and much more.
+
+### Install
+
+**using pip.**
+
+```sh
+$ pip install pythainlp
+```
+
+**Install in  Windows**
+
+Download PyICU from [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) and install it, then install pythainlp using pip.
+
+```
+pip install pythainlp
+```
+
+**Install in MacOS**
+
+```sh
+$ brew install icu4c --force
+$ brew link --force icu4c
+$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
+```
+
+### Documentation
+
+Read on https://github.com/wannaphongcom/pythainlp/blob/pythainlp1.4/docs/pythainlp-1-4-eng.md
+
+### License
+
+Apache Software License 2.0
+
+## ภาษาไทย
+
+[![PyPI Downloads](https://img.shields.io/pypi/dm/pythainlp.png)](https://pypi.python.org/pypi/pythainlp)
+[![pypi](https://img.shields.io/pypi/v/pythainlp.svg)](https://pypi.python.org/pypi/pythainlp)
+[![Build Status](https://travis-ci.org/wannaphongcom/pythainlp.svg?branch=develop)](https://travis-ci.org/wannaphongcom/pythainlp)
+[![Build status](https://ci.appveyor.com/api/projects/status/uxerymgggp1uch0p?svg=true)](https://ci.appveyor.com/project/wannaphongcom/pythainlp)[![Code Issues](https://www.quantifiedcode.com/api/v1/project/7f699ed4cad24be18d0d24ebd60d7543/badge.svg)](https://www.quantifiedcode.com/app/project/7f699ed4cad24be18d0d24ebd60d7543)[![Coverage Status](https://coveralls.io/repos/github/wannaphongcom/pythainlp/badge.svg?branch=pythainlp1.4)](https://coveralls.io/github/wannaphongcom/pythainlp?branch=pythainlp1.4)
 
 ประมวลภาษาธรรมชาติภาษาไทยในภาษา Python
 
-Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่องานวิจัยและพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python
+Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน !**
+
+> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
 
 รองรับ Python 3.4 ขึ้นไป
 
-  - เอกสารการใช้งาน : [https://sites.google.com/view/pythainlp/home](https://sites.google.com/view/pythainlp/home)
   - หน้าหลัก GitHub :  [https://github.com/wannaphongcom/pythainlp](https://github.com/wannaphongcom/pythainlp)
 
 ### สถานะโครงการ
@@ -21,7 +88,15 @@ Natural language processing หรือ การประมวลภาษา
 กำลังพัฒนา 
 
 ### Version
-1.3
+1.4
+
+### สิ่งใหม่ที่เพิ่มเข้ามาใน PyThaiNLP 1.4
+
+- รองรับ  Thai Character Clusters (TCC) และ ETCC
+- Thai WordNet ตัวใหม่
+- เพิ่มหลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน
+- เพิ่ม Meta Sound ภาษาไทย
+- เพิ่ม Thai Soundex
 
 ### ความสามารถ
   - ตัดคำภาษาไทย
@@ -31,9 +106,14 @@ Natural language processing หรือ การประมวลภาษา
   - เรียงจำนวนคำของประโยค
   - แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
   - เช็คคำผิดในภาษาไทย
+  - รองรับ  Thai Character Clusters (TCC) และ ETCC
+  - Thai WordNet
+  - Stop Word ภาษาไทย
+  - Meta Sound ภาษาไทย
+  - Thai Soundex
   - และอื่น ๆ 
 
-# ติดตั้ง
+### ติดตั้ง
 
 รองรับ Python 3.4 ขึ้นไป
 
@@ -70,9 +150,9 @@ $ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip i
 ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
 
 
-# เอกสารการใช้งานเบื้องต้น
+### เอกสารการใช้งาน
 
-อ่านได้ที่ https://github.com/wannaphongcom/pythainlp/blob/master/docs/pythainlp-1-3-thai.md
+อ่านได้ที่ https://github.com/wannaphongcom/pythainlp/blob/pythainlp1.4/docs/pythainlp-1-4-thai.md
 
 ### License
 
diff --git a/References.md b/References.md
new file mode 100644
index 000000000..1e29854a3
--- /dev/null
+++ b/References.md
@@ -0,0 +1,5 @@
+# References
+
+Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
+
+T. Teeramunkong, V. Sornlertlamvanich, T. Tanhermhong and W. Chinnan, “Character cluster based Thai information retrieval,” in IRAL '00 Proceedings of the fifth international workshop on on Information retrieval with Asian languages, 2000. 
\ No newline at end of file
diff --git a/appveyor.yml b/appveyor.yml
index 51d87df9e..c9efefcc9 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,97 +1,20 @@
-environment:
-  global:
-    # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the
-    # /E:ON and /V:ON options are not enabled in the batch script intepreter
-    # See: http://stackoverflow.com/a/13751649/163740
-    CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\appveyor\\run_with_env.cmd"
+build: false
 
+environment:
   matrix:
-    # Pre-installed Python versions, which Appveyor may upgrade to
-    # a later point release.
-    # See: http://www.appveyor.com/docs/installed-software#python
-
-    - PYTHON: "C:\\Python27"
-      PYTHON_VERSION: "2.7.x" # currently 2.7.9
-      PYTHON_ARCH: "32"
-
-    - PYTHON: "C:\\Python27-x64"
-      PYTHON_VERSION: "2.7.x" # currently 2.7.9
-      PYTHON_ARCH: "64"
-
-    - PYTHON: "C:\\Python34"
-      PYTHON_VERSION: "3.4.x" # currently 3.4.3
-      PYTHON_ARCH: "32"
+    - PYTHON: "C:/Python35"
+    - PYTHON: "C:/Python36"
 
-    - PYTHON: "C:\\Python34-x64"
-      PYTHON_VERSION: "3.4.x" # currently 3.4.3
-      PYTHON_ARCH: "64"
-
-    - PYTHON: "C:\\Python35"
-      PYTHON_VERSION: "3.5.x" # currently 3.4.3
-      PYTHON_ARCH: "32"
-
-    - PYTHON: "C:\\Python35-x64"
-      PYTHON_VERSION: "3.5.x" # currently 3.4.3
-      PYTHON_ARCH: "64"
+init:
+  - "ECHO %PYTHON%"
+  - ps: "ls C:/Python*"
 
 install:
-  # If there is a newer build queued for the same PR, cancel this one.
-  # The AppVeyor 'rollout builds' option is supposed to serve the same
-  # purpose but it is problematic because it tends to cancel builds pushed
-  # directly to master instead of just PR builds (or the converse).
-  # credits: JuliaLang developers.
-  - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod `
-        https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | `
-        Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { `
-          throw "There are newer queued builds for this pull request, failing early." }
-  - ECHO "Filesystem root:"
-  - ps: "ls \"C:/\""
-
-  - ECHO "Installed SDKs:"
-  - ps: "ls \"C:/Program Files/Microsoft SDKs/Windows\""
-
-  # Install Python (from the official .msi of http://python.org) and pip when
-  # not already installed.
-  - ps: if (-not(Test-Path($env:PYTHON))) { & appveyor\install.ps1 }
-
-  # Prepend newly installed Python to the PATH of this build (this cannot be
-  # done from inside the powershell script as it would require to restart
-  # the parent CMD process).
-  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
-
-  # Check that we have the expected version and architecture for Python
-  - "python --version"
-  - "python -c \"import struct; print(struct.calcsize('P') * 8)\""
-
-  # Upgrade to the latest version of pip to avoid it displaying warnings
-  # about it being out of date.
-  - "pip install --disable-pip-version-check --user --upgrade pip"
-
-  # Install the build dependencies of the project. If some dependencies contain
-  # compiled extensions and are not provided as pre-built wheel packages,
-  # pip will build them from source using the MSVC compiler matching the
-  # target Python version and architecture
-  - "%CMD_IN_ENV% pip install -r dev-requirements.txt"
-
-build_script:
-  # Build the compiled extension
-  - "%CMD_IN_ENV% python setup.py build"
+  # FIXME: updating pip fails with PermissionError
+  # - "%PYTHON%/Scripts/pip.exe install -U pip setuptools"
+  - "%PYTHON%/Scripts/pip.exe install -e ."
 
 test_script:
-  # Run the project tests
-  - "%CMD_IN_ENV% python setup.py nosetests"
-
-after_test:
-  # If tests are successful, create binary packages for the project.
-  - "%CMD_IN_ENV% python setup.py bdist_wheel"
-  - "%CMD_IN_ENV% python setup.py bdist_wininst"
-  - "%CMD_IN_ENV% python setup.py bdist_msi"
-  - ps: "ls dist"
-
-artifacts:
-  # Archive the generated packages in the ci.appveyor.com build report.
-  - path: dist\*
-
-#on_success:
-#  - TODO: upload the content of dist/*.whl to a public wheelhouse
-#
+  - "%PYTHON%/Scripts/pip.exe --version"  # log the pip version used by this matrix entry
+  - "%PYTHON%/python.exe --version"  # log the interpreter version (replaces an HTTPie-only http.exe step)
+  - "%PYTHON%/python.exe setup.py test"  # run the project test suite
diff --git a/docs/pythainlp-1-4-eng.md b/docs/pythainlp-1-4-eng.md
new file mode 100644
index 000000000..656e29480
--- /dev/null
+++ b/docs/pythainlp-1-4-eng.md
@@ -0,0 +1,307 @@
+# User manual PyThaiNLP 1.4
+
+## API
+
+### Thai segment
+
+```python
+from pythainlp.tokenize import word_tokenize
+word_tokenize(text,engine)
+```
+**text** is thai text.
+
+**engine** is the Thai word segmentation engine. There are 6 engines:
+
+1. icu - using pyicu. (default)
+2. dict - using a dictionary. Returns False if the text cannot be segmented.
+3. mm - using Maximum Matching algorithm in thai segment.
+4. newmm - using Maximum Matching algorithm in thai segment. credit Korakot Chaovavanich  from https://www.facebook.com/groups/408004796247683/permalink/431283740586455/
+5. pylexto using LexTo in thai segment.
+6. deepcut using deepcut from https://github.com/rkcosmos/deepcut in thai segment.
+
+returns ''list'' ex. ['แมว','กิน']
+
+**Example**
+
+```python
+from pythainlp.tokenize import word_tokenize
+text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
+a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
+b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+```
+
+### Thai postaggers
+
+```python
+from pythainlp.tag import pos_tag
+pos_tag(list,engine='old')
+```
+
+engine
+
+1. old is UnigramTagger (default)
+2. artagger is RDR POS Tagger.
+
+### Thai romanization
+
+```python
+from pythainlp.romanization import romanization
+romanization(str,engine='pyicu')
+```
+It has 2 engines:
+
+- pyicu
+- royin
+
+data :
+
+input ''str''
+
+returns ''str'' 
+
+**Example**
+
+```python
+from pythainlp.romanization import romanization
+romanization("แมว") # 'mæw'
+```
+
+### Check the wrong word 
+
+Before using this feature, install hunspell and hunspell-th first.
+
+```python
+from pythainlp.spell import *
+a=spell("สี่เหลียม")
+print(a) # ['สี่เหลี่ยม', 'เสียเหลี่ยม', 'เหลี่ยม']
+```
+### pythainlp.number
+
+```python
+from pythainlp.number import *
+```
+- nttn(str)  - To convert thai numbers to numbers.
+- nttt(str) - Thai Numbers to text.
+- ntnt(str) - numbers to thai numbers.
+- ntt(str) -  numbers to text.
+- ttn(str) - text to  numbers.
+- numtowords(float) -  Read thai numbers (Baht) input ''float'' returns  'str'
+
+### Sorting List of Thai Information in List
+
+```python
+from pythainlp.collation import collation
+print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
+```
+
+input list 
+
+returns list
+
+### Get current time in Thai
+
+```python
+from pythainlp.date import now
+now() # '30 พฤษภาคม 2560 18:45:24'
+```
+### Thai WordNet
+
+import
+
+```python
+from pythainlp.corpus import wordnet
+```
+
+**Use**
+
+It's like nltk.
+
+- wordnet.synsets(word)
+- wordnet.synset(name_synsets)
+- wordnet.all_lemma_names(pos=None, lang="tha")
+- wordnet.all_synsets(pos=None)
+- wordnet.langs()
+- wordnet.lemmas(word,pos=None,lang="tha")
+- wordnet.lemma(name_synsets)
+- wordnet.lemma_from_key(key)
+- wordnet.path_similarity(synsets1,synsets2)
+- wordnet.lch_similarity(synsets1,synsets2)
+- wordnet.wup_similarity(synsets1,synsets2)
+- wordnet.morphy(form, pos=None)
+- wordnet.custom_lemmas(tab_file, lang)
+
+**Example**
+
+```python
+>>> from pythainlp.corpus import wordnet
+>>> print(wordnet.synsets('หนึ่ง'))
+[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
+>>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
+[]
+>>> print(wordnet.synset('one.s.05'))
+Synset('one.s.05')
+>>> print(wordnet.synset('spy.n.01').lemmas())
+[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
+>>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
+['สปาย', 'สายลับ']
+```
+
+### Find words with the most usage.
+
+```python
+from pythainlp.rank import rank
+rank(list)
+```
+
+returns dict
+
+**Example**
+
+```python
+>>> rank(['แมง','แมง','คน'])
+Counter({'แมง': 2, 'คน': 1})
+```
+
+### Fix text typed without switching the keyboard language
+
+```python
+from pythainlp.change import *
+```
+
+- texttothai(str) - eng to thai.
+- texttoeng(str) - thai to eng.
+
+### Thai Character Clusters (TCC)
+
+TCC : Mr.Jakkrit TeCho
+
+grammar :  Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
+
+Code :  Korakot Chaovavanich 
+
+**Example**
+
+```python
+>>> from pythainlp.tokenize import tcc
+>>> tcc.tcc('ประเทศไทย')
+'ป/ระ/เท/ศ/ไท/ย'
+```
+
+### Enhanced Thai Character Cluster (ETCC)
+
+**Example**
+
+```python
+>>> from pythainlp.tokenize import etcc
+>>> etcc.etcc('คืนความสุข')
+'/คืน/ความสุข'
+```
+
+### Thai Soundex
+
+credit Korakot Chaovavanich (from https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
+
+- LK82
+- Udom83
+
+**Example**
+
+```python
+>>> from pythainlp.soundex import LK82, Udom83
+>>> print(LK82('รถ'))
+ร3000
+>>> print(LK82('รด'))
+ร3000
+>>> print(LK82('จัน'))
+จ4000
+>>> print(LK82('จันทร์'))
+จ4000
+>>> print(Udom83('รถ'))
+ร800000
+```
+
+### Thai meta sound
+
+```
+Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
+```
+
+**Example**
+
+```python
+>>> from pythainlp.MetaSound import *
+>>> MetaSound('คน')
+'15'
+```
+
+### Thai sentiment analysis
+
+using data from [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
+
+```python
+from pythainlp.sentiment import sentiment
+sentiment(str)
+```
+
+input str returns pos , neg or neutral
+
+### Util
+
+using
+
+```python
+from pythainlp.util import *
+```
+
+#### ngrams
+
+for building ngrams 
+
+```python
+ngrams(token,num)
+```
+
+- token - list
+- num - ngrams
+
+### Corpus
+
+#### Thai stopword
+
+```python
+from pythainlp.corpus import stopwords
+stopwords = stopwords.words('thai')
+```
+
+#### Thai country name
+
+```python
+from pythainlp.corpus import country
+country.get_data()
+```
+
+#### Tone in Thai
+
+```python
+from pythainlp.corpus import tone
+tone.get_data()
+```
+
+#### Consonant in thai
+
+```python
+from pythainlp.corpus import alphabet
+alphabet.get_data()
+```
+
+#### Word list in thai
+
+```python
+from pythainlp.corpus.thaiword import get_data # old data
+get_data()
+from pythainlp.corpus.newthaiword import get_data # new data
+get_data()
+```
diff --git a/docs/pythainlp-1-4-thai.md b/docs/pythainlp-1-4-thai.md
new file mode 100644
index 000000000..35a3f7f97
--- /dev/null
+++ b/docs/pythainlp-1-4-thai.md
@@ -0,0 +1,373 @@
+# คู่มือการใช้งาน PyThaiNLP 1.4
+
+Natural language processing หรือ การประมวลภาษาธรรมชาติ  โมดูล PyThaiNLP เป็นโมดูลที่ถูกพัฒนาขึ้นเพื่อพัฒนาการประมวลภาษาธรรมชาติภาษาไทยในภาษา Python และ**มันฟรี (ตลอดไป) เพื่อคนไทยและชาวโลกทุกคน !**
+
+> เพราะโลกขับเคลื่อนต่อไปด้วยการแบ่งปัน
+
+รองรับเฉพาะ Python 3.4 ขึ้นไปเท่านั้น
+
+ติดตั้งใช้คำสั่ง
+
+```
+pip install pythainlp
+```
+
+**วิธีติดตั้งสำหรับ Windows**
+
+ให้ทำการติดตั้ง pyicu โดยใช้ไฟล์ .whl จาก [http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu](http://www.lfd.uci.edu/~gohlke/pythonlibs/#pyicu) 
+
+หากใช้ python 3.5 64 bit ให้โหลด PyICU-1.9.7-cp35-cp35m-win_amd64.whl แล้วเปิด cmd ใช้คำสั่ง
+
+```
+pip install PyICU-1.9.7-cp35-cp35m-win_amd64.whl
+```
+
+แล้วจึงใช้ 
+
+```
+pip install pythainlp
+```
+
+**ติดตั้งบน Mac**
+
+```sh
+$ brew install icu4c --force
+$ brew link --force icu4c
+$ CFLAGS=-I/usr/local/opt/icu4c/include LDFLAGS=-L/usr/local/opt/icu4c/lib pip install pythainlp
+```
+
+ข้อมูลเพิ่มเติม [คลิกที่นี้](https://medium.com/data-science-cafe/install-polyglot-on-mac-3c90445abc1f#.rdfrorxjx)
+
+## API
+
+### ตัดคำไทย
+
+สำหรับการตัดคำไทยนั้น ใช้ API ดังต่อไปนี้
+
+```python
+from pythainlp.tokenize import word_tokenize
+word_tokenize(text,engine)
+```
+text คือ ข้อความในรูปแบบสตริง str เท่านั้น
+
+engine คือ ระบบตัดคำไทย ปัจจุบันนี้ PyThaiNLP ได้พัฒนามี 6 engine ให้ใช้งานกันดังนี้
+
+1. icu -  engine ตัวดั้งเดิมของ PyThaiNLP (ความแม่นยำต่ำ) และเป็นค่าเริ่มต้น
+2. dict - เป็นการตัดคำโดยใช้พจนานุกรมจาก thaiword.txt ใน corpus  (ความแม่นยำปานกลาง) จะคืนค่า False หากข้อความนั้นไม่สามารถตัดคำได้
+3. mm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย - API ชุดเก่า
+4. newmm - ใช้ Maximum Matching algorithm ในการตัดคำภาษาไทย โค้ดชุดใหม่ โดยใช้โค้ดคุณ Korakot Chaovavanich  จาก https://www.facebook.com/groups/408004796247683/permalink/431283740586455/ มาพัฒนาต่อ
+5. pylexto ใช้ LexTo ในการตัดคำ
+6. deepcut ใช้ deepcut จาก https://github.com/rkcosmos/deepcut ในการตัดคำภาษาไทย
+
+คืนค่าเป็น ''list'' เช่น ['แมว','กิน']
+
+**ตัวอย่าง**
+
+```python
+from pythainlp.tokenize import word_tokenize
+text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
+a=word_tokenize(text,engine='icu') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอ', 'เค', 'บ่', 'พวก', 'เรา', 'เป็น', 'คน', 'ไทย', 'รัก', 'ภาษา', 'ไทย', 'ภาษา', 'บ้าน', 'เกิด']
+b=word_tokenize(text,engine='dict') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+c=word_tokenize(text,engine='mm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+d=word_tokenize(text,engine='pylexto') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+e=word_tokenize(text,engine='newmm') # ['ผม', 'รัก', 'คุณ', 'นะ', 'ครับ', 'โอเค', 'บ่', 'พวกเรา', 'เป็น', 'คนไทย', 'รัก', 'ภาษาไทย', 'ภาษา', 'บ้านเกิด']
+```
+
+### Postaggers ภาษาไทย
+
+```python
+from pythainlp.tag import pos_tag
+pos_tag(list,engine='old')
+```
+
+list คือ list ที่เก็บข้อความหลังผ่านการตัดคำแล้ว
+
+engine คือ ชุดเครื่องมือในการ postaggers มี 2 ตัวดังนี้
+
+1. old เป็น UnigramTagger (ค่าเริ่มต้น)
+2. artagger เป็น RDR POS Tagger ละเอียดยิ่งกว่าเดิม รองรับเฉพาะ Python 3 เท่านั้น
+
+### แปลงข้อความเป็น Latin
+
+```python
+from pythainlp.romanization import romanization
+romanization(str,engine='pyicu')
+```
+มี 2 engine ดังนี้
+
+- pyicu ส่งค่า Latin
+- royin ใช้หลักเกณฑ์การถอดอักษรไทยเป็นอักษรโรมัน ฉบับราชบัณฑิตยสถาน (**หากมีข้อผิดพลาด ให้ใช้คำอ่าน เนื่องจากตัว royin ไม่มีตัวแปลงคำเป็นคำอ่าน**)
+
+data :
+
+รับค่า ''str'' ข้อความ 
+
+คืนค่าเป็น ''str'' ข้อความ
+
+**ตัวอย่าง**
+
+```python
+from pythainlp.romanization import romanization
+romanization("แมว") # 'mæw'
+```
+
+### เช็คคำผิด 
+
+ก่อนใช้งานความสามารถนี้ ให้ทำการติดตั้ง hunspell และ hunspell-th ก่อน
+
+**วิธีติดตั้ง** สำหรับบน Debian , Ubuntu
+
+```
+sudo apt-get install hunspell hunspell-th
+```
+
+บน Mac OS ติดตั้งตามนี้ [http://pankdm.github.io/hunspell.html](http://pankdm.github.io/hunspell.html)
+
+ให้ใช้ pythainlp.spell ตามตัวอย่างนี้
+
+```python
+from pythainlp.spell import *
+a=spell("สี่เหลียม")
+print(a) # ['สี่เหลี่ยม', 'เสียเหลี่ยม', 'เหลี่ยม']
+```
+### pythainlp.number
+
+```python
+from pythainlp.number import *
+```
+จัดการกับตัวเลข โดยมีดังนี้
+
+- nttn(str)  - เป็นการแปลงเลขไทยสู่เลข
+- nttt(str) - เลขไทยสู่ข้อความ
+- ntnt(str) - เลขสู่เลขไทย
+- ntt(str) - เลขสู่ข้อความ
+- ttn(str) - ข้อความสู่เลข
+- numtowords(float) -  อ่านจำนวนตัวเลขภาษาไทย (บาท) รับค่าเป็น ''float'' คืนค่าเป็น  'str'
+
+### เรียงลำดับข้อมูลภาษาไทยใน List
+
+```python
+from pythainlp.collation import collation
+print(collation(['ไก่','ไข่','ก','ฮา'])) # ['ก', 'ไก่', 'ไข่', 'ฮา']
+```
+
+รับ list คืนค่า list
+
+### รับเวลาปัจจุบันเป็นภาษาไทย
+
+```python
+from pythainlp.date import now
+now() # '30 พฤษภาคม 2560 18:45:24'
+```
+### WordNet ภาษาไทย
+
+เรียกใช้งาน
+
+```python
+from pythainlp.corpus import wordnet
+```
+
+**การใช้งาน**
+
+API เหมือนกับ NLTK โดยรองรับ API ดังนี้
+
+- wordnet.synsets(word)
+- wordnet.synset(name_synsets)
+- wordnet.all_lemma_names(pos=None, lang="tha")
+- wordnet.all_synsets(pos=None)
+- wordnet.langs()
+- wordnet.lemmas(word,pos=None,lang="tha")
+- wordnet.lemma(name_synsets)
+- wordnet.lemma_from_key(key)
+- wordnet.path_similarity(synsets1,synsets2)
+- wordnet.lch_similarity(synsets1,synsets2)
+- wordnet.wup_similarity(synsets1,synsets2)
+- wordnet.morphy(form, pos=None)
+- wordnet.custom_lemmas(tab_file, lang)
+
+**ตัวอย่าง**
+
+```python
+>>> from pythainlp.corpus import wordnet
+>>> print(wordnet.synsets('หนึ่ง'))
+[Synset('one.s.05'), Synset('one.s.04'), Synset('one.s.01'), Synset('one.n.01')]
+>>> print(wordnet.synsets('หนึ่ง')[0].lemma_names('tha'))
+[]
+>>> print(wordnet.synset('one.s.05'))
+Synset('one.s.05')
+>>> print(wordnet.synset('spy.n.01').lemmas())
+[Lemma('spy.n.01.spy'), Lemma('spy.n.01.undercover_agent')]
+>>> print(wordnet.synset('spy.n.01').lemma_names('tha'))
+['สปาย', 'สายลับ']
+```
+
+### หาคำที่มีจำนวนการใช้งานมากที่สุด
+
+```python
+from pythainlp.rank import rank
+rank(list)
+```
+
+คืนค่าออกมาเป็น dict
+
+**ตัวอย่างการใช้งาน**
+
+```python
+>>> rank(['แมง','แมง','คน'])
+Counter({'แมง': 2, 'คน': 1})
+```
+
+### แก้ไขปัญหาการพิมพ์ลืมเปลี่ยนภาษา
+
+```python
+from pythainlp.change import *
+```
+
+มีคำสั่งดังนี้
+
+- texttothai(str) แปลงแป้นตัวอักษรภาษาอังกฤษเป็นภาษาไทย
+- texttoeng(str) แปลงแป้นตัวอักษรภาษาไทยเป็นภาษาอังกฤษ
+
+คืนค่าออกมาเป็น str
+
+### Thai Character Clusters (TCC)
+
+PyThaiNLP 1.4 รองรับ Thai Character Clusters (TCC) โดยจะแบ่งกลุ่มด้วย /
+
+**เครดิต**
+
+TCC : Mr.Jakkrit TeCho
+
+grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
+
+โค้ด : คุณ Korakot Chaovavanich 
+
+**การใช้งาน**
+
+```python
+>>> from pythainlp.tokenize import tcc
+>>> tcc.tcc('ประเทศไทย')
+'ป/ระ/เท/ศ/ไท/ย'
+```
+
+### Enhanced Thai Character Cluster (ETCC)
+
+นอกจาก TCC แล้ว PyThaiNLP 1.4 ยังรองรับ Enhanced Thai Character Cluster (ETCC) โดยแบ่งกลุ่มด้วย /
+
+**การใช้งาน**
+
+```python
+>>> from pythainlp.tokenize import etcc
+>>> etcc.etcc('คืนความสุข')
+'/คืน/ความสุข'
+```
+
+### Thai Soundex ภาษาไทย
+
+เครดิต คุณ Korakot Chaovavanich (จาก https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
+
+กฎที่รองรับในเวอร์ชัน 1.4
+
+- กฎการเข้ารหัสซาวน์เด็กซ์ของ วิชิต หล่อจีระชุณห์กุล และ เจริญ คุวินทร์พันธุ์ - LK82
+- กฎการเข้ารหัสซาวน์เด็กซ์ของ วรรณี อุดมพาณิชย์ - Udom83
+
+**การใช้งาน**
+
+```python
+>>> from pythainlp.soundex import LK82, Udom83
+>>> print(LK82('รถ'))
+ร3000
+>>> print(LK82('รด'))
+ร3000
+>>> print(LK82('จัน'))
+จ4000
+>>> print(LK82('จันทร์'))
+จ4000
+>>> print(Udom83('รถ'))
+ร800000
+```
+
+### Meta Sound ภาษาไทย
+
+```
+Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
+```
+
+**การใช้งาน**
+
+```python
+>>> from pythainlp.MetaSound import *
+>>> MetaSound('คน')
+'15'
+```
+
+### Sentiment analysis ภาษาไทย
+
+ใช้ข้อมูลจาก [https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/](https://github.com/wannaphongcom/lexicon-thai/tree/master/ข้อความ/)
+
+```python
+from pythainlp.sentiment import sentiment
+sentiment(str)
+```
+
+รับค่า str ส่งออกเป็น pos , neg หรือ neutral
+
+### Util
+
+การใช้งาน
+
+```python
+from pythainlp.util import *
+```
+
+#### ngrams
+
+สำหรับสร้าง ngrams 
+
+```python
+ngrams(token,num)
+```
+
+- token คือ list
+- num คือ จำนวน ngrams
+
+### Corpus
+
+#### stopword ภาษาไทย
+
+```python
+from pythainlp.corpus import stopwords
+stopwords = stopwords.words('thai')
+```
+
+#### ชื่อประเทศ ภาษาไทย
+
+```python
+from pythainlp.corpus import country
+country.get_data()
+```
+
+#### ตัววรรณยุกต์ในภาษาไทย
+
+```python
+from pythainlp.corpus import tone
+tone.get_data()
+```
+
+#### ตัวพยัญชนะในภาษาไทย
+
+```python
+from pythainlp.corpus import alphabet
+alphabet.get_data()
+```
+
+#### รายการคำในภาษาไทย
+
+```python
+from pythainlp.corpus.thaiword import get_data # ข้อมูลเก่า
+get_data()
+from pythainlp.corpus.newthaiword import get_data # ข้อมูลใหม่
+get_data()
+```
+
+เขียนโดย นาย วรรณพงษ์  ภัททิยไพบูลย์
\ No newline at end of file
diff --git "a/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md" "b/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md"
new file mode 100644
index 000000000..2357c6ccb
--- /dev/null
+++ "b/docs/\340\270\247\340\270\264\340\270\230\340\270\265\340\270\225\340\270\264\340\270\224\340\270\225\340\270\261\340\271\211\340\270\207 PyThaiNLP \340\271\200\340\270\247\340\270\212\340\270\261\340\271\210\340\270\231\340\270\245\340\271\210\340\270\262\340\270\252\340\270\270\340\270\224\340\270\210\340\270\262\340\270\201 GitHub.md"	
@@ -0,0 +1,7 @@
+# วิธีติดตั้ง PyThaiNLP เวอร์ชันล่าสุดจาก GitHub
+
+ใช้คำสั่งนี้ในคอมมานด์ไลน์
+
+```
+pip install -U https://github.com/wannaphongcom/pythainlp/archive/pythainlp1.4.zip
+```
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
deleted file mode 100644
index 2e032ab23..000000000
--- a/mkdocs.yml
+++ /dev/null
@@ -1,8 +0,0 @@
-site_name: PyThaiNLP
-theme: readthedocs
-pages:
-- Home: index.md
-- Install: install.md
-- Docs: docs.md
-- License: license.md
-- About: about.md
\ No newline at end of file
diff --git a/pythainlp/MetaSound.py b/pythainlp/MetaSound.py
new file mode 100644
index 000000000..af708dc98
--- /dev/null
+++ b/pythainlp/MetaSound.py
@@ -0,0 +1,52 @@
+'''
+MetaSound
+
+References
+
+Snae & Brückner. (2009). Novel Phonetic Name Matching Algorithm with a Statistical Ontology for Analysing Names Given in Accordance with Thai Astrology. Retrieved from https://pdfs.semanticscholar.org/3983/963e87ddc6dfdbb291099aa3927a0e3e4ea6.pdf
+'''
+import re
+def MetaSound(name):
+    '''
+    Encode a Thai name as a string of MetaSound digits.
+
+    Every character of ``name`` yields one output character, so the result
+    has the same length as the input: a consonant becomes the digit of its
+    sound group ('1'-'8'); anything else (vowel, tone mark, non-Thai text)
+    becomes '0'; a thanthakhat mark and the character right before it are
+    silent and both become '0'.
+
+    :param str name: text to encode
+    :return: digit string with ``len(name)`` characters
+    :rtype: str
+    '''
+    groups = (
+        ('กขฃคฆฅ', '1'),
+        ('จฉชฌซฐทฒดฎตสศษ', '2'),
+        ('ฟฝพผภบป', '3'),
+        ('ง', '4'),
+        ('ลฬรนณฦญ', '5'),
+        ('ม', '6'),
+        ('ย', '7'),
+        ('ว', '8'),
+    )
+    code = {}
+    for letters, digit in groups:
+        for letter in letters:
+            code[letter] = digit
+    # The old consonant filter tested a non-empty tuple, so it kept every
+    # character; positions are therefore preserved here on purpose.
+    chars = list(name)
+    silent = set()
+    for i, char in enumerate(chars):
+        if char == '์':
+            silent.add(i)
+            # i > 0 guard: the old word[i-1] wrapped around and silenced the
+            # last character when the mark came first.
+            if i > 0:
+                silent.add(i - 1)
+    return ''.join('0' if i in silent else code.get(char, '0')
+                   for i, char in enumerate(chars))
+if __name__ == '__main__':  # demo: ร and ล share group 5, so both lines print 501
+    print(MetaSound('รัก'))
+    print(MetaSound('ลัก'))
\ No newline at end of file
diff --git a/pythainlp/Text.py b/pythainlp/Text.py
index 573d034d9..eb2de5a32 100644
--- a/pythainlp/Text.py
+++ b/pythainlp/Text.py
@@ -1,8 +1,17 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import,unicode_literals
-from pythainlp.tokenize import *
+from pythainlp.tokenize import word_tokenize
 import nltk
 def Text(str1):
-	if type(str1) != 'list':
+	'''
+	Build an ``nltk.Text`` from Thai text.
+
+	:param str1: a list of tokens, used as is; any other value is converted
+		with ``str()`` and split by ``word_tokenize``
+	:return: ``nltk.Text`` over the tokens
+	'''
+	# isinstance() replaces type(str1) != 'list', which compared a type with
+	# a string and so was always true: token lists were re-tokenized too.
+	if not isinstance(str1, list):
 		str1=word_tokenize(str(str1))
-	return nltk.Text(str1)
\ No newline at end of file
+	return nltk.Text(str1)
diff --git a/pythainlp/__init__.py b/pythainlp/__init__.py
index ad6103306..47bffa93e 100644
--- a/pythainlp/__init__.py
+++ b/pythainlp/__init__.py
@@ -5,17 +5,18 @@
 	"""
 	ไว้ใส่ความสามารถที่รองรับเฉพาะ Python 3.4+ เท่านั้น
 	"""
-	from pythainlp.sentiment import *
-	from pythainlp.spell import *
-from pythainlp.romanization import *
-from pythainlp.segment import *
-from pythainlp.tokenize import *
-from pythainlp.rank import *
-from pythainlp.change import *
-from pythainlp.number import *
-from pythainlp.date import *
-from pythainlp.postaggers import * 
-from pythainlp.tag import * 
-from pythainlp.collation import *
-from pythainlp.test import *
-from pythainlp.Text import *
+	from pythainlp.sentiment import sentiment
+	from pythainlp.spell import hunspell,spell
+from pythainlp.romanization import romanization,pyicu,royin
+from pythainlp.tokenize import word_tokenize,tcc,etcc
+from pythainlp.rank import rank
+from pythainlp.change import texttothai,texttoeng
+from pythainlp.number import nttn,nttt,ntnt,ntt,ttn,ttnt,number_format,numtowords,ReadNumber
+from pythainlp.date import now
+from pythainlp.tag import old,pos_tag
+from pythainlp.collation import collation
+from pythainlp.test import TestUM
+from pythainlp.Text import Text
+from pythainlp.MetaSound import MetaSound
+from pythainlp.soundex import LK82,Udom83
+from pythainlp.util import ngrams
\ No newline at end of file
diff --git a/pythainlp/corpus/LICENSE_THA_WN b/pythainlp/corpus/LICENSE_THA_WN
deleted file mode 100644
index e9774060e..000000000
--- a/pythainlp/corpus/LICENSE_THA_WN
+++ /dev/null
@@ -1,35 +0,0 @@
-Copyright: 2011 NICT
-
-Thai WordNet
-
-This software and database is being provided to you, the LICENSEE, by
-the National Institute of Information and Communications Technology
-under the following license.  By obtaining, using and/or copying this
-software and database, you agree that you have read, understood, and
-will comply with these terms and conditions:
-  
-  Permission to use, copy, modify and distribute this software and
-  database and its documentation for any purpose and without fee or
-  royalty is hereby granted, provided that you agree to comply with
-  the following copyright notice and statements, including the
-  disclaimer, and that the same appear on ALL copies of the software,
-  database and documentation, including modifications that you make
-  for internal use or for distribution.
-  
-Thai WordNet Copyright 2011 by the National Institute of
-Information and Communications Technology (NICT).  All rights
-reserved.
-  
-THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND NICT MAKES NO
-REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF EXAMPLE,
-BUT NOT LIMITATION, NICT MAKES NO REPRESENTATIONS OR WARRANTIES OF
-MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
-OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE
-ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
-  
-The name of the National Institute of Information and Communications
-Technology may not be used in advertising or publicity pertaining to
-distribution of the software and/or database.  Title to copyright in
-this software, database and any associated documentation shall at all
-times remain with National Institute of Information and Communications
-Technology and LICENSEE agrees to preserve same.
diff --git a/pythainlp/corpus/__init__.py b/pythainlp/corpus/__init__.py
index 5e8d250c3..68e349a8f 100644
--- a/pythainlp/corpus/__init__.py
+++ b/pythainlp/corpus/__init__.py
@@ -1,9 +1,3 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import,unicode_literals
 #__all__ = ["thaipos", "thaiword","alphabet","tone","country","wordnet"]
-from .thaipos import get_data
-from .thaiword import get_data
-from .alphabet import get_data
-from .tone import get_data
-from .country import get_data
-from .stopwords import words
diff --git a/pythainlp/corpus/corpus_license.md b/pythainlp/corpus/corpus_license.md
new file mode 100644
index 000000000..803c9b59f
--- /dev/null
+++ b/pythainlp/corpus/corpus_license.md
@@ -0,0 +1,218 @@
+# Corpus License
+
+tha-wn.db
+
+```
+Copyright: 2011 NICT
+
+Thai WordNet
+
+This software and database is being provided to you, the LICENSEE, by
+the National Institute of Information and Communications Technology
+under the following license.  By obtaining, using and/or copying this
+software and database, you agree that you have read, understood, and
+will comply with these terms and conditions:
+  
+  Permission to use, copy, modify and distribute this software and
+  database and its documentation for any purpose and without fee or
+  royalty is hereby granted, provided that you agree to comply with
+  the following copyright notice and statements, including the
+  disclaimer, and that the same appear on ALL copies of the software,
+  database and documentation, including modifications that you make
+  for internal use or for distribution.
+  
+Thai WordNet Copyright 2011 by the National Institute of
+Information and Communications Technology (NICT).  All rights
+reserved.
+  
+THIS SOFTWARE AND DATABASE IS PROVIDED "AS IS" AND NICT MAKES NO
+REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED.  BY WAY OF EXAMPLE,
+BUT NOT LIMITATION, NICT MAKES NO REPRESENTATIONS OR WARRANTIES OF
+MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE
+OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT INFRINGE
+ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS.
+  
+The name of the National Institute of Information and Communications
+Technology may not be used in advertising or publicity pertaining to
+distribution of the software and/or database.  Title to copyright in
+this software, database and any associated documentation shall at all
+times remain with National Institute of Information and Communications
+Technology and LICENSEE agrees to preserve same.
+```
+
+thaiword.txt, new-thaidict.txt, stopwords-th.txt, stopwords-th1.txt, stopwords-th2.txt, stopwords-th3.txt, stopwords-th4.txt, stopwords-th-old.txt and คำมูล-คำอ่าน.db are licensed under the Creative Commons Attribution-ShareAlike 4.0 International Public License.
+
+## Creative Commons
+
+# Attribution-ShareAlike 4.0 International
+
+Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.
+
+### Using Creative Commons Public Licenses
+
+Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
+
+* __Considerations for licensors:__ Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. [More considerations for licensors](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors).
+
+* __Considerations for the public:__ By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. [More considerations for the public](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees).
+
+## Creative Commons Attribution-ShareAlike 4.0 International Public License
+
+By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.
+
+### Section 1 – Definitions.
+
+a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.
+
+b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.
+
+c. __BY-SA Compatible License__ means a license listed at [creativecommons.org/compatiblelicenses](http://creativecommons.org/compatiblelicenses), approved by Creative Commons as essentially the equivalent of this Public License.
+
+d. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
+
+e. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.
+
+f. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.
+
+g. __License Elements__ means the license attributes listed in the name of a Creative Commons Public License. The License Elements of this Public License are Attribution and ShareAlike.
+
+h. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License.
+
+i. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.
+
+j. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License.
+
+k. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.
+
+l. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.
+
+m. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.
+
+### Section 2 – Scope.
+
+a. ___License grant.___
+
+   1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:
+
+       A. reproduce and Share the Licensed Material, in whole or in part; and
+
+       B. produce, reproduce, and Share Adapted Material.
+
+   2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.
+
+   3. __Term.__ The term of this Public License is specified in Section 6(a).
+
+   4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.
+
+   5. __Downstream recipients.__
+
+       A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.
+
+       B. __Additional offer from the Licensor – Adapted Material.__ Every recipient of Adapted Material from You automatically receives an offer from the Licensor to exercise the Licensed Rights in the Adapted Material under the conditions of the Adapter’s License You apply.
+
+       C. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.
+
+   6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
+
+b. ___Other rights.___
+
+      1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.
+
+      2. Patent and trademark rights are not licensed under this Public License.
+
+      3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties.
+
+### Section 3 – License Conditions.
+
+Your exercise of the Licensed Rights is expressly made subject to the following conditions.
+
+a. ___Attribution.___
+
+   1. If You Share the Licensed Material (including in modified form), You must:
+
+       A. retain the following if it is supplied by the Licensor with the Licensed Material:
+
+         i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);
+
+         ii. a copyright notice;
+
+         iii. a notice that refers to this Public License;
+
+         iv. a notice that refers to the disclaimer of warranties;
+
+         v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;
+
+       B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and
+
+       C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.
+
+   2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.
+
+   3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.
+
+b. ___ShareAlike.___
+
+In addition to the conditions in Section 3(a), if You Share Adapted Material You produce, the following conditions also apply.
+
+1. The Adapter’s License You apply must be a Creative Commons license with the same License Elements, this version or later, or a BY-SA Compatible License.
+
+2. You must include the text of, or the URI or hyperlink to, the Adapter's License You apply. You may satisfy this condition in any reasonable manner based on the medium, means, and context in which You Share Adapted Material.
+
+3. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, Adapted Material that restrict exercise of the rights granted under the Adapter's License You apply.
+
+### Section 4 – Sui Generis Database Rights.
+
+Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:
+
+a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database;
+
+b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material, including for purposes of Section 3(b); and
+
+c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.
+
+For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.
+
+### Section 5 – Disclaimer of Warranties and Limitation of Liability.
+
+a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__
+
+b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__
+
+c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.
+
+### Section 6 – Term and Termination.
+
+a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.
+
+b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:
+
+      1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or
+
+      2. upon express reinstatement by the Licensor.
+
+   For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.
+
+c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.
+
+d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
+
+### Section 7 – Other Terms and Conditions.
+
+a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.
+
+b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.
+
+### Section 8 – Interpretation.
+
+a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.
+
+b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.
+
+c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.
+
+d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.
+
+> Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
+>
+> Creative Commons may be contacted at creativecommons.org
+
diff --git a/pythainlp/corpus/make-stopword.tool b/pythainlp/corpus/make-stopword.tool
new file mode 100755
index 000000000..20ef0b137
--- /dev/null
+++ b/pythainlp/corpus/make-stopword.tool
@@ -0,0 +1,54 @@
+'''
+โปรแกรมรวบรวมคำศัพท์เพื่อสร้าง dict
+===================
+เขียนโดย นาย วรรณพงษ์  ภัททิยไพบูลย์
+
+29/5/2560
+22:45 น.
+'''
+import codecs
+def data(template_file):
+	'''
+	Read a UTF-8 text file (a leading BOM is dropped) and return its lines as a list.
+	'''
+	with codecs.open(template_file, mode='r', encoding='utf-8-sig') as handle:
+		content = handle.read()
+	# The with-block has already closed the file at this point.
+	return content.splitlines()
+def list1list(list1,list2):
+	'''
+	Merge list2 into list1 without duplicates.
+
+	Each item of list2 that is not yet in list1 is appended to list1, in
+	list2 order. list1 is modified in place and also returned.
+
+	list1: list to extend
+	list2: list of candidate items
+	'''
+	for candidate in list2:
+		# "in" is a linear scan of list1, as in the original while-loop.
+		if candidate not in list1:
+			list1.append(candidate)
+	return list1
+def savetofile(file,data1,mode='w+'):
+	'''
+	Write each item of data1 to file on its own line (utf-8-sig), then print "Ok".
+	'''
+	# "with" closes the file even when a write raises; it used to stay open.
+	with codecs.open(file, mode,encoding='utf-8-sig') as thefile:
+		for item in data1:
+			thefile.write("%s\n" % item)
+	print("Ok")
+
+listno1=data("stopwords-th-old.txt") # starting word list
+filelist = [
+"stopwords-th1.txt",
+"stopwords-th2.txt",
+"stopwords-th3.txt",
+"stopwords-th4.txt"
+] # all files whose words are merged into the starting list
+for namefile in filelist:
+	print(namefile)
+	listno2=data(namefile)
+	listno1=list1list(list1=listno1,list2=listno2)
+savetofile("stopwords-th.txt",listno1) # write the merged list
diff --git a/pythainlp/corpus/newthaiword.py b/pythainlp/corpus/newthaiword.py
new file mode 100644
index 000000000..595ff16ae
--- /dev/null
+++ b/pythainlp/corpus/newthaiword.py
@@ -0,0 +1,11 @@
+﻿# -*- coding: utf-8 -*-
+from __future__ import absolute_import,unicode_literals
+import os
+import codecs
+import pythainlp
+templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'corpus')
+template_file = os.path.join(templates_dir, 'new-thaidict.txt')
+def get_data():
+	'''Return the lines of the file at template_file as a list of strings.'''
+	with codecs.open(template_file, mode='r', encoding='utf8') as corpus:
+		return corpus.read().splitlines()
diff --git a/pythainlp/corpus/stopwords-th-old.txt b/pythainlp/corpus/stopwords-th-old.txt
new file mode 100644
index 000000000..3e92e335f
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th-old.txt
@@ -0,0 +1,111 @@
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ดัง
+ซึ่ง
+ช่วง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เข้า
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+อีก
+อาจ
+ออก
+อย่าง
+อะไร
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+ลง
+ร่วม
+ราย
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
+จึง
+ไว้
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+ๆ
diff --git a/pythainlp/corpus/stopwords-th.txt b/pythainlp/corpus/stopwords-th.txt
new file mode 100644
index 000000000..8fe9bebb5
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th.txt
@@ -0,0 +1,1112 @@
+﻿นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ดัง
+ซึ่ง
+ช่วง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เข้า
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+อีก
+อาจ
+ออก
+อย่าง
+อะไร
+อยู่
+อยาก
+หาก
+หลาย
+หลังจาก
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+ลง
+ร่วม
+ราย
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
+จึง
+ไว้
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+ๆ
+ทั้ง
+วัน
+เขา
+เคย
+ไม่
+้ง
+อยาก 
+เกิน
+เกินๆ
+เกี่ยวกัน
+เกี่ยวกับ
+เกี่ยวของ
+เกี่ยวเนื่อง
+เกี่ยวๆ
+เกือบ
+เกือบจะ
+เกือบๆ
+แก
+แก
+แกไข
+ใกล
+ใกลๆ
+ไกล
+ไกลๆ
+ขณะเดียวกัน
+ขณะใด
+ขณะใดๆ
+ขณะที่
+ขณะนั้น
+ขณะนี้
+ขณะหนึ่ง
+ขวาง
+ขวางๆ
+ขั้น
+ใคร
+ใคร
+ใครจะ
+ใครๆ
+งาย
+งายๆ
+ไง
+จง
+จด
+จน
+จนกระทั่ง
+จนกวา
+จนขณะนี้
+จนตลอด
+จนถึง
+จนทั่ว
+จนบัดนี้
+จนเมื่อ
+จนแม
+จนแมน
+จรด
+จรดกับ
+จริง
+จริงจัง
+จริงๆ
+จริงๆจังๆ
+จวน
+จวนจะ
+จวนเจียน
+จวบ
+ซึ่งก็
+ซึ่งก็คือ
+ซึ่งกัน
+ซึ่งกันและกัน
+ซึ่งไดแก
+ซึ่งๆ
+ณ
+ดวย
+ดวยกัน
+ดวยเชนกัน
+ดวยที่
+ดวยประการฉะนี้
+ดวยเพราะ
+ดวยวา
+ดวยเหตุที่
+ดวยเหตุนั้น
+ดวยเหตุนี้
+ดวยเหตุเพราะ
+ดวยเหตุวา
+ดวยเหมือนกัน
+ดั่ง
+ดังกลาว
+ดังกับ
+ดั่งกับ
+ดังกับวา
+ดั่งกับวา
+ดังเกา
+ดั่งเกา
+ดังเคย
+ใดๆ
+ได
+ไดแก
+ไดแต
+ไดที่
+ไดมา
+ไดรับ
+ตน
+ตนเอง
+ตนฯ
+ตรง
+ตรงๆ
+ตลอด
+ตลอดกาล
+ตลอดกาลนาน
+ตลอดจน
+ตลอดถึง
+ตลอดทั้ง
+ตลอดทั่ว
+ตลอดทั่วถึง
+ตลอดทั่วทั้ง
+ตลอดป
+ตลอดไป
+ตลอดมา
+ตลอดระยะเวลา
+ตลอดวัน
+ตลอดเวลา
+ตลอดศก
+ตอ
+ตอกัน
+ถึงแก
+ถึงจะ
+ถึงบัดนั้น
+ถึงบัดนี้
+ถึงเมื่อ
+ถึงเมื่อใด
+ถึงเมื่อไร
+ถึงแม
+ถึงแมจะ
+ถึงแมวา
+ถึงอยางไร
+ถือ
+ถือวา
+ถูกตอง
+ถูกๆ
+เถอะ
+เถิด
+ทรง
+ทวา
+ทั้งคน
+ทั้งตัว
+ทั้งที
+ทั้งที่
+ทั้งนั้น
+ทั้งนั้นดวย
+ทั้งนั้นเพราะ
+นอก
+นอกจากที่
+นอกจากนั้น
+นอกจากนี้
+นอกจากวา
+นอกนั้น
+นอกเหนือ
+นอกเหนือจาก
+นอย
+นอยกวา
+นอยๆ
+นะ
+นะ
+นักๆ
+นั่น
+นั่นไง
+นั่นเปน
+นั่นแหละ
+นั่นเอง
+นั้นๆ
+นับ
+นับจากนั้น
+นับจากนี้
+นับตั้งแต
+นับแต
+นับแตที่
+นับแตนั้น
+เปนตน
+เปนตนไป
+เปนตนมา
+เปนแต
+เปนแตเพียง
+เปนที
+เปนที่
+เปนที่สุด
+เปนเพราะ
+เปนเพราะวา
+เปนเพียง
+เปนเพียงวา
+เปนเพื่อ
+เปนอัน
+เปนอันมาก
+เปนอันวา
+เปนอันๆ
+เปนอาทิ
+เปนๆ
+เปลี่ยน
+เปลี่ยนแปลง
+เปด
+เปดเผย
+ไป
+ผาน
+ผานๆ
+ผิด
+ผิดๆ
+ผู
+เพียงเพื่อ
+เพียงไร
+เพียงไหน
+เพื่อที่
+เพื่อที่จะ
+เพื่อวา
+เพื่อให
+ภาค
+ภาคฯ
+ภาย
+ภายใต
+ภายนอก
+ภายใน
+ภายภาค
+ภายภาคหนา
+ภายหนา
+ภายหลัง
+มอง
+มองวา
+มัก
+มักจะ
+มัน
+มันๆ
+มั้ย
+มั้ยนะ
+มั้ยนั่น
+มั้ยเนี่ย
+มั้ยละ
+ยืนนาน
+ยืนยง
+ยืนยัน
+ยืนยาว
+เยอะ
+เยอะแยะ
+เยอะๆ
+แยะ
+แยะๆ
+รวด
+รวดเร็ว
+รวม
+รวมกัน
+รวมกัน
+รวมดวย
+รวมดวย
+รวมถึง
+รวมทั้ง
+รวมมือ
+รวมๆ
+ระยะ
+ระยะๆ
+ระหวาง
+รับรอง
+รึ
+รึวา
+รือ
+รือวา
+สิ้นกาลนาน
+สืบเนื่อง
+สุดๆ
+สู
+สูง
+สูงกวา
+สูงสง
+สูงสุด
+สูงๆ
+เสมือนกับ
+เสมือนวา
+เสร็จ
+เสร็จกัน
+เสร็จแลว
+เสร็จสมบูรณ
+เสร็จสิ้น
+เสีย
+เสียกอน
+เสียจน
+เสียจนกระทั่ง
+เสียจนถึง
+เสียดวย
+เสียนั่น
+เสียนั่นเอง
+เสียนี่
+เสียนี่กระไร
+เสียยิ่ง
+เสียยิ่งนัก
+เสียแลว
+ใหญๆ
+ใหดี
+ใหแด
+ใหไป
+ใหม
+ใหมา
+ใหมๆ
+ไหน
+ไหนๆ
+อดีต
+อนึ่ง
+อยาง
+อยางเชน
+อยางดี
+อยางเดียว
+อยางใด
+อยางที่
+อยางนอย
+อยางนั้น
+อยางนี้
+อยางโนน
+ก็คือ
+ก็แค่
+ก็จะ
+ก็ดี
+ก็ได้
+ก็ต่อเมื่อ
+ก็ตาม
+ก็ตามแต่
+ก็ตามที
+ก็แล้วแต่
+กระทั่ง
+กระทำ
+กระนั้น
+กระผม
+กลับ
+กล่าวคือ
+กลุ่ม
+กลุ่มก้อน
+กลุ่มๆ
+กว้าง
+กว้างขวาง
+กว้างๆ
+ก่อนหน้า
+ก่อนหน้านี้
+ก่อนๆ
+กันดีกว่า
+กันดีไหม
+กันเถอะ
+กันนะ
+กันและกัน
+กันไหม
+กันเอง
+กำลัง
+กำลังจะ
+กำหนด
+กู
+เก็บ
+เกิด
+เกี่ยวข้อง
+แก่
+แก้ไข
+ใกล้
+ใกล้ๆ
+ข้า
+ข้าง
+ข้างเคียง
+ข้างต้น
+ข้างบน
+ข้างล่าง
+ข้างๆ
+ขาด
+ข้าพเจ้า
+ข้าฯ
+เข้าใจ
+เขียน
+คงจะ
+คงอยู่
+ครบ
+ครบครัน
+ครบถ้วน
+ครั้งกระนั้น
+ครั้งก่อน
+ครั้งครา
+ครั้งคราว
+ครั้งใด
+ครั้งที่
+ครั้งนั้น
+ครั้งนี้
+ครั้งละ
+ครั้งหนึ่ง
+ครั้งหลัง
+ครั้งหลังสุด
+ครั้งไหน
+ครั้งๆ
+ครัน
+ครับ
+ครา
+คราใด
+คราที่
+ครานั้น
+ครานี้
+คราหนึ่ง
+คราไหน
+คราว
+คราวก่อน
+คราวใด
+คราวที่
+คราวนั้น
+คราวนี้
+คราวโน้น
+คราวละ
+คราวหน้า
+คราวหนึ่ง
+คราวหลัง
+คราวไหน
+คราวๆ
+คล้าย
+คล้ายกัน
+คล้ายกันกับ
+คล้ายกับ
+คล้ายกับว่า
+คล้ายว่า
+ควร
+ค่อน
+ค่อนข้าง
+ค่อนข้างจะ
+ค่อยไปทาง
+ค่อนมาทาง
+ค่อย
+ค่อยๆ
+คะ
+ค่ะ
+คำ
+คิด
+คิดว่า
+คุณ
+คุณๆ
+เคยๆ
+แค่
+แค่จะ
+แค่นั้น
+แค่นี้
+แค่เพียง
+แค่ว่า
+แค่ไหน
+ใคร่
+ใคร่จะ
+ง่าย
+ง่ายๆ
+จนกว่า
+จนแม้
+จนแม้น
+จังๆ
+จวบกับ
+จวบจน
+จ้ะ
+จ๊ะ
+จะได้
+จัง
+จัดการ
+จัดงาน
+จัดแจง
+จัดตั้ง
+จัดทำ
+จัดหา
+จัดให้
+จับ
+จ้า
+จ๋า
+จากนั้น
+จากนี้ 
+จากนี้ไป
+จำ
+จำเป็น 
+จำพวก
+จึงจะ
+จึงเป็น
+จู่ๆ
+ฉะนั้น
+ฉะนี้
+ฉัน
+เฉกเช่น
+เฉย
+เฉยๆ
+ไฉน
+ช่วงก่อน
+ช่วงต่อไป
+ช่วงถัดไป
+ช่วงท้าย
+ช่วงที่
+ช่วงนั้น
+ช่วงนี้
+ช่วงระหว่าง
+ช่วงแรก
+ช่วงหน้า
+ช่วงหลัง
+ช่วงๆ
+ช่วย
+ช้า
+ช้านาน
+ชาว
+ช้าๆ
+เช่นก่อน
+เช่นกัน
+เช่นเคย
+เช่นดัง
+เช่นดังก่อน
+เช่นดังเก่า
+เช่นดังที่
+เช่นดังว่า
+เช่นเดียวกัน
+เช่นเดียวกับ
+เช่นใด
+เช่นที่
+เช่นที่เคย
+เช่นที่ว่า
+เช่นนั้น
+เช่นนั้นเอง
+เช่นนี้
+เช่นเมื่อ
+เช่นไร
+เชื่อ
+เชื่อถือ
+เชื่อมั่น
+เชื่อว่า
+ใช่
+ใช่ไหม
+ใช้
+ซะ
+ซะก่อน
+ซะจน
+ซะจนกระทั่ง
+ซะจนถึง
+ซึ่งได้แก่
+ด้วยกัน
+ด้วยเช่นกัน
+ด้วยที่
+ด้วยเพราะ
+ด้วยว่า
+ด้วยเหตุที่
+ด้วยเหตุนั้น
+ด้วยเหตุนี้
+ด้วยเหตุเพราะ
+ด้วยเหตุว่า
+ด้วยเหมือนกัน
+ดังกล่าว
+ดังกับว่า
+ดั่งกับว่า
+ดังเก่า
+ดั่งเก่า
+ดั่งเคย
+ต่างก็
+ต่างหาก
+ตามด้วย
+ตามแต่
+ตามที่
+ตามๆ
+เต็มไปด้วย
+เต็มไปหมด
+เต็มๆ
+แต่ก็
+แต่ก่อน
+แต่จะ
+แต่เดิม
+แต่ต้อง
+แต่ถ้า
+แต่ทว่า
+แต่ที่
+แต่นั้น
+แต่เพียง
+แต่เมื่อ
+แต่ไร
+แต่ละ
+แต่ว่า
+แต่ไหน
+แต่อย่างใด
+โต
+โตๆ
+ใต้
+ถ้าจะ
+ถ้าหาก
+ถึงแก่
+ถึงแม้
+ถึงแม้จะ
+ถึงแม้ว่า
+ถึงอย่างไร
+ถือว่า
+ถูกต้อง
+ทว่า
+ทั้งนั้นด้วย
+ทั้งปวง
+ทั้งเป็น
+ทั้งมวล
+ทั้งสิ้น
+ทั้งหมด
+ทั้งหลาย
+ทั้งๆ
+ทัน
+ทันใดนั้น
+ทันที
+ทันทีทันใด
+ทั่ว
+ท
+าไม
+าไร
+าให้
+าๆ
+ที
+ที่จริง
+ที่ซึ่ง
+ทีเดียว
+ทีใด
+ที่ใด
+ที่ได้
+ทีเถอะ
+ที่แท้
+ที่แท้จริง
+ที่นั้น
+ที่นี้
+ทีไร
+ทีละ
+ที่ละ
+ที่แล้ว
+ที่ว่า
+ที่แห่งนั้น
+ที่ไหน
+ทีๆ
+ที่ๆ
+ทุกคน
+ทุกครั้ง
+ทุกครา
+ทุกคราว
+ทุกชิ้น
+ทุกตัว
+ทุกทาง
+ทุกที
+ทุกที่
+ทุกเมื่อ
+ทุกวัน
+ทุกวันนี้
+ทุกสิ่ง
+ทุกหน
+ทุกแห่ง
+ทุกอย่าง
+ทุกอัน
+ทุกๆ
+เท่า
+เท่ากัน
+เท่ากับ
+เท่าใด
+เท่าที่
+เท่านั้น
+เท่านี้
+เท่าไร
+เท่าไหร่
+แท้
+แท้จริง
+เธอ
+นอกจากว่า
+น้อย
+น้อยกว่า
+น้อยๆ
+น่ะ
+นั้นไว
+นับแต่นี้
+นาง
+นางสาว
+น่าจะ
+นาน
+นานๆ
+นาย
+นำ
+นำพา
+นำมา
+นิด
+นิดหน่อย
+นิดๆ
+นี่
+นี่ไง
+นี่นา
+นี่แน่ะ
+นี่แหละ
+นี้แหล่
+นี่เอง
+นี้เอง
+นู่น
+นู้น
+เน้น
+เนี่ย
+เนี่ยเอง
+ในช่วง
+ในที่
+ในเมื่อ
+ในระหว่าง
+บน
+บอก
+บอกแล้ว
+บอกว่า
+บ่อย
+บ่อยกว่า
+บ่อยครั้ง
+บ่อยๆ
+บัดดล
+บัดเดี๋ยวนี้
+บัดนั้น
+บัดนี้
+บ้าง
+บางกว่า
+บางขณะ
+บางครั้ง
+บางครา
+บางคราว
+บางที
+บางที่
+บางแห่ง
+บางๆ
+ปฏิบัติ
+ประกอบ
+ประการ
+ประการฉะนี้
+ประการใด
+ประการหนึ่ง
+ประมาณ
+ประสบ
+ปรับ
+ปรากฏ
+ปรากฏว่า
+ปัจจุบัน
+ปิด
+เป็นด้วย
+เป็นดัง
+เป็นต้น
+เป็นแต่
+เป็นเพื่อ
+เป็นอัน
+เป็นอันมาก
+เป็นอาทิ
+ผ่านๆ
+ผู้
+ผู้ใด
+เผื่อ
+เผื่อจะ
+เผื่อที่
+เผื่อว่า
+ฝ่าย
+ฝ่ายใด
+พบว่า
+พยายาม
+พร้อมกัน
+พร้อมกับ
+พร้อมด้วย
+พร้อมทั้ง
+พร้อมที่
+พร้อมเพียง
+พวก
+พวกกัน
+พวกกู
+พวกแก
+พวกเขา
+พวกคุณ
+พวกฉัน
+พวกท่าน
+พวกที่
+พวกเธอ
+พวกนั้น
+พวกนี้
+พวกนู้น
+พวกโน้น
+พวกมัน
+พวกมึง
+พอ
+พอกัน
+พอควร
+พอจะ
+พอดี
+พอตัว
+พอที
+พอที่
+พอเพียง
+พอแล้ว
+พอสม
+พอสมควร
+พอเหมาะ
+พอๆ
+พา
+พึง
+พึ่ง
+พื้นๆ
+พูด
+เพราะฉะนั้น
+เพราะว่า
+เพิ่ง
+เพิ่งจะ
+เพิ่ม
+เพิ่มเติม
+เพียง
+เพียงแค่
+เพียงใด
+เพียงแต่
+เพียงพอ
+เพียงเพราะ
+เพื่อว่า
+เพื่อให้
+ภายใต้
+มองว่า
+มั๊ย
+มากกว่า
+มากมาย
+มิ
+มิฉะนั้น
+มิใช่
+มิได้
+มีแต่
+มึง
+มุ่ง
+มุ่งเน้น
+มุ่งหมาย
+เมื่อก่อน
+เมื่อครั้ง
+เมื่อครั้งก่อน
+เมื่อคราวก่อน
+เมื่อคราวที่
+เมื่อคราว
+เมื่อคืน
+เมื่อเช้า
+เมื่อใด
+เมื่อนั้น
+เมื่อนี้
+เมื่อเย็น
+เมื่อไร
+เมื่อวันวาน
+เมื่อวาน
+เมื่อไหร่
+แม้
+แม้กระทั่ง
+แม้แต่
+แม้นว่า
+แม้ว่า
+ไม่ค่อย
+ไม่ค่อยจะ
+ไม่ค่อยเป็น
+ไม่ใช่
+ไม่เป็นไร
+ไม่ว่า
+ยก
+ยกให้
+ยอม
+ยอมรับ
+ย่อม
+ย่อย
+ยังคง
+ยังงั้น
+ยังงี้
+ยังโง้น
+ยังไง
+ยังจะ
+ยังแต่
+ยาก
+ยาว
+ยาวนาน
+ยิ่ง
+ยิ่งกว่า
+ยิ่งขึ้น
+ยิ่งขึ้นไป
+ยิ่งจน
+ยิ่งจะ
+ยิ่งนัก
+ยิ่งเมื่อ
+ยิ่งแล้ว
+ยิ่งใหญ่
+ร่วมกัน
+รวมด้วย
+ร่วมด้วย
+รือว่า
+เร็ว
+เร็วๆ
+เราๆ
+เรียก
+เรียบ
+เรื่อย
+เรื่อยๆ
+ไร
+ล้วน
+ล้วนจน
+ล้วนแต่
+ละ
+ล่าสุด
+เล็ก
+เล็กน้อย
+เล็กๆ
+เล่าว่า
+แล้วกัน
+แล้วแต่
+แล้วเสร็จ
+วันใด
+วันนั้น
+วันนี้
+วันไหน
+สบาย
+สมัย
+สมัยก่อน
+สมัยนั้น
+สมัยนี้
+สมัยโน้น
+ส่วนเกิน
+ส่วนด้อย
+ส่วนดี
+ส่วนใด
+ส่วนที่
+ส่วนน้อย
+ส่วนนั้น
+ส่วนมาก
+ส่วนใหญ่
+สั้น
+สั้นๆ
+สามารถ
+สำคัญ
+สิ่ง
+สิ่งใด
+สิ่งนั้น
+สิ่งนี้
+สิ่งไหน
+สิ้น
+เสร็จแล้ว
+เสียด้วย
+เสียแล้ว
+แสดง
+แสดงว่า
+หน
+หนอ
+หนอย
+หน่อย
+หมด
+หมดกัน
+หมดสิ้น
+หรือไง
+หรือเปล่า
+หรือไม่
+หรือยัง
+หรือไร
+หากแม้
+หากแม้น
+หากแม้นว่า
+หากว่า
+หาความ
+หาใช่
+หารือ
+เหตุ
+เหตุผล
+เหตุนั้น
+เหตุนี้
+เหตุไร
+เห็นแก่
+เห็นควร
+เห็นจะ
+เห็นว่า
+เหลือ
+เหลือเกิน
+เหล่า
+เหล่านั้น
+เหล่านี้
+แห่งใด
+แห่งนั้น
+แห่งนี้
+แห่งโน้น
+แห่งไหน
+แหละ
+ให้แก่
+ใหญ่
+ใหญ่โต
+อย่างเช่น
+อย่างดี
+อย่างเดียว
+อย่างใด
+อย่างที่
+อย่างน้อย
+อย่างนั้น
+อย่างนี้
+อย่างโน้น
+อย่างมาก
+อย่างยิ่ง
+อย่างไร
+อย่างไรก็
+อย่างไรก็ได้
+อย่างไรเสีย
+อย่างละ
+อย่างหนึ่ง
+อย่างไหน
+อย่างๆ
+อัน
+อันจะ
+อันใด
+อันได้แก่
+อันที่
+อันที่จริง
+อันที่จะ
+อันเนื่องมาจาก
+อันละ
+อันไหน
+อันๆ
+อาจจะ
+อาจเป็น
+อาจเป็นด้วย
+อื่น
+อื่นๆ
+เอ็ง
+เอา
+ฯ
+ฯล
+ฯลฯ
diff --git a/pythainlp/corpus/stopwords-th1.txt b/pythainlp/corpus/stopwords-th1.txt
new file mode 100644
index 000000000..5a8dc03f7
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th1.txt
@@ -0,0 +1,116 @@
+กล่าว
+กว่า
+กัน
+กับ
+การ
+ก็
+ก่อน
+ขณะ
+ขอ
+ของ
+ขึ้น
+คง
+ครั้ง
+ความ
+คือ
+จะ
+จัด
+จาก
+จึง
+ช่วง
+ซึ่ง
+ดัง
+ด้วย
+ด้าน
+ตั้ง
+ตั้งแต่
+ตาม
+ต่อ
+ต่าง
+ต่างๆ
+ต้อง
+ถึง
+ถูก
+ถ้า
+ทั้ง
+ทั้งนี้
+ทาง
+ที่
+ที่สุด
+ทุก
+ทํา
+ทําให้
+นอกจาก
+นัก
+นั้น
+นี้
+น่า
+นํา
+บาง
+ผล
+ผ่าน
+พบ
+พร้อม
+มา
+มาก
+มี
+ยัง
+รวม
+ระหว่าง
+รับ
+ราย
+ร่วม
+ลง
+วัน
+ว่า
+สุด
+ส่ง
+ส่วน
+สําหรับ
+หนึ่ง
+หรือ
+หลัง
+หลังจาก
+หลาย
+หาก
+อยาก
+อยู่
+อย่าง
+ออก
+อะไร
+อาจ
+อีก
+เขา
+เข้า
+เคย
+เฉพาะ
+เช่น
+เดียว
+เดียวกัน
+เนื่องจาก
+เปิด
+เปิดเผย
+เป็น
+เป็นการ
+เพราะ
+เพื่อ
+เมื่อ
+เรา
+เริ่ม
+เลย
+เห็น
+เอง
+แต่
+แบบ
+แรก
+และ
+แล้ว
+แห่ง
+โดย
+ใน
+ให้
+ได้
+ไป
+ไม่
+ไว้
+้ง
\ No newline at end of file
diff --git a/pythainlp/corpus/stopwords-th2.txt b/pythainlp/corpus/stopwords-th2.txt
new file mode 100644
index 000000000..434ddacac
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th2.txt
@@ -0,0 +1,114 @@
+ไว้
+ไม่
+ไป
+ได้
+ให้
+ใน
+โดย
+แห่ง
+แล้ว
+และ
+แรก
+แบบ
+แต่
+เอง
+เห็น
+เลย
+เริ่ม
+เรา
+เมื่อ
+เพื่อ
+เพราะ
+เป็นการ
+เป็น
+เปิดเผย
+เปิด
+เนื่องจาก
+เดียวกัน
+เดียว
+เช่น
+เฉพาะ
+เคย
+เข้า
+เขา
+อีก
+อาจ
+อะไร
+ออก
+อย่าง
+อยู่
+อยาก 
+หาก
+หลาย
+หลังจาก
+หลัง
+หรือ
+หนึ่ง
+ส่วน
+ส่ง
+สุด
+สําหรับ
+ว่า
+วัน
+ลง
+ร่วม
+ราย
+รับ
+ระหว่าง
+รวม
+ยัง
+มี
+มาก
+มา
+พร้อม
+พบ
+ผ่าน
+ผล
+บาง
+น่า
+นี้
+นํา
+นั้น
+นัก
+นอกจาก
+ทุก
+ที่สุด
+ที่
+ทําให้
+ทํา
+ทาง
+ทั้งนี้
+ถ้า
+ถูก
+ถึง
+ต้อง
+ต่างๆ
+ต่าง
+ต่อ
+ตาม
+ตั้งแต่
+ตั้ง
+ด้าน
+ด้วย
+ดัง
+ซึ่ง
+ช่วง
+จึง
+จาก
+จัด
+จะ
+คือ
+ความ
+ครั้ง
+คง
+ขึ้น
+ของ
+ขอ
+ขณะ
+ก่อน
+ก็
+การ
+กับ
+กัน
+กว่า
+กล่าว
diff --git a/pythainlp/corpus/stopwords-th3.txt b/pythainlp/corpus/stopwords-th3.txt
new file mode 100644
index 000000000..e4cb167dc
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th3.txt
@@ -0,0 +1,322 @@
+เกิน
+เกินๆ
+เกี่ยวกัน
+เกี่ยวกับ
+เกี่ยวของ
+เกี่ยวเนื่อง
+เกี่ยวๆ
+เกือบ
+เกือบจะ
+เกือบๆ
+แก
+แก
+แกไข
+ใกล
+ใกลๆ
+ไกล
+ไกลๆ
+ขณะ
+ขณะเดียวกัน
+ขณะใด
+ขณะใดๆ
+ขณะที่
+ขณะนั้น
+ขณะนี้
+ขณะหนึ่ง
+ขวาง
+ขวางๆ
+ขอ
+ของ
+ขั้น
+ใคร
+ใคร
+ใครจะ
+ใครๆ
+งาย
+งายๆ
+ไง
+จง
+จด
+จน
+จนกระทั่ง
+จนกวา
+จนขณะนี้
+จนตลอด
+จนถึง
+จนทั่ว
+จนบัดนี้
+จนเมื่อ
+จนแม
+จนแมน
+จรด
+จรดกับ
+จริง
+จริงจัง
+จริงๆ
+จริงๆจังๆ
+จวน
+จวนจะ
+จวนเจียน
+จวบ
+ซึ่งก็
+ซึ่งก็คือ
+ซึ่งกัน
+ซึ่งกันและกัน
+ซึ่งไดแก
+ซึ่งๆ
+ณ
+ดวย
+ดวยกัน
+ดวยเชนกัน
+ดวยที่
+ดวยประการฉะนี้
+ดวยเพราะ
+ดวยวา
+ดวยเหตุที่
+ดวยเหตุนั้น
+ดวยเหตุนี้
+ดวยเหตุเพราะ
+ดวยเหตุวา
+ดวยเหมือนกัน
+ดัง
+ดั่ง
+ดังกลาว
+ดังกับ
+ดั่งกับ
+ดังกับวา
+ดั่งกับวา
+ดังเกา
+ดั่งเกา
+ดังเคย
+ใดๆ
+ได
+ไดแก
+ไดแต
+ไดที่
+ไดมา
+ไดรับ
+ตน
+ตนเอง
+ตนฯ
+ตรง
+ตรงๆ
+ตลอด
+ตลอดกาล
+ตลอดกาลนาน
+ตลอดจน
+ตลอดถึง
+ตลอดทั้ง
+ตลอดทั่ว
+ตลอดทั่วถึง
+ตลอดทั่วทั้ง
+ตลอดป
+ตลอดไป
+ตลอดมา
+ตลอดระยะเวลา
+ตลอดวัน
+ตลอดเวลา
+ตลอดศก
+ตอ
+ตอกัน
+ถึง
+ถึงแก
+ถึงจะ
+ถึงบัดนั้น
+ถึงบัดนี้
+ถึงเมื่อ
+ถึงเมื่อใด
+ถึงเมื่อไร
+ถึงแม
+ถึงแมจะ
+ถึงแมวา
+ถึงอยางไร
+ถือ
+ถือวา
+ถูก
+ถูกตอง
+ถูกๆ
+เถอะ
+เถิด
+ทรง
+ทวา
+ทั้ง
+ทั้งคน
+ทั้งตัว
+ทั้งที
+ทั้งที่
+ทั้งนั้น
+ทั้งนั้นดวย
+ทั้งนั้นเพราะ
+ทั้งนี้
+นอก
+นอกจาก
+นอกจากที่
+นอกจากนั้น
+นอกจากนี้
+นอกจากวา
+นอกนั้น
+นอกเหนือ
+นอกเหนือจาก
+นอย
+นอยกวา
+นอยๆ
+นะ
+นะ
+นัก
+นักๆ
+นั่น
+นั้น
+นั่นไง
+นั่นเปน
+นั่นแหละ
+นั่นเอง
+นั้นๆ
+นับ
+นับจากนั้น
+นับจากนี้
+นับตั้งแต
+นับแต
+นับแตที่
+นับแตนั้น
+เปนตน
+เปนตนไป
+เปนตนมา
+เปนแต
+เปนแตเพียง
+เปนที
+เปนที่
+เปนที่สุด
+เปนเพราะ
+เปนเพราะวา
+เปนเพียง
+เปนเพียงวา
+เปนเพื่อ
+เปนอัน
+เปนอันมาก
+เปนอันวา
+เปนอันๆ
+เปนอาทิ
+เปนๆ
+เปลี่ยน
+เปลี่ยนแปลง
+เปด
+เปดเผย
+ไป
+ไป
+ผาน
+ผานๆ
+ผิด
+ผิดๆ
+ผู
+เพียงเพื่อ
+เพียงไร
+เพียงไหน
+เพื่อ
+เพื่อที่
+เพื่อที่จะ
+เพื่อวา
+เพื่อให
+ภาค
+ภาคฯ
+ภาย
+ภายใต
+ภายนอก
+ภายใน
+ภายภาค
+ภายภาคหนา
+ภายหนา
+ภายหลัง
+มอง
+มองวา
+มัก
+มักจะ
+มัน
+มันๆ
+มั้ย
+มั้ยนะ
+มั้ยนั่น
+มั้ยเนี่ย
+มั้ยละ
+มา
+ยืนนาน
+ยืนยง
+ยืนยัน
+ยืนยาว
+เยอะ
+เยอะแยะ
+เยอะๆ
+แยะ
+แยะๆ
+รวด
+รวดเร็ว
+รวม
+รวม
+รวมกัน
+รวมกัน
+รวมดวย
+รวมดวย
+รวมถึง
+รวมทั้ง
+รวมมือ
+รวมๆ
+ระยะ
+ระยะๆ
+ระหวาง
+รับ
+รับรอง
+รึ
+รึวา
+รือ
+รือวา
+สิ้นกาลนาน
+สืบเนื่อง
+สุด
+สุดๆ
+สู
+สูง
+สูงกวา
+สูงสง
+สูงสุด
+สูงๆ
+เสมือนกับ
+เสมือนวา
+เสร็จ
+เสร็จกัน
+เสร็จแลว
+เสร็จสมบูรณ
+เสร็จสิ้น
+เสีย
+เสียกอน
+เสียจน
+เสียจนกระทั่ง
+เสียจนถึง
+เสียดวย
+เสียนั่น
+เสียนั่นเอง
+เสียนี่
+เสียนี่กระไร
+เสียยิ่ง
+เสียยิ่งนัก
+เสียแลว
+ใหญๆ
+ใหดี
+ใหแด
+ใหไป
+ใหม
+ใหมา
+ใหมๆ
+ไหน
+ไหนๆ
+อดีต
+อนึ่ง
+อยาก
+อยาง
+อยางเชน
+อยางดี
+อยางเดียว
+อยางใด
+อยางที่
+อยางนอย
+อยางนั้น
+อยางนี้
+อยางโนน
diff --git a/pythainlp/corpus/stopwords-th4.txt b/pythainlp/corpus/stopwords-th4.txt
new file mode 100644
index 000000000..2424dac00
--- /dev/null
+++ b/pythainlp/corpus/stopwords-th4.txt
@@ -0,0 +1,887 @@
+ก็
+ก็คือ
+ก็แค่
+ก็จะ
+ก็ดี
+ก็ได้
+ก็ต่อเมื่อ
+ก็ตาม
+ก็ตามแต่
+ก็ตามที
+ก็แล้วแต่
+กระทั่ง
+กระทำ
+กระนั้น
+กระผม
+กลับ
+กล่าว
+กล่าวคือ
+กลุ่ม
+กลุ่มก้อน
+กลุ่มๆ
+กว่า
+กว้าง
+กว้างขวาง
+กว้างๆ
+ก่อน
+ก่อนหน้า
+ก่อนหน้านี้
+ก่อนๆ
+กัน
+กันดีกว่า
+กันดีไหม
+กันเถอะ
+กันนะ
+กันและกัน
+กันไหม
+กันเอง
+กับ
+การ
+กำลัง
+กำลังจะ
+กำหนด
+กู
+เก็บ
+เกิด
+เกิน
+เกินๆ
+เกี่ยวกัน
+เกี่ยวกับ
+เกี่ยวข้อง
+เกี่ยวเนื่อง
+เกี่ยวๆ
+เกือบ
+เกือบจะ
+เกือบๆ
+แก
+แก่
+แก้ไข
+ใกล้
+ใกล้ๆ
+ไกล
+ไกลๆ
+ขณะ
+ขณะเดียวกัน
+ขณะใด
+ขณะใดๆ
+ขณะที่
+ขณะนั้น
+ขณะนี้
+ขณะหนึ่ง
+ขวาง
+ขวางๆ
+ขอ
+ของ
+ขั้น
+ข้า
+ข้าง
+ข้างเคียง
+ข้างต้น
+ข้างบน
+ข้างล่าง
+ข้างๆ
+ขาด
+ข้าพเจ้า
+ข้าฯ
+ขึ้น
+เขา
+เข้า
+เข้าใจ
+เขียน
+คง
+คงจะ
+คงอยู่
+ครบ
+ครบครัน
+ครบถ้วน
+ครั้ง
+ครั้งกระนั้น
+ครั้งก่อน
+ครั้งครา
+ครั้งคราว
+ครั้งใด
+ครั้งที่
+ครั้งนั้น
+ครั้งนี้
+ครั้งละ
+ครั้งหนึ่ง
+ครั้งหลัง
+ครั้งหลังสุด
+ครั้งไหน
+ครั้งๆ
+ครัน
+ครับ
+ครา
+คราใด
+คราที่
+ครานั้น
+ครานี้
+คราหนึ่ง
+คราไหน
+คราว
+คราวก่อน
+คราวใด
+คราวที่
+คราวนั้น
+คราวนี้
+คราวโน้น
+คราวละ
+คราวหน้า
+คราวหนึ่ง
+คราวหลัง
+คราวไหน
+คราวๆ
+คล้าย
+คล้ายกัน
+คล้ายกันกับ
+คล้ายกับ
+คล้ายกับว่า
+คล้ายว่า
+ควร
+ความ
+ค่อน
+ค่อนข้าง
+ค่อนข้างจะ
+ค่อยไปทาง
+ค่อนมาทาง
+ค่อย
+ค่อยๆ
+คะ
+ค่ะ
+คำ
+คิด
+คิดว่า
+คือ
+คุณ
+คุณๆ
+เคย
+เคยๆ
+แค่
+แค่จะ
+แค่นั้น
+แค่นี้
+แค่เพียง
+แค่ว่า
+แค่ไหน
+ใคร
+ใคร่
+ใคร่จะ
+ง่าย
+ง่ายๆ
+ไง
+จง
+จด
+จนกระทั่ง
+จนกว่า
+จนขณะนี้
+จนตลอด
+จนถึง
+จนทั่ว
+จนบัดนี้
+จนเมื่อ
+จนแม้
+จนแม้น
+จรด
+จรดกับ
+จริง
+จริงจัง
+จริงๆ
+จริงๆ
+จังๆ
+จวน
+จวนจะ
+จวนเจียน
+จวบ
+จวบกับ
+จวบจน
+จะ
+จ้ะ
+จ๊ะ
+จะได้
+จัง
+จังๆ
+จัด
+จัดการ
+จัดงาน
+จัดแจง
+จัดตั้ง
+จัดทำ
+จัดหา
+จัดให้
+จับ
+จ้า
+จ๋า
+จาก
+จากนั้น
+จากนี้ 
+จากนี้ไป
+จำ
+จำเป็น 
+จำพวก
+จึง
+จึงจะ
+จึงเป็น
+จู่ๆ
+ฉะนั้น
+ฉะนี้
+ฉัน
+เฉกเช่น
+เฉพาะ
+เฉย
+เฉยๆ
+ไฉน
+ช่วง
+ช่วงก่อน
+ช่วงต่อไป
+ช่วงถัดไป
+ช่วงท้าย
+ช่วงที่
+ช่วงนั้น
+ช่วงนี้
+ช่วงระหว่าง
+ช่วงแรก
+ช่วงหน้า
+ช่วงหลัง
+ช่วงๆ
+ช่วย
+ช้า
+ช้านาน
+ชาว
+ช้าๆ
+เช่น
+เช่นก่อน
+เช่นกัน
+เช่นเคย
+เช่นดัง
+เช่นดังก่อน
+เช่นดังเก่า
+เช่นดังที่
+เช่นดังว่า
+เช่นเดียวกัน
+เช่นเดียวกับ
+เช่นใด
+เช่นที่
+เช่นที่เคย
+เช่นที่ว่า
+เช่นนั้น
+เช่นนั้นเอง
+เช่นนี้
+เช่นเมื่อ
+เช่นไร
+เชื่อ
+เชื่อถือ
+เชื่อมั่น
+เชื่อว่า
+ใช่
+ใช่ไหม
+ใช้
+ซะ
+ซะก่อน
+ซะจน
+ซะจนกระทั่ง
+ซะจนถึง
+ซึ่ง
+ซึ่งก็
+ซึ่งก็คือ
+ซึ่งกัน
+ซึ่งกันและกัน
+ซึ่งได้แก่
+ซึ่งๆ
+ณ
+ด้วย
+ด้วยกัน
+ด้วยเช่นกัน
+ด้วยที่
+ด้วยเพราะ
+ด้วยว่า
+ด้วยเหตุที่
+ด้วยเหตุนั้น
+ด้วยเหตุนี้
+ด้วยเหตุเพราะ
+ด้วยเหตุว่า
+ด้วยเหมือนกัน
+ดัง
+ดั่ง
+ดังกล่าว
+ดังกับ
+ดั่งกับ
+ดังกับว่า
+ดั่งกับว่า
+ดังเก่า
+ดั่งเก่า
+ดังเคย
+ดั่งเคย
+ต่าง
+ต่างก็
+ต่างหาก
+ต่างๆ
+ตาม
+ตามด้วย
+ตามแต่
+ตามที่
+ตามๆ
+เต็มไปด้วย
+เต็มไปหมด
+เต็มๆ
+แต่
+แต่ก็
+แต่ก่อน
+แต่จะ
+แต่เดิม
+แต่ต้อง
+แต่ถ้า
+แต่ทว่า
+แต่ที่
+แต่นั้น
+แต่เพียง
+แต่เมื่อ
+แต่ไร
+แต่ละ
+แต่ว่า
+แต่ไหน
+แต่อย่างใด
+โต
+โตๆ
+ใต้
+ถ้า
+ถ้าจะ
+ถ้าหาก
+ถึง
+ถึงแก่
+ถึงจะ
+ถึงบัดนั้น
+ถึงบัดนี้
+ถึงเมื่อ
+ถึงเมื่อใด
+ถึงเมื่อไร
+ถึงแม้
+ถึงแม้จะ
+ถึงแม้ว่า
+ถึงอย่างไร
+ถือ
+ถือว่า
+ถูก
+ถูกต้อง
+ถูกๆ
+เถอะ
+เถิด
+ทรง
+ทว่า
+ทั้ง
+ทั้งคน
+ทั้งตัว
+ทั้งที่
+ทั้งนั้น
+ทั้งนั้นด้วย
+ทั้งนั้นเพราะ
+ทั้งนี้
+ทั้งปวง
+ทั้งเป็น
+ทั้งมวล
+ทั้งสิ้น
+ทั้งหมด
+ทั้งหลาย
+ทั้งๆ
+ทั้งๆ
+ที่
+ทัน
+ทันใดนั้น
+ทันที
+ทันทีทันใด
+ทั่ว
+ท
+าไม
+ท
+าไร
+ท
+าให้
+ท
+าๆ
+ที
+ที่
+ที่จริง
+ที่ซึ่ง
+ทีเดียว
+ทีใด
+ที่ใด
+ที่ได้
+ทีเถอะ
+ที่แท้
+ที่แท้จริง
+ที่นั้น
+ที่นี้
+ทีไร
+ทีละ
+ที่ละ
+ที่แล้ว
+ที่ว่า
+ที่สุด
+ที่แห่งนั้น
+ที่ไหน
+ทีๆ
+ที่ๆ
+ทุก
+ทุกคน
+ทุกครั้ง
+ทุกครา
+ทุกคราว
+ทุกชิ้น
+ทุกตัว
+ทุกทาง
+ทุกที
+ทุกที่
+ทุกเมื่อ
+ทุกวัน
+ทุกวันนี้
+ทุกสิ่ง
+ทุกหน
+ทุกแห่ง
+ทุกอย่าง
+ทุกอัน
+ทุกๆ
+เท่า
+เท่ากัน
+เท่ากับ
+เท่าใด
+เท่าที่
+เท่านั้น
+เท่านี้
+เท่าไร
+เท่าไหร่
+แท้
+แท้จริง
+เธอ
+นอก
+นอกจาก
+นอกจากที่
+นอกจากนั้น
+นอกจากนี้
+นอกจากว่า
+นอกนั้น
+นอกเหนือ
+น้อย
+น้อยกว่า
+น้อยๆ
+นะ
+น่ะ
+นัก
+นั่น
+นั้นไว
+นับจากนั้น
+นับจากนี้
+นับแต่นี้
+น่า
+นาง
+นางสาว
+น่าจะ
+นาน
+นานๆ
+นาย
+นำ
+นำพา
+นำมา
+นิด
+นิดหน่อย
+นิดๆ
+นี่
+นี้
+นี่ไง
+นี่นา
+นี่แน่ะ
+นี่แหละ
+นี้แหล่
+นี่เอง
+นี้เอง
+นู่น
+นู้น
+เน้น
+เนี่ย
+เนี่ยเอง
+ใน
+ในช่วง
+ในที่
+ในเมื่อ
+ในระหว่าง
+บน
+บอก
+บอกแล้ว
+บอกว่า
+บ่อย
+บ่อยกว่า
+บ่อยครั้ง
+บ่อยๆ
+บัดดล
+บัดเดี๋ยวนี้
+บัดนั้น
+บัดนี้
+บาง
+บ้าง
+บางกว่า
+บางขณะ
+บางครั้ง
+บางครา
+บางคราว
+บางที
+บางที่
+บางแห่ง
+บางๆ
+แบบ
+ปฏิบัติ
+ประกอบ
+ประการ
+ประการฉะนี้
+ประการใด
+ประการหนึ่ง
+ประมาณ
+ประสบ
+ปรับ
+ปรากฏ
+ปรากฏว่า
+ปัจจุบัน
+ปิด
+เป็น
+เป็นด้วย
+เป็นดัง
+เป็นต้น
+เป็นแต่
+เป็นเพื่อ
+เป็นอัน
+เป็นอันมาก
+เป็นอาทิ
+เปลี่ยน
+เปลี่ยนแปลง
+ไป
+ผ่าน
+ผ่านๆ
+ผิด
+ผิดๆ
+ผู้
+ผู้ใด
+เผื่อ
+เผื่อจะ
+เผื่อที่
+เผื่อว่า
+ฝ่าย
+ฝ่ายใด
+พบ
+พบว่า
+พยายาม
+พร้อม
+พร้อมกัน
+พร้อมกับ
+พร้อมด้วย
+พร้อมทั้ง
+พร้อมที่
+พร้อมเพียง
+พวก
+พวกกัน
+พวกกู
+พวกแก
+พวกเขา
+พวกคุณ
+พวกฉัน
+พวกท่าน
+พวกที่
+พวกเธอ
+พวกนั้น
+พวกนี้
+พวกนู้น
+พวกโน้น
+พวกมัน
+พวกมึง
+พอ
+พอกัน
+พอควร
+พอจะ
+พอดี
+พอตัว
+พอที
+พอที่
+พอเพียง
+พอแล้ว
+พอสม
+พอสมควร
+พอเหมาะ
+พอๆ
+พา
+พึง
+พึ่ง
+พื้นๆ
+พูด
+เพราะ
+เพราะฉะนั้น
+เพราะว่า
+เพิ่ง
+เพิ่งจะ
+เพิ่ม
+เพิ่มเติม
+เพียง
+เพียงแค่
+เพียงใด
+เพียงแต่
+เพียงพอ
+เพียงเพราะ
+เพียงเพื่อ
+เพียงไหน
+เพื่อ
+เพื่อที่
+เพื่อว่า
+เพื่อให้
+ภาค
+ภาย
+ภายใต้
+ภายนอก
+ภายหลัง
+มอง
+มองว่า
+มัก
+มักจะ
+มัน
+มั๊ย
+มา
+มาก
+มากกว่า
+มากมาย
+มิ
+มิฉะนั้น
+มิใช่
+มิได้
+มี
+มีแต่
+มึง
+มุ่ง
+มุ่งเน้น
+มุ่งหมาย
+เมื่อ
+เมื่อก่อน
+เมื่อครั้ง
+เมื่อครั้งก่อน
+เมื่อคราวก่อน
+เมื่อคราวที่
+เมื่อคราว
+เมื่อคืน
+เมื่อเช้า
+เมื่อใด
+เมื่อนั้น
+เมื่อนี้
+เมื่อเย็น
+เมื่อไร
+เมื่อวันวาน
+เมื่อวาน
+เมื่อไหร่
+แม้
+แม้กระทั่ง
+แม้แต่
+แม้นว่า
+แม้ว่า
+ไม่
+ไม่ค่อย
+ไม่ค่อยจะ
+ไม่ค่อยเป็น
+ไม่ใช่
+ไม่เป็นไร
+ไม่ว่า
+ยก
+ยกให้
+ยอม
+ยอมรับ
+ย่อม
+ย่อย
+ยัง
+ยังคง
+ยังงั้น
+ยังงี้
+ยังโง้น
+ยังไง
+ยังจะ
+ยังแต่
+ยาก
+ยาว
+ยาวนาน
+ยิ่ง
+ยิ่งกว่า
+ยิ่งขึ้น
+ยิ่งขึ้นไป
+ยิ่งจน
+ยิ่งจะ
+ยิ่งนัก
+ยิ่งเมื่อ
+ยิ่งแล้ว
+ยิ่งใหญ่
+เยอะ
+เยอะแยะ
+แยะ
+รวด
+รวดเร็ว
+รวม
+ร่วม
+รวมกัน
+ร่วมกัน
+รวมด้วย
+ร่วมด้วย
+รวมถึง
+รวมทั้ง
+ระยะ
+ระหว่าง
+รับ
+รึ
+รือ
+รือว่า
+เร็ว
+เร็วๆ
+เรา
+เราๆ
+เริ่ม
+เรียก
+เรียบ
+เรื่อย
+เรื่อยๆ
+ไร
+ล้วน
+ล้วนจน
+ล้วนแต่
+ละ
+ล่าสุด
+เล็ก
+เล็กน้อย
+เล็กๆ
+เลย
+เล่าว่า
+แล้ว
+แล้วกัน
+แล้วแต่
+แล้วเสร็จ
+วันใด
+วันนั้น
+วันนี้
+วันไหน
+สบาย
+สมัย
+สมัยก่อน
+สมัยนั้น
+สมัยนี้
+สมัยโน้น
+ส่วน
+ส่วนเกิน
+ส่วนด้อย
+ส่วนดี
+ส่วนใด
+ส่วนที่
+ส่วนน้อย
+ส่วนนั้น
+ส่วนมาก
+ส่วนใหญ่
+สั้น
+สั้นๆ
+สามารถ
+สำคัญ
+สิ่ง
+สิ่งใด
+สิ่งนั้น
+สิ่งนี้
+สิ่งไหน
+สิ้น
+สุด
+เสร็จ
+เสร็จแล้ว
+เสียจน
+เสียด้วย
+เสียนี่
+เสียแล้ว
+แสดง
+แสดงว่า
+หน
+หนอ
+หนอย
+หน่อย
+หมด
+หมดกัน
+หมดสิ้น
+หรือไง
+หรือเปล่า
+หรือไม่
+หรือยัง
+หรือไร
+หลังจาก
+หาก
+หากแม้
+หากแม้น
+หากแม้นว่า
+หากว่า
+หาความ
+หาใช่
+หารือ
+เหตุ
+เหตุผล
+เหตุนั้น
+เหตุนี้
+เหตุไร
+เห็นแก่
+เห็นควร
+เห็นจะ
+เห็นว่า
+เหลือ
+เหลือเกิน
+เหล่า
+เหล่านั้น
+เหล่านี้
+แห่ง
+แห่งใด
+แห่งนั้น
+แห่งนี้
+แห่งโน้น
+แห่งไหน
+แหละ
+ให้
+ให้แก่
+ใหญ่
+ใหญ่โต
+ไหน
+ไหนๆ
+อดีต
+อนึ่ง
+อยาก
+อย่าง
+อย่างเช่น
+อย่างดี
+อย่างเดียว
+อย่างใด
+อย่างที่
+อย่างน้อย
+อย่างนั้น
+อย่างนี้
+อย่างโน้น
+อย่างมาก
+อย่างยิ่ง
+อย่างไร
+อย่างไรก็
+อย่างไรก็ได้
+อย่างไรเสีย
+อย่างละ
+อย่างหนึ่ง
+อย่างไหน
+อย่างๆ
+อัน
+อันจะ
+อันใด
+อันได้แก่
+อันที่
+อันที่จริง
+อันที่จะ
+อันเนื่องมาจาก
+อันละ
+อันไหน
+อันๆ
+อาจ
+อาจจะ
+อาจเป็น
+อาจเป็นด้วย
+อีก
+อื่น
+อื่นๆ
+เอง
+เอ็ง
+เอา
+ฯ
+ฯล
+ฯลฯ
\ No newline at end of file
diff --git a/pythainlp/corpus/stopwords.py b/pythainlp/corpus/stopwords.py
index 88ae26bce..9400d96b1 100644
--- a/pythainlp/corpus/stopwords.py
+++ b/pythainlp/corpus/stopwords.py
@@ -1,4 +1,11 @@
 ﻿from __future__ import absolute_import,unicode_literals
+import os
+import codecs
+import pythainlp
+templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'corpus')
+template_file = os.path.join(templates_dir, 'stopwords-th.txt')
+"""
+ข้อมูลตัวเก่า
 def words(lang):
     '''
     stopword ภาษาไทย
@@ -19,4 +26,16 @@ def words(lang):
                  "ให้", "ใน", "โดย", "แห่ง", "แล้ว", "และ", "แรก", "แบบ", "ๆ"]
             }
 
-    if lang == 'thai': return words['thai'] #ถ้า argument ที่ได้เป็น 'thai' ก็จะ return stopwords
\ No newline at end of file
+    if lang == 'thai': return words['thai'] #ถ้า argument ที่ได้เป็น 'thai' ก็จะ return stopwords
+"""
+def words(lang):
+    '''
+    Return the stopword list for ``lang``; only 'thai' is handled (any other value returns None).
+    Usage:
+    from pythainlp.corpus import stopwords
+    stopwords = stopwords.words('thai')
+    '''
+    if lang == 'thai':
+        with codecs.open(template_file, 'r',encoding='utf-8-sig') as f:  # utf-8-sig drops the BOM that stopwords-th.txt starts with
+            lines = [line.strip() for line in f.read().splitlines()]  # some corpus entries carry trailing spaces
+        return lines
\ No newline at end of file
diff --git a/pythainlp/corpus/wordnet.py b/pythainlp/corpus/wordnet.py
index 96203b5e5..43f510c79 100644
--- a/pythainlp/corpus/wordnet.py
+++ b/pythainlp/corpus/wordnet.py
@@ -1,5 +1,15 @@
 ﻿# WordNet ภาษาไทย
 from __future__ import unicode_literals,print_function,absolute_import
+import nltk
+try:
+	nltk.data.find("corpora/omw")  # probe for the omw corpus in the local nltk data
+except LookupError:  # nltk.data.find raises LookupError when the resource is missing; other errors now propagate
+	nltk.download('omw')
+	nltk.download('wordnet')
+from nltk.corpus import wordnet 
+'''
+API ตัวเก่า
+'''
 import sqlite3
 import pythainlp
 import os
@@ -11,18 +21,46 @@
 Synset = namedtuple('Synset', 'synset li')
 def getWords(wordid):
 	"""เป็นคำสั่ง ใช้รับคำจาก ID รับค่า str ส่งออกเป็น tuple ('Word', 'synsetid li')"""
+	print("แจ้งเตือน !!! API ตัวนี้จะยกเลิกการใช้งานใน PyThaiNLP 1.5")  # deprecation notice (Thai): this API will be discontinued in PyThaiNLP 1.5
 	words = []
 	cur = conn.execute("select * from word_synset where synsetid=?", (wordid,))
 	row = cur.fetchone()
 	return Word(*cur.fetchone())
 def getSynset(synset):
 	"""เป็นคำสั่ง ใช้รับ Synset รับค่า str ส่งออกเป็น tuple ('Synset', 'synset li')"""
+	print("แจ้งเตือน !!! API ตัวนี้จะยกเลิกการใช้งานใน PyThaiNLP 1.5")  # deprecation notice (Thai): this API will be discontinued in PyThaiNLP 1.5
 	cursor=conn.execute("select * from word_synset where li=?",(synset,))
 	row=cursor.fetchone()
 	if row:
 		return Synset(*row)
 	else:
 		return None
-if __name__ == "__main__":
-	print(getSynset("ผลักดันกลับ"))
-	print(getWords("02503365-v"))
\ No newline at end of file
+'''
+API ตัวใหม่ เริ่มใช้ตั้งแต่ PyThaiNLP 1.4 เป็นต้นไป
+'''
+def synsets(word, pos=None, lang="tha"):  # thin wrapper over nltk's wordnet.synsets; word is passed as lemma, lang defaults to "tha"
+	return wordnet.synsets(lemma=word,pos=pos,lang=lang)
+def synset(name_synsets):  # thin wrapper over nltk's wordnet.synset; name_synsets is forwarded unchanged
+	return wordnet.synset(name_synsets)
+def all_lemma_names(pos=None, lang="tha"):  # thin wrapper over nltk's wordnet.all_lemma_names; lang defaults to "tha"
+	return wordnet.all_lemma_names(pos=pos, lang=lang)
+def all_synsets(pos=None):  # thin wrapper over nltk's wordnet.all_synsets; pos is forwarded unchanged
+	return wordnet.all_synsets(pos=pos)
+def langs():  # thin wrapper over nltk's wordnet.langs
+	return wordnet.langs()
+def lemmas(word,pos=None,lang="tha"):  # thin wrapper over nltk's wordnet.lemmas; lang defaults to "tha"
+	return wordnet.lemmas(word,pos=pos,lang=lang)
+def lemma(name_synsets):  # thin wrapper over nltk's wordnet.lemma; name_synsets is forwarded unchanged
+	return wordnet.lemma(name_synsets)
+def lemma_from_key(key):  # thin wrapper over nltk's wordnet.lemma_from_key
+	return wordnet.lemma_from_key(key)
+def path_similarity(synsets1,synsets2):  # thin wrapper over nltk's wordnet.path_similarity; both arguments are forwarded in order
+	return wordnet.path_similarity(synsets1,synsets2)
+def lch_similarity(synsets1,synsets2):  # thin wrapper over nltk's wordnet.lch_similarity; both arguments are forwarded in order
+	return wordnet.lch_similarity(synsets1,synsets2)
+def wup_similarity(synsets1,synsets2):  # thin wrapper over nltk's wordnet.wup_similarity; both arguments are forwarded in order
+	return wordnet.wup_similarity(synsets1,synsets2)
+def morphy(form, pos=None):  # thin wrapper over nltk's wordnet.morphy
+	return wordnet.morphy(form, pos=pos)  # forward the caller's pos instead of always passing None
+def custom_lemmas(tab_file, lang):  # thin wrapper over nltk's wordnet.custom_lemmas; lang has no default here
+	return wordnet.custom_lemmas(tab_file, lang)
diff --git a/pythainlp/postaggers/text.py b/pythainlp/postaggers/text.py
deleted file mode 100644
index d0935146c..000000000
--- a/pythainlp/postaggers/text.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import,division,print_function,unicode_literals
-from pythainlp.segment import segment
-import pythainlp
-import codecs
-import os
-import json
-import six
-import nltk.tag, nltk.data
-templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'corpus')
-template_file = os.path.join(templates_dir, 'thaipos.json')
-#default_tagger = nltk.data.load(nltk.tag._POS_TAGGER)
-def data():
-	with codecs.open(template_file,'r',encoding='utf-8-sig') as handle:
-		model = json.load(handle)
-	return model
-data1 =data()
-#Postaggers ภาษาไทย
-def tag(text):
-	"""รับค่าเป็นข้อความ ''str'' คืนค่าเป็น ''list'' เช่น [('ข้อความ', 'ชนิดคำ')]"""
-	text= segment(text)
-	tagger = nltk.tag.UnigramTagger(model=data1)# backoff=default_tagger)
-	return tagger.tag(text)
\ No newline at end of file
diff --git a/pythainlp/rank/__init__.py b/pythainlp/rank/__init__.py
index 473fdd800..cfc904f3a 100644
--- a/pythainlp/rank/__init__.py
+++ b/pythainlp/rank/__init__.py
@@ -1,11 +1,17 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import,print_function,unicode_literals
 from collections import Counter
+from pythainlp.corpus import stopwords
 #เรียงจำนวนคำของประโยค
-def rank(data):
+def rank(data,stopword=False):  # count occurrences of each item in data; a non-False stopword drops Thai stopwords first
 	"""เรียงจำนวนคำของประโยค
 	รับค่าเป็น ''list'' คืนค่าเป็น ''dict'' [ข้อความ,จำนวน]"""
-	return Counter(data)
+	if stopword==False:
+		rankdata=Counter(data)
+	else:
+		thai_stopwords = frozenset(stopwords.words('thai'))  # load the corpus once instead of once per word
+		rankdata=Counter(word for word in data if word not in thai_stopwords)
+	return rankdata
 if __name__ == "__main__":
 	text = ['แมว','ชอบ','ปลา','และ','แมว','ชอบ','นอน','มาก','เลย','คน','เลี้ยง','กลาย','เป็น','ทาส','แมว']
 	print(rank(text))
diff --git a/pythainlp/romanization/__init__.py b/pythainlp/romanization/__init__.py
index ee26a875a..525bc7e0f 100644
--- a/pythainlp/romanization/__init__.py
+++ b/pythainlp/romanization/__init__.py
@@ -1,8 +1,17 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import,unicode_literals
-import icu
 # ถอดเสียงภาษาไทยเป็น Latin
-def romanization(data):
-	"""เป็นคำสั่ง ถอดเสียงภาษาไทยเป็น Latin รับค่า ''str'' ข้อความ คืนค่าเป็น ''str'' ข้อความ Latin"""
-	thai2latin = icu.Transliterator.createInstance('Thai-Latin')
-	return thai2latin.transliterate(data)
+def romanization(data,engine='pyicu'):
+	"""Romanize Thai text: takes ''str'' and returns the romanized ''str''.
+	romanization(data,engine='pyicu')
+	engine is one of:
+	- pyicu: returns Latin output
+	- royin: follows the Royal Institute's rules for romanizing Thai script
+	Any other engine raises ValueError."""
+	if engine=='royin':
+		from .royin import romanization as _romanize  # aliased so the import does not shadow this function's own name
+	elif engine=='pyicu':
+		from .pyicu import romanization as _romanize
+	else:
+		raise ValueError("unknown romanization engine: %r" % (engine,))
+	return _romanize(data)
diff --git a/pythainlp/romanization/royin.py b/pythainlp/romanization/royin.py
index 7cd6270f5..9f1d569ab 100644
--- a/pythainlp/romanization/royin.py
+++ b/pythainlp/romanization/royin.py
@@ -1,114 +1,577 @@
-# ยังไม่สามารถถอดเสียงสระได้ ***
-from __future__ import absolute_import
-from pythainlp.segment import segment
-import re
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import,division,unicode_literals,print_function
+'''
+โมดูลถอดเสียงไทยเป็นอังกฤษ
 
-th = u'[ก-ฮ]+'
+พัฒนาต่อจาก new-thai.py
 
-p = [['อักษรไทย', 'ต้น', 'ทั่วไป'],
-     ['ก', 'k', 'k'],
-     ['ข', 'kh', 'k'],
-     ['ฃ', 'kh', 'k'],
-     ['ค', 'kh', 'k'],
-     ['ฅ', 'kh', 'k'],
-     ['ฆ', 'kh', 'k'],
-     ['ง', 'ng', 'ng'],
-     ['จ', 'ch', 't'],
-     ['ฉ', 'ch', 't'],
-     ['ช', 'ch', 't'],
-     ['ซ', 's', 't'],
-     ['ฌ', 'ch', 't'],
-     ['ญ', 'y', 'n'],
-     ['ฎ', 'd', 't'],
-     ['ฏ', 't', 't'],
-     ['ฐ', 'th', 't'],
-     ['ฑ', 'th', 't'],
-     ['ฒ', 'th', 't'],
-     ['ณ', 'n', 'n'],
-     ['ด', 'd', 't'],
-     ['ต', 't', 't'],
-     ['ถ', 'th', 't'],
-     ['ท', 'th', 't'],
-     ['ธ', 'th', 't'],
-     ['น', 'n', 'n'],
-     ['บ', 'b', 'p'],
-     ['ป', 'p', 'p'],
-     ['ผ', 'ph', 'p'],
-     ['ฝ', 'f', 'p'],
-     ['พ', 'ph', 'p'],
-     ['ฟ', 'f', 'p'],
-     ['ภ', 'ph', 'p'],
-     ['ม', 'm', 'm'],
-     ['ย', 'y', ''],
-     ['ร', 'r', 'n'],
-     ['ล', 'l', 'n'],
-     ['ว', 'w', ''],
-     ['ศ', 's', 't'],
-     ['ษ', 's', 't'],
-     ['ส', 's', 't'],
-     ['ห', 'h', ''],
-     ['ฬ', 'l', 'n'],
-     ['อ', '', 'o'],
-     ['ฮ', 'h', '']]
-p2 = dict((x[0], x[2]) for x in p[1:])
-p1 = dict((x[0], x[1]) for x in p[1:])
-d1 = 0
-# p1 อักรต้น
-# p2 ทั่วไป
-# def sub1(txt)
+พัฒนาโดย นาย วรรณพงษ์ ภัททิยไพบูลย์
 
-tone = ['่','้','๊','๋']
-def delete1(data):
-	#โค้ดส่วนตัดวรรณยุกต์ออก
-	for a in tone:
-		if (re.search(a,data)):
-				data = re.sub(a,'',data)
+เริ่มพัฒนา 20 มิ.ย. 2560
+'''
+from pythainlp.tokenize import word_tokenize
+from pythainlp.tokenize import tcc
+from pythainlp.tokenize import etcc
+import re
+consonants = { # พยัญชนะ ต้น สะกด
+'ก':['k','k'],
+'ข':['kh','k'],
+'ฃ':['kh','k'],
+'ค':['kh','k'],
+'ฅ':['kh','k'],
+'ฆ':['kh','k'],
+'ง':['ng','ng'],
+'จ':['ch','t'],
+'ฉ':['ch','t'],
+'ช':['ch','t'],
+'ซ':['s','t'],
+'ฌ':['ch','t'],
+'ญ':['y','n'],
+'ฎ':['d','t'],
+'ฏ':['t','t'],
+'ฐ':['th','t'],
+'ฑ':['th','t'], #* พยัญชนะต้น เป็น d ได้
+'ฒ':['th','t'],
+'ณ':['n','n'],
+'ด':['d','t'],
+'ต':['t','t'],
+'ถ':['th','t'],
+'ท':['th','t'],
+'ธ':['th','t'],
+'น':['n','n'],
+'บ':['b','p'],
+'ป':['p','p'],
+'ผ':['ph','p'],
+'ฝ':['f','p'],
+'พ':['ph','p'],
+'ฟ':['f','p'],
+'ภ':['ph','p'],
+'ม':['m','m'],
+'ย':['y',''],
+'ร':['r','n'],
+'ล':['l','n'],
+'ว':['w',''],
+'ศ':['s','t'],
+'ษ':['s','t'],
+'ส':['s','t'],
+'ห':['h',''],
+'ฬ':['l','n'],
+'อ':['',''],
+'ฮ':['h','']
+}
+consonants_thai= u'[กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬฮ]'
+def deletetone(data):
+	"""Strip tone marks and silenced letters from Thai text.
+
+	data : ``str`` Thai text. Returns it without the four tone marks and without
+	any word character that carries a thanthakhat (the mark is dropped as well).
+	"""
+	data = re.sub('[\u0e48-\u0e4b]','',data) # mai ek, mai tho, mai tri, mai chattawa
+	# U+0E4C (thanthakhat) silences the character before it: remove both
+	data = re.sub('\\w\u0e4c','',data,flags=re.U)
 	return data
-# ส่วนพยัญชนะ
-def consonant(text):
-	try:
-		txt = delete1(text)
-		text = list(txt)
-		text1 = ""
-		text1 = p1[text[0]]
-		#print(len(text))
-		#print(text)
-		if len(txt) == 2: # จัดการแก้ไขการสะกดคำที่มี 2 ตัว โดยการเติม o
-			text1 += 'o'
-		for a in txt[1:]:
-			#a=delete1(a)
-			if (re.search(th, a, re.U)):
-				text1 += p2[a]
-			else:
-				text1 += a
-		return text1
-	except:
-		return text
-
-# ส่วนสระ
-def vowel(data):
-	#พัฒนาอยู่
-	#[ก-ฮ]ะ
-	a=list(data)
-	word=consonant(a[0]) + 'a'
-	return word
+def romanization(text):
+    text=deletetone(text)
+    text1=word_tokenize(text,engine='mm')
+    textdata=[]
+    #print(text1)
+    for text in text1:
+        #a1=etcc.etcc(text)
+        a2=tcc.tcc(text)
+        text=re.sub('//','/',a2)
+        if re.search(u'เ[\w]'+'ี'+'ย/ว',text, re.U):
+            '''
+            จัดการกับ เอียว
+            '''
+            #print('เอียว')
+            search=re.findall(u'เ[\w]'+'ี'+'ย/ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'iao',text,flags=re.U)
 
-def romanization(txt):
-	txt = segment(txt)  # (','.join(str(x) for x in txt))  # แยกออกมาเป็น list
-	cc=''
-	#print(txt)
-	for b in txt:
-		cc+=consonant(b)
-	return cc
-    # return txt
-if __name__ == "__main__":
-	print(romanization('ตอง') == "tong")
-	print(romanization('มอง'))
-	print(romanization('มด'))
-	print(romanization('พร'))
-	print(romanization('คน'))
-	print(romanization('พรม')) #!
-	#romanization('แมว')
-	print(vowel("ปะ") == "pa")
-	print(romanization('ชล'))
-	print(romanization('ต้น') == "ton")
\ No newline at end of file
+        if re.search(u'แ[\w]'+'็'+'ว',text, re.U):
+            '''
+            จัดการกับ แอ็ว
+            '''
+            #print('แอ็ว')
+            search=re.findall(u'แ[\w]'+'็'+'ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'aeo',text,flags=re.U)
+        if re.search(u'แ[\w]/[\w]'+'็/'+'ว',text, re.U):
+            '''
+            จัดการกับ แออ็ว
+            '''
+            #print('แออ็ว')
+            search=re.findall(u'แ[\w]/[\w]'+'็/'+'ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+list(i)[3]+'aeo',text,flags=re.U)
+        if re.search(u'แ[\w]/'+'ว',text, re.U):
+            '''
+            จัดการกับ แอว
+            '''
+            #print('แอว')
+            search=re.findall(u'แ[\w]/'+'ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'aeo',text,flags=re.U)
+        if re.search(u'เ[\w]/ว',text, re.U):
+            '''
+            จัดการกับ เอว
+            '''
+            #print('เอว')
+            search=re.findall(u'เ[\w]/ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'eo',text,flags=re.U)
+        if re.search(u'เ[\w]็ว',text, re.U):
+            '''
+            จัดการกับ เอ็ว
+            '''
+            #print('เอ็ว')
+            search=re.findall(u'เ[\w]็ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'eo',text,flags=re.U)
+        if re.search(u'เ[\w]ียะ',text, re.U):
+            '''
+            จัดการกับ เอียะ
+            '''
+            #print('เอียะ')
+            search=re.findall(u'เ[\w]ียะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ia',text,flags=re.U)
+        if re.search(u'เ[\w]ีย',text, re.U):
+            '''
+            จัดการกับ เอีย (1)
+            '''
+            #print('เอีย 1')
+            search=re.findall(u'เ[\w]ีย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ia',text,flags=re.U)
+        if re.search(u'เ[\w]/ีย',text, re.U):
+            '''
+            จัดการกับ เอีย (2)
+            '''
+            #print('เอีย 2')
+            search=re.findall(u'เ[\w]/ีย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ia',text,flags=re.U)
+        if re.search(u'เ[\w]ือ/ย',text, re.U):
+            '''
+            จัดการกับ เอือย
+            '''
+            #print('เอือย')
+            search=re.findall(u'เ[\w]ือ/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ueai',text,flags=re.U)
+        if re.search(u'เ[\w]ือะ',text, re.U):
+            '''
+            จัดการกับ เอือะ
+            '''
+            #print('เอือะ')
+            search=re.findall(u'เ[\w]ือะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'uea',text,flags=re.U)
+        if re.search(u'เ[\w]ือ',text, re.U):
+            '''
+            จัดการกับ เอือ
+            '''
+            #print('เอือ')
+            search=re.findall(u'เ[\w]ือ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'uea',text,flags=re.U)
+        if re.search(u'โ[\w]/ย',text, re.U):
+            '''
+            จัดการกับ โอย
+            '''
+            #print('โอย')
+            search=re.findall(u'โ[\w]/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'oi',text,flags=re.U)
+        if re.search(u'[\w]/อ/ย',text, re.U):
+            '''
+            จัดการกับ ออย
+            '''
+            #print('ออย')
+            search=re.findall(u'[\w]/อ/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'oi',text,flags=re.U)
+        if re.search(u'โ[\w]ะ',text, re.U):
+            '''
+            จัดการกับ โอะ
+            '''
+            #print('โอะ')
+            search=re.findall(u'โ[\w]ะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'o',text,flags=re.U)
+        if re.search(u'โ[\w]',text, re.U):
+            '''
+            จัดการกับ โอ
+            '''
+            #print('โอ')
+            search=re.findall(u'โ[\w]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'o',text,flags=re.U)
+        if re.search(u'เ/[\w]า/ะ/',text, re.U):
+            '''
+            จัดการกับ เอาะ (1)
+            '''
+            #print('เอาะ 1')
+            search=re.findall(u'เ/[\w]า/ะ/',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[2]+'o',text,flags=re.U)
+        if re.search(u'เ[\w]าะ',text, re.U):
+            '''
+            จัดการกับ เอาะ (2)
+            '''
+            #print('เอาะ 2')
+            search=re.findall(u'เ[\w]าะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'o',text,flags=re.U)
+        if re.search(u'อำ',text, re.U):
+            '''
+            จัดการกับ อำ
+            '''
+            #print('อำ')
+            search=re.findall(u'อำ',text, re.U)
+            for i in search:
+                text=re.sub(i,'am',text,flags=re.U)
+        if re.search(u'อี',text, re.U):
+            '''
+            จัดการกับ อี
+            '''
+            #print('"อี"')
+            search=re.findall(u'อี',text, re.U)
+            for i in search:
+                text=re.sub(i,'i',text,flags=re.U)
+        # เออ
+        if re.search(u'เ[\w]/อ',text, re.U):
+            '''
+            จัดการกับ เออ
+            '''
+            #print('เออ')
+            search=re.findall(u'เ[\w]/อ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'oe',text,flags=re.U)
+        if re.search(u'[\w]/อ',text, re.U):
+            '''
+            จัดการกับ ออ
+            '''
+            #print('ออ')
+            search=re.findall(u'[\w]/อ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'o',text,flags=re.U)
+        if re.search(u'[\w]ัวะ',text, re.U):
+            '''
+            จัดการกับ อัวะ
+            '''
+            #print('อัวะ')
+            search=re.findall(u'[\w]ัวะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ua',text,flags=re.U)
+        if re.search(u'[\w]ัว',text, re.U):
+            '''
+            จัดการกับ อัว
+            '''
+            #print('อัว')
+            search=re.findall(u'[\w]ัว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ua',text,flags=re.U)
+        # ใอ,อัย , อาย
+        if re.search(u'ใ[\w]',text, re.U):
+            '''
+            จัดการกับ ใอ
+            '''
+            #print('ใอ')
+            search=re.findall(u'ใ[\w]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ai',text,flags=re.U)
+        if re.search(u'[\w]ัย',text, re.U):
+            '''
+            จัดการกับ อัย
+            '''
+            #print('อัย')
+            search=re.findall(u'[\w]ัย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ai',text,flags=re.U)
+        if re.search(u'[\w]า/ย',text, re.U):
+            '''
+            จัดการกับ อาย
+            '''
+            #print('อาย')
+            search=re.findall(u'[\w]า/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ai',text,flags=re.U)
+        #เอา, อาว
+        if re.search(u'เ[\w]า',text, re.U):
+            '''
+            จัดการกับ เอา
+            '''
+            #print('เอา')
+            search=re.findall(u'เ[\w]า',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ao',text,flags=re.U)
+        if re.search(u'[\w]า/ว',text, re.U):
+            '''
+            จัดการกับ อาว
+            '''
+            #print('อาว')
+            search=re.findall(u'[\w]า/ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ao',text,flags=re.U)
+        #อุย
+        if re.search(u'[\w]ุ/ย',text, re.U):
+            '''
+            จัดการกับ อุย
+            '''
+            #print('อุย')
+            search=re.findall(u'[\w]ุ/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ui',text,flags=re.U)
+        #เอย
+        if re.search(u'เ[\w]/ย',text, re.U):
+            '''
+            จัดการกับ เอย
+            '''
+            #print('เอย')
+            search=re.findall(u'เ[\w]/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'oei',text,flags=re.U)
+        # แอะ, แอ
+        if re.search(u'แ[\w]ะ',text, re.U):
+            '''
+            จัดการกับ แอะ
+            '''
+            #print('แอะ')
+            search=re.findall(u'แ[\w]ะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ae',text,flags=re.U)
+        if re.search(u'แ[\w]',text, re.U):
+            '''
+            จัดการกับ แอ
+            '''
+            #print('แอ')
+            search=re.findall(u'แ[\w]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ae',text,flags=re.U)
+        # เอะ
+        if re.search(u'เ[\w]ะ',text, re.U):
+            '''
+            จัดการกับ เอะ
+            '''
+            #print('เอะ')
+            search=re.findall(u'เ[\w]ะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'e',text,flags=re.U)
+        # อิว
+        if re.search(u'[\w]ิ/ว',text, re.U):
+            '''
+            จัดการกับ อิว
+            '''
+            #print('อิว')
+            search=re.findall(u'[\w]ิ/ว',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'io',text,flags=re.U)
+        # อวย
+        if re.search(u'[\w]/ว/ย',text, re.U):
+            '''
+            จัดการกับ อวย
+            '''
+            #print('อวย')
+            search=re.findall(u'[\w]/ว/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'uai',text,flags=re.U)
+        # -ว-
+        if re.search(u'[\w]/ว/[\w]',text, re.U):
+            '''
+            จัดการกับ -ว-
+            '''
+            #print('-ว-')
+            search=re.findall(u'[\w]/ว/[\w]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ua'+list(i)[4],text,flags=re.U)
+        # เ–็,เอ
+        if re.search(u'เ[\w]'+'็',text, re.U):
+            '''
+            จัดการกับ เ–็
+            '''
+            #print('เ–็')
+            search=re.findall(u'เ[\w]'+'็',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'e',text,flags=re.U)
+        if re.search(u'เ[\w]/',text, re.U):
+            '''
+            จัดการกับ เอ
+            '''
+            #print('เอ')
+            search=re.findall(u'เ[\w]/',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'e',text,flags=re.U)
+        #ไอย
+        if re.search(u'ไ[\w]/ย',text, re.U):
+            '''
+            จัดการกับ ไอย
+            '''
+            #print('ไอย')
+            search=re.findall(u'ไ[\w]/ย',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ai',text,flags=re.U)
+        #ไอ
+        if re.search(u'ไ[\w]',text, re.U):
+            '''
+            จัดการกับ ไอ
+            '''
+            #print('ไอ')
+            search=re.findall(u'ไ[\w]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[1]+'ai',text,flags=re.U)
+        #อะ
+        if re.search(u'[\w]ะ',text, re.U):
+            '''
+            จัดการกับ อะ
+            '''
+            #print('อะ')
+            search=re.findall(u'[\w]ะ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'a',text,flags=re.U)
+        # –ั 
+        if re.search(u'[\w]ั',text, re.U):
+            '''
+            จัดการกับ –ั 
+            '''
+            #print('–ั ')
+            search=re.findall(u'[\w]ั',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'a',text,flags=re.U)
+        # รร
+        if re.search(u'[\w]/ร/ร/[\w][^ก-ฮ]',text, re.U):
+            '''
+            จัดการกับ -รร-
+            '''
+            #print('-รร- 1')
+            search=re.findall(u'[\w]/ร/ร/[\w][^ก-ฮ]',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'an'+list(i)[6]+list(i)[7],text,flags=re.U)
+        if re.search(u'[\w]/ร/ร/',text, re.U):
+            '''
+            จัดการกับ -รร-
+            '''
+            #print('-รร- 2')
+            search=re.findall(u'[\w]/ร/ร/',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'a',text,flags=re.U)
+        #อา
+        if re.search(u'อา',text, re.U):
+            '''
+            จัดการกับ อา 1
+            '''
+            #print('อา 1')
+            search=re.findall(u'อา',text, re.U)
+            for i in search:
+                text=re.sub(i,'a',text,flags=re.U)
+        if re.search(u'[\w]า',text, re.U):
+            '''
+            จัดการกับ อา 2
+            '''
+            #print('อา 2')
+            search=re.findall(u'[\w]า',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'a',text,flags=re.U)
+                #อำ
+        if re.search(u'[\w]ำ',text, re.U):
+            '''
+            จัดการกับ อำ 1
+            '''
+            #print('อำ 1')
+            search=re.findall(u'[\w]ำ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'am',text,flags=re.U)
+        #อิ , อี
+        if re.search(u'[\w]ิ',text, re.U):
+            '''
+            จัดการกับ อิ 
+            '''
+            #print('อิ')
+            search=re.findall(u'[\w]ิ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'i'+'/',text,flags=re.U)
+        if re.search(u'[\w]ี',text, re.U):
+            '''
+            จัดการกับ อี
+            '''
+            #print('อี')
+            search=re.findall(u'[\w]ี',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'i'+'/',text,flags=re.U)
+        #อึ , อื
+        if re.search(u'[\w]ึ',text, re.U):
+            '''
+            จัดการกับ อึ
+            '''
+            #print('อึ')
+            search=re.findall(u'[\w]ึ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ue'+'/',text,flags=re.U)
+        if re.search(u'[\w]ื',text, re.U):
+            '''
+            จัดการกับ อื
+            '''
+            #print('อื')
+            search=re.findall(u'[\w]ื',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'ue'+'/',text,flags=re.U)
+        #อุ , อู
+        if re.search(u'[\w]ุ',text, re.U):
+            '''
+            จัดการกับ อุ
+            '''
+            #print('อุ')
+            search=re.findall(u'[\w]ุ',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'u'+'/',text,flags=re.U)
+        if re.search(u'[\w]ู',text, re.U):
+            '''
+            จัดการกับ อู
+            '''
+            #print('อู')
+            search=re.findall(u'[\w]ู',text, re.U)
+            for i in search:
+                text=re.sub(i,list(i)[0]+'u'+'/',text,flags=re.U)
+        if re.search(r'[^กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬฮ]',text, re.U):
+            # The word also holds non-consonant characters (vowels, already romanized
+            # letters, ...): look up its first Thai consonant as an initial consonant.
+            d=re.search(consonants_thai,text,re.U)
+            if d: # no Thai consonant at all (Latin text, digits, ...): keep text as is
+                text=re.sub(d.group(0),consonants[d.group(0)][0],text,flags=re.U)
+        listtext=list(text)
+        if listtext and re.search(consonants_thai,listtext[0], re.U):
+	        '''
+	        จัดการกับพยัญชนะต้น
+	        '''
+	        listtext[0]=consonants[listtext[0]][0]
+	        two=False
+	        if len(listtext)==2:
+		        if  re.search(consonants_thai,listtext[1], re.U):
+			        '''
+			        จัดการกับพยัญชนะ 2 ตัว และมีแค่ 2 ตั   และมีแค่ 2 ตัวติดกันในคำ 
+			        '''
+			        listtext.append(consonants[listtext[1]][1])
+			        listtext[1]='o'
+			        two=True
+        else:
+	        two=False
+        i=0
+        while i<len(listtext) and two==False:
+	        if re.search(consonants_thai,listtext[i], re.U):
+		        '''
+		        ถ้าหากเป็นพยัญชนะ
+		        '''
+		        listtext[i]=consonants[listtext[i]][1]
+	        i+=1
+        text=''.join(listtext) # คืนค่ากลับสู่ str
+        #print(text)
+        textdata.append(re.sub('/','',text))
+    return ''.join(textdata)
+if __name__ == '__main__':
+    print(romanization('วัน')+romanization('นะ')+romanization('พง'))
+    print(romanization('นัด')+romanization('ชะ')+romanization('โนน'))
+    print(romanization('สรรพ'))
+    print(romanization('สรร')+romanization('หา'))
+    print(romanization('สรร')+romanization('หา'))
+    print(romanization('แมว'))
\ No newline at end of file
diff --git a/pythainlp/segment/__init__.py b/pythainlp/segment/__init__.py
deleted file mode 100644
index 0de882ccc..000000000
--- a/pythainlp/segment/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import,unicode_literals
-#__all__ = ['pyicu', 'dict','isthai','thai']
-try:
-	from .pyicu import segment
-except:
-	from .dict import segment
\ No newline at end of file
diff --git a/pythainlp/segment/mm.py b/pythainlp/segment/mm.py
deleted file mode 100644
index 4eb6b65b5..000000000
--- a/pythainlp/segment/mm.py
+++ /dev/null
@@ -1,164 +0,0 @@
-# -*- coding: utf-8 -*-
-
-'''
-based on algorithm from
-http://www.aclweb.org/anthology/E14-4016
-
-fork from https://github.com/narongdejsrn/pythaiwordcut
-License: MIT 
-'''
-
-from __future__ import print_function
-from six.moves import range,zip
-import re
-from .thai import newdata # load dictionary
-from pythainlp.corpus import stopwords # load  stopwords
-
-class wordcut(object):
-    def __init__(self, removeRepeat=True, stopDictionary="", removeSpaces=True, minLength=1, stopNumber=False, removeNonCharacter=False, caseSensitive=True, ngram=(1,1), negation=False):
-        d = newdata() # load dictionary
-        # load negation listdir
-        self.negationDict = ['ไม่','แต่']
-        self.stopword = False
-        self.stopdict = stopwords.words('thai')
-
-        self.trie = d
-        self.removeRepeat = removeRepeat
-        self.stopNumber = stopNumber
-        self.removeSpaces = removeSpaces
-        self.minLength = minLength
-        self.removeNonCharacter = removeNonCharacter
-        self.caseSensitive = caseSensitive
-        self.ngram = ngram
-        self.negation = negation
-        self.onNegation = False
-
-    def determine(self, word):
-        if self.stopNumber and word.isdigit():
-            return False
-
-        if self.removeSpaces and word.isspace():
-            return False
-
-        if len(word) < self.minLength:
-            return False
-
-        if self.removeNonCharacter:
-            match = re.search(u"[0-9A-Za-z\u0E00-\u0E7F]+", word)
-            if not match:
-                return False
-
-        return True
-
-    # Find maximum matching in Trie if match return id else return -1
-    def searchTrie(self, word):
-        # remove negation if see a space
-        if(word[0:1] == " "):
-            self.onNegation = False
-
-        # check latin words
-        match = re.search(u"[A-Za-z\d]*", word)
-        if match.group(0):
-            if not self.caseSensitive:
-                return match.group(0).lower()
-            else:
-                return match.group(0)
-
-        # check number
-        match = re.search(u"[\d]*", word)
-        if match.group(0):
-            return match.group(0)
-
-        longest = 0
-        maxData = None
-
-        for x in range(20):
-            if word[0:x] in self.trie:
-                longest = len(word[0:x])
-                maxData = word[0:x]
-
-        if longest > 20:
-            for data in self.trie.keys(word[0:longest]):
-                if(len(data) > longest):
-                    if data in word[0:len(data)]:
-                        wordLength = 0
-                        longest = len(data)
-                        maxData = data
-
-
-        if maxData:
-            try:
-                # Special check for case like ๆ
-                if word[len(maxData)] == u'ๆ':
-                    return word[0:(len(maxData) + 1)]
-                else:
-                    return maxData
-            except:
-                return maxData
-        else:
-            return -1
-
-    def transform(self, wordArray):
-        for dd in self.stopdict:
-            try:
-                if self.caseSensitive:
-                    wordArray.remove(dd)
-                else:
-                    wordArray.remove(dd.lower())
-            except ValueError:
-                pass
-
-        return wordArray
-
-    # c = sentence which represent as char
-    # N = number of character
-    def find_segment(self, c):
-        i = 0
-        N = len(c)
-        arr = []
-        while(i < N):
-            j = self.searchTrie(c[i:N])
-            if(j == -1):
-                if(self.removeRepeat is False or c[i] != c[i - 1]):
-                    arr.append(c[i])
-                    i = i + 1
-                else:
-                    i = i + 1
-            else:
-                k = j
-                if self.negation:
-                    if self.onNegation:
-                        k = 'NOT_' + j
-
-                    if j in self.negationDict:
-                        self.onNegation = True
-
-                arr.append(k)
-                i = i + len(j)
-        return arr
-
-    def find_ngrams(self, input_list, n):
-        return zip(*[input_list[i:] for i in range(n)])
-
-    def segment(self, c):
-        result = self.find_segment(c)
-        if self.stopword:
-            result = self.transform(result)
-
-        result = [x for x in result if self.determine(x)]
-
-        lastresult = []
-        for x in range(self.ngram[0], self.ngram[1]+1):
-            for r in self.find_ngrams(result, x):
-                match = re.search(u"[A-Za-z\d]+", ''.join(r))
-                if not match:
-                    lastresult.append(''.join(r))
-                else:
-                    if self.negation:
-                        lastresult.append(''.join(r))
-                    else:
-                        lastresult.append(' '.join(r))
-        return lastresult
-def segment(text):
-    pt = wordcut(stopNumber=False, removeNonCharacter=True, caseSensitive=False, negation=True, removeRepeat=True)
-    return pt.segment(text)
\ No newline at end of file
diff --git a/pythainlp/sentiment/__init__.py b/pythainlp/sentiment/__init__.py
index c2535f447..1b6559a9a 100644
--- a/pythainlp/sentiment/__init__.py
+++ b/pythainlp/sentiment/__init__.py
@@ -9,7 +9,17 @@
 import pythainlp
 import os
 from pythainlp.tokenize import word_tokenize
-import dill
+try:
+    import dill
+except ImportError:
+    # dill is missing: install it via "python -m pip" (pip.main() is not a public API)
+    import subprocess, sys
+    try:
+        subprocess.check_call([sys.executable,'-m','pip','install','dill'])
+        import dill
+    except (subprocess.CalledProcessError, ImportError):
+        sys.exit("Error ! using 'pip install dill'") # message to stderr, non-zero status
+
 templates_dir = os.path.join(os.path.dirname(pythainlp.__file__), 'sentiment')
 def sentiment(text):
 	"""
diff --git a/pythainlp/sentiment/build_pythainlp.py b/pythainlp/sentiment/build_pythainlp.tool
similarity index 100%
rename from pythainlp/sentiment/build_pythainlp.py
rename to pythainlp/sentiment/build_pythainlp.tool
diff --git a/pythainlp/soundex.py b/pythainlp/soundex.py
new file mode 100644
index 000000000..702bc8b9c
--- /dev/null
+++ b/pythainlp/soundex.py
@@ -0,0 +1,80 @@
+'''
+Thai soundex
+
+โค้ดพัฒนาโดย คุณ Korakot Chaovavanich (จาก https://gist.github.com/korakot/0b772e09340cac2f493868da035597e8)
+'''
+import re
+def LK82(s):
+    '''
+    LK82 - Thai soundex rules of Vichit Lorchirachoonkul and Charoen Khuwinphunt.
+
+    s : ``str`` Thai word to encode.
+    Returns a 5-character ``str``: the first consonant mapped through t1, then the
+    t2 codes of the rest, padded with '0' ('' when nothing is left to encode).
+    '''
+    t1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสญยฎดฏตณนฐฑฒถทธบปผพภฝฟมรลฬฤฦวหฮอ","กกกกกกงจชชชซซซซยยดดตตนนททททททบปพพพฟฟมรรรรรวหหอ")
+    t2 = str.maketrans("กขฃคฅฆงจฉชซฌฎฏฐฑฒดตถทธศษสญณนรลฬฤฦบปพฟภผฝมำยวไใหฮาๅึืเแโุูอ","1111112333333333333333333444444445555555667777889AAABCDEEF")
+    res = []
+    s = re.sub("[่-๋]", "", s)  # 4. remove tone marks
+    s = re.sub('จน์|มณ์|ณฑ์|ทร์|ตร์|[ก-ฮ]์|[ก-ฮ][ะ-ู]์', "", s) # 4. remove silenced (karan) letters
+    s = re.sub("[็ํฺๆฯ]", "", s)  # 5. drop mai taikhu and similar signs
+    if not s: # nothing left to encode (previously an IndexError)
+        return ''
+    # 6. encode the first character
+    if 'ก'<=s[0]<='ฮ':
+        res.append(s[0].translate(t1))
+        s = s[1:]
+    else: # leading vowel: code the consonant after it first, then the vowel itself
+        if len(s) > 1:
+            res.append(s[1].translate(t1))
+        res.append(s[0].translate(t2))
+        s = s[2:]
+    # encode the remaining characters
+    i_v = None # position of the latest separator (vowel)
+    for i,c in enumerate(s):
+        if c in "ะัิี": # 7. plain separator
+            i_v = i
+            res.append('')
+        elif c in "าๅึืู": # 8. separator that is also encoded
+            i_v = i
+            res.append(c.translate(t2))
+        elif c == 'ุ': # 9. sara u
+            i_v = i
+            res.append(c.translate(t2) if i==0 or (s[i-1] not in "ตธ") else '')
+        elif c in 'หอ' and (i+1<len(s) and (s[i+1] in "ึืุู")):
+            res.append(c.translate(t2))
+        elif c in 'รวยฤฦ':
+            if i_v == i-1 or (i+1<len(s) and (s[i+1] in "ึืุู")): # look ahead in s, not in c
+                res.append(c.translate(t2))
+        else:
+            res.append(c.translate(t2)) # 12.
+    # 13. remove consecutive duplicates
+    res2 = [code for i,code in enumerate(res) if i==0 or code != res[i-1]]
+    # 14. pad with zeros; truncate if too long
+    return ("".join(res2)+"0000")[:5]
+def Udom83(s):
+    '''
+    Udom83 - Thai soundex; s is a ``str``, returns a 7-character ``str`` code ('' if nothing is left to encode).
+    '''
+    tu1 = str.maketrans("กขฃคฅฆงจฉชฌซศษสฎดฏตฐฑฒถทธณนบปผพภฝฟมญยรลฬฤฦวอหฮ" ,"กขขขขขงจชชชสสสสดดตตททททททนนบปพพพฟฟมยยรรรรรวอฮฮ")
+    tu2 = str.maketrans("มวำกขฃคฅฆงยญณนฎฏดตศษสบปพภผฝฟหอฮจฉชซฌฐฑฒถทธรฤลฦ","0001111112233344444445555666666777778888889999")
+    s = re.sub('รร([เ-ไ])', 'ัน\\1', s)  # 4.
+    s = re.sub('รร([ก-ฮ][ก-ฮเ-ไ])', 'ั\\1', s) # 5.
+    s = re.sub('รร([ก-ฮ][ะ-ู่-์])','ัน\\1', s)
+    s = re.sub('รร', 'ัน', s)
+    s = re.sub('ไ([ก-ฮ]ย)', '\\1', s)   # 2.
+    s = re.sub('[ไใ]([ก-ฮ])','\\1ย', s)
+    s = re.sub('ำ(ม[ะ-ู])', 'ม\\1', s)   # 3.
+    s = re.sub('ำม', 'ม', s)
+    s = re.sub('ำ', 'ม', s)
+    s = re.sub('จน์|มณ์|ณฑ์|ทร์|ตร์|[ก-ฮ]์|[ก-ฮ][ะ-ู]์', "", s) # 6.
+    s = re.sub('[ะ-์]', '', s) # 7.
+    if not s: # nothing left to encode (previously an IndexError on s[0])
+        return ''
+    return (s[0].translate(tu1)+s[1:].translate(tu2)+'000000')[:7]
+if __name__ == '__main__':
+    print(LK82('รถ'))
+    print(LK82('รส'))
+    print(LK82('รด'))
+    print(LK82('จัน'))
+    print(LK82('จันทร์'))
\ No newline at end of file
diff --git a/pythainlp/spell/hunspell.py b/pythainlp/spell/hunspell.py
index 49b8438da..e451a3e86 100644
--- a/pythainlp/spell/hunspell.py
+++ b/pythainlp/spell/hunspell.py
@@ -30,7 +30,7 @@ def spell(word,lang='th_TH'):
 			getoutput = getoutput.split(",")
 		del get
 		return getoutput
-	except:
+	except subprocess.CalledProcessError:
 		print('plase install hunspell')
 		return None
 if __name__ == "__main__":
diff --git a/pythainlp/tag/__init__.py b/pythainlp/tag/__init__.py
index fe8072339..53f426850 100644
--- a/pythainlp/tag/__init__.py
+++ b/pythainlp/tag/__init__.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 #  TODO ปรับ API ให้เหมือน nltk
 from __future__ import absolute_import,division,print_function,unicode_literals
-from pythainlp.postaggers import tag
 import sys
 def pos_tag(text,engine='old'):
 	"""
@@ -13,16 +12,25 @@ def pos_tag(text,engine='old'):
 	* artagger เป็น RDR POS Tagger
 	"""
 	if engine=='old':
-    		from pythainlp.postaggers import tag
+    		from .old import tag
 	elif engine=='artagger':
 			if sys.version_info < (3,4):
     				sys.exit('Sorry, Python < 3.4 is not supported')
 			def tag(text1):
-					from artagger import Tagger
+					try:
+						from artagger import Tagger
+					except ImportError:
+						import pip
+						pip.main(['install','https://github.com/franziz/artagger/archive/master.zip'])
+						try:
+							from artagger import Tagger
+						except ImportError:
+							print("Error ! using 'pip install https://github.com/franziz/artagger/archive/master.zip'")
+							sys.exit(0)
 					tagger = Tagger()
 					words = tagger.tag(' '.join(text1))
 					totag=[]
 					for word in words:
     						totag.append((word.word, word.tag))
 					return totag
-	return tag(text)
\ No newline at end of file
+	return tag(text)
diff --git a/pythainlp/postaggers/__init__.py b/pythainlp/tag/old.py
similarity index 96%
rename from pythainlp/postaggers/__init__.py
rename to pythainlp/tag/old.py
index 17e69e6ce..6a220faf6 100644
--- a/pythainlp/postaggers/__init__.py
+++ b/pythainlp/tag/old.py
@@ -16,4 +16,4 @@ def tag(text):
 	"""
 	รับค่าเป็น ''list'' คืนค่าเป็น ''list'' เช่น [('ข้อความ', 'ชนิดคำ')]"""
 	tagger = nltk.tag.UnigramTagger(model=data())# backoff=default_tagger)
-	return tagger.tag(text)
\ No newline at end of file
+	return tagger.tag(text)
diff --git a/pythainlp/test/__init__.py b/pythainlp/test/__init__.py
index 645e7b0c6..687902d8c 100644
--- a/pythainlp/test/__init__.py
+++ b/pythainlp/test/__init__.py
@@ -6,29 +6,57 @@
 from pythainlp.corpus import wordnet
 from pythainlp.tokenize import word_tokenize
 from pythainlp.rank import rank
-from pythainlp.change import *
+from pythainlp.change import texttothai,texttoeng
 from pythainlp.number import numtowords
 from pythainlp.tag import pos_tag
 from pythainlp.romanization import romanization
 from pythainlp.date import now
+from pythainlp.tokenize import tcc,etcc
+from pythainlp.soundex import LK82,Udom83
+from pythainlp.corpus import stopwords
+from pythainlp.MetaSound import MetaSound
 from collections import namedtuple
 Synset = namedtuple('Synset', 'synset li')
 class TestUM(unittest.TestCase):
-	def testSegment(self):
+	"""
+	Unit tests covering the public PyThaiNLP functions.
+	"""
+	def test_segment(self):
 		self.assertEqual(word_tokenize('ฉันรักภาษาไทยเพราะฉันเป็นคนไทย'),[u'ฉัน', u'รัก', u'ภาษา', u'ไทย', u'เพราะ', u'ฉัน', u'เป็น', u'คน', u'ไทย'])
-	def testSegmentDict(self):
+	def test_segment_dict(self):
 		self.assertEqual(word_tokenize('ฉันรักภาษาไทยเพราะฉันเป็นคนไทย',engine='dict'),[u'ฉัน', u'รัก', u'ภาษาไทย', u'เพราะ', u'ฉัน', u'เป็น', u'คนไทย'])
-	def testRank(self):
+	def test_segment_mm(self):
+		self.assertEqual(word_tokenize('ฉันรักภาษาไทยเพราะฉันเป็นคนไทย',engine='mm'),[u'ฉัน', u'รัก', u'ภาษาไทย', u'เพราะ', u'ฉัน', u'เป็น', u'คนไทย'])
+	def test_segment_newmm(self):
+		self.assertEqual(word_tokenize('ฉันรักภาษาไทยเพราะฉันเป็นคนไทย',engine='newmm'),[u'ฉัน', u'รัก', u'ภาษาไทย', u'เพราะ', u'ฉัน', u'เป็น', u'คนไทย'])
+	def test_rank(self):
 		self.assertEqual(rank(["แมว","คน","แมว"]),Counter({'แมว': 2, 'คน': 1}))
-	def testChange(self):
+	def test_change(self):
 		self.assertEqual(texttothai("l;ylfu8iy["),'สวัสดีครับ')
-	def testRomanization(self):
+		self.assertEqual(texttoeng('สวัสดีครับ'),"l;ylfu8iy[")
+	def test_romanization1(self):
 		self.assertEqual(romanization("แมว"),'mæw')
-	def testNumber(self):
+	def test_romanization2(self):
+		self.assertEqual(romanization("แมว",engine="royin"),'maeo')
+	def test_number(self):
 		self.assertEqual(numtowords(5611116.50),'ห้าล้านหกแสนหนึ่งหมื่นหนึ่งพันหนึ่งร้อยสิบหกบาทห้าสิบสตางค์')
-	def testTag(self):
+	def test_tcc(self):
+		self.assertEqual(tcc.tcc('ประเทศไทย'),'ป/ระ/เท/ศ/ไท/ย')
+	def test_etcc(self):
+		self.assertEqual(etcc.etcc('คืนความสุข'),'/คืน/ความสุข')
+	def test_lk82(self):
+		self.assertEqual(LK82('รถ'),'ร3000')
+		self.assertEqual(Udom83('รถ'),'ร800000')
+	def test_ms(self):
+		self.assertEqual(MetaSound('คน'),'15')
+	def test_wordnet(self):
+		self.assertEqual(wordnet.synset('spy.n.01').lemma_names('tha'),['สปาย', 'สายลับ'])
+		self.assertIsNotNone(wordnet.langs())
+	def test_stopword(self):
+		self.assertIsNotNone(stopwords.words('thai'))
+	def test_tag(self):
 		self.assertEqual(pos_tag(word_tokenize("คุณกำลังประชุม"),engine='old'),[('คุณ', 'PPRS'), ('กำลัง', 'XVBM'), ('ประชุม', 'VACT')])
-	def testTagnew(self):
+	def test_tag_new(self):
     		if sys.version_info > (3,3):
     				self.assertEqual(pos_tag(word_tokenize("ผมรักคุณ"),engine='artagger'),[('ผม', 'PPRS'), ('รัก', 'VSTA'), ('คุณ', 'PPRS')])
 if __name__ == '__main__':
diff --git a/pythainlp/tokenize/__init__.py b/pythainlp/tokenize/__init__.py
index 80b446515..c48f49d58 100644
--- a/pythainlp/tokenize/__init__.py
+++ b/pythainlp/tokenize/__init__.py
@@ -1,19 +1,47 @@
 # -*- coding: utf-8 -*-
-from __future__ import absolute_import,division,unicode_literals
+from __future__ import absolute_import,division,unicode_literals,print_function
 def word_tokenize(text,engine='icu'):
 	"""
 	ระบบตัดคำภาษาไทย
 
 	word_tokenize(text,engine='icu')
 	engine มี
-	- icu
-	- dict
-	- mm ใช้ Maximum Matching algorithm
+	- icu - the original PyThaiNLP engine (low accuracy); the default
+	- dict - dictionary based; documented to return False when it cannot segment
+	- mm - Maximum Matching algorithm, old implementation
+	- newmm - Maximum Matching algorithm, new implementation
+	- pylexto - uses LexTo
+	- deepcut - uses a deep neural network
+	Raises ValueError for any other engine name.
 	"""
 	if engine=='icu':
-    		from pythainlp.segment.pyicu import segment
+    		# ICU word-break iterator: the original PyThaiNLP engine
+    		# (low accuracy) and the default.
+    		from .pyicu import segment
 	elif engine=='dict':
-    		from pythainlp.segment.dict import segment
+    		# Dictionary-based segmentation; the original notes say it
+    		# returns False when the text cannot be segmented
+    		# (the behaviour of .dictsegment is not visible from here).
+    		from .dictsegment import segment
 	elif engine=='mm':
-    		from pythainlp.segment.mm import segment
-	return segment(text)
\ No newline at end of file
+    		# Maximum Matching algorithm,
+    		# old implementation.
+    		from .mm import segment
+	elif engine=='newmm':
+    		# Maximum Matching algorithm,
+    		# new implementation.
+    		from .newmm import mmcut as segment
+	elif engine=='pylexto':
+    		# LexTo tokenizer, through the
+    		# pylexto package.
+    		from .pylexto import segment
+	elif engine=='deepcut':
+    		# Deep-neural-network tokenizer,
+    		# through the deepcut package.
+    		from .deepcut import segment
+	else:
+    		# Without this branch `segment` stays unbound and an unknown
+    		# engine name fails below with a confusing UnboundLocalError.
+    		raise ValueError("unknown engine %r; expected one of: icu, "
+    			"dict, mm, newmm, pylexto, deepcut" % (engine,))
+	return segment(text)
diff --git a/pythainlp/tokenize/deepcut.py b/pythainlp/tokenize/deepcut.py
new file mode 100644
index 000000000..0eb3fae8d
--- /dev/null
+++ b/pythainlp/tokenize/deepcut.py
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import,unicode_literals
+import sys
+try:
+    import deepcut
+except ImportError:
+    # deepcut is not installed yet: install it with pip, then retry the
+    # import once. pip is run as a child process because pip offers no
+    # supported in-process API (pip.main is not available in newer pip).
+    import subprocess
+    subprocess.call([sys.executable, '-m', 'pip', 'install', 'deepcut'])
+    try:
+        import deepcut
+    except ImportError:
+        sys.exit('Error ! using pip install deepcut')
+def segment(text):  # thin wrapper: returns whatever deepcut.tokenize(text) returns
+    return deepcut.tokenize(text)
\ No newline at end of file
diff --git a/pythainlp/segment/dict.py b/pythainlp/tokenize/dictsegment.py
similarity index 100%
rename from pythainlp/segment/dict.py
rename to pythainlp/tokenize/dictsegment.py
diff --git a/pythainlp/tokenize/etcc.py b/pythainlp/tokenize/etcc.py
new file mode 100644
index 000000000..5a7b599a0
--- /dev/null
+++ b/pythainlp/tokenize/etcc.py
@@ -0,0 +1,60 @@
+'''
+โปรแกรม ETCC ใน Python
+
+พัฒนาโดย นาย วรรณพงษ์  ภัททิยไพบูลย์
+
+19 มิ.ย. 2560
+
+วิธีใช้งาน
+etcc(คำ)
+คืนค่า โดยมี / แบ่งกลุ่มคำ
+'''
+import re
+C=['ก','ข','ฃ','ค','ฅ','ฆ','ง','จ','ฉ','ช','ฌ','ซ','ศ','ษ','ส','ญ','ฎ','ฑ','ด','ฏ','ต','ฐ','ฑ','ฒ','ถ','ท','ธ','ณ','น','บ','ป','ผ','พ','ภ','ฝ','ฟ','ม','ย','ร','ล','ฬ','ว','ห','ฮ']  # consonants accepted by the patterns in etcc(); NOTE(review): 'ฑ' is listed twice and 'อ' is absent - confirm intended
+UV=['็','ี','ื','ิ']  # marks allowed after the consonant in the first etcc() pattern
+UV1=['ั','ี']  # marks allowed after the first consonant in the '์'-terminated patterns
+LV=['ุ','ู']  # not referenced anywhere else in this file
+c='['+''.join(C)+']'  # regex character class matching any one entry of C
+uv2='['+''.join(['ั','ื'])+']'  # regex character class used by the consonant-mark-consonant pattern
+def etcc(text):
+    """
+    Enhanced Thai Character Cluster (ETCC).
+    Takes a str and returns a str in which clusters are delimited by '/'.
+    Each pass wraps every match of one cluster pattern in slashes.
+    """
+    if (re.search('[เแ]'+c+'['+''.join(UV)+']'+r'\w',text,re.U)):
+        search=re.findall('[เแ]'+c+'['+''.join(UV)+']'+r'\w',text,re.U)
+        for i in search:
+            text=re.sub(i, '/'+i+'/', text)
+    if (re.search(c+'['+''.join(UV1)+']'+c+c+'ุ'+'์',text,re.U)):
+        search=re.findall(c+'['+''.join(UV1)+']'+c+c+'ุ'+'์',text,re.U)
+        for i in search:
+            text=re.sub(i, '//'+i+'/', text)
+    if (re.search(c+uv2+c,text,re.U)):
+        search=re.findall(c+uv2+c,text,re.U)
+        for i in search:
+            text=re.sub(i, '/'+i+'/', text)
+    text=re.sub('//','/',text)  # keep the result: re.sub returns a new string
+    if (re.search('เ'+c+'า'+'ะ',text,re.U)):
+        search=re.findall('เ'+c+'า'+'ะ',text,re.U)
+        for i in search:
+            text=re.sub(i, '/'+i+'/', text)
+    if (re.search('เ'+r'\w\w'+'า'+'ะ',text,re.U)):
+        search=re.findall('เ'+r'\w\w'+'า'+'ะ',text,re.U)
+        for i in search:
+            text=re.sub(i, '/'+i+'/', text)
+    text=re.sub('//','/',text)
+    if (re.search(c+'['+''.join(UV1)+']'+c+c+'์',text,re.U)):
+        search=re.findall(c+'['+''.join(UV1)+']'+c+c+'์',text,re.U)
+        for i in search:
+            text=re.sub(i, '/'+i+'/', text)
+    if (re.search('/'+c+''.join(['ุ', '์'])+'/',text,re.U)):
+        # fix-up for cases such as พัน/ธุ์: drop the slash in front of the cluster
+        search=re.findall('/'+c+''.join(['ุ', '์'])+'/',text,re.U)
+        for i in search:
+            ii=re.sub('/','', i)
+            text=re.sub(i,ii+'/', text)
+    return re.sub('//','/',text)
+if __name__ == '__main__':  # manual demo: print the ETCC split of one sample string
+    print(etcc('พันธุ์เด็กเปียเสือเงินพังมือเพราะเกาะเอาะยีนส์เพราะเรือดีเพราะ'))
\ No newline at end of file
diff --git a/pythainlp/segment/isthai.py b/pythainlp/tokenize/isthai.py.old
similarity index 100%
rename from pythainlp/segment/isthai.py
rename to pythainlp/tokenize/isthai.py.old
diff --git a/pythainlp/tokenize/mm.py b/pythainlp/tokenize/mm.py
new file mode 100644
index 000000000..28fbf1cb7
--- /dev/null
+++ b/pythainlp/tokenize/mm.py
@@ -0,0 +1,296 @@
+# -*- coding: utf-8 -*-
+
+'''
+based on algorithm from
+http://www.aclweb.org/anthology/E14-4016
+
+fork from https://github.com/narongdejsrn/pythaiwordcut
+License: MIT 
+'''
+
+from __future__ import print_function
+from six.moves import range,zip
+import codecs
+import re
+from .thai import newdata # load dictionary
+from pythainlp.corpus import stopwords # load  stopwords
+import marisa_trie
+
+class wordcut(object):
+    """
+    Thai word segmentation using the Maximum Matching algorithm.
+    """
+    def __init__(self, removeRepeat=True, keyDictionary="", stopDictionary="", removeSpaces=True, minLength=1, stopNumber=False, removeNonCharacter=False, caseSensitive=True, ngram=(1,1), negation=False):
+        d = newdata() # load dictionary
+        # words that switch on "NOT_" prefixing when negation handling is enabled
+        self.negationDict = []
+        if negation:
+            self.negationDict = ['ไม่','แต่']
+        self.stopword = False
+        self.stopdict = []
+        if stopDictionary != "":  # compare by value, not identity
+            self.stopword = True
+            with codecs.open(stopDictionary, 'r',encoding='utf8') as f:
+                # splitlines() drops line endings so entries can equal tokens
+                self.stopdict.extend(f.read().splitlines())
+        else:
+            self.stopdict = stopwords.words('thai')
+        self.keyword = False
+        self.keydict = []
+        if keyDictionary != "":  # compare by value, not identity
+            self.keyword = True
+            with codecs.open(keyDictionary, 'r',encoding='utf8') as f:
+                for line in f.read().splitlines():
+                    self.keydict.append(line)
+
+        self.trie = marisa_trie.Trie(d)
+        self.removeRepeat = removeRepeat
+        self.stopNumber = stopNumber
+        self.removeSpaces = removeSpaces
+        self.minLength = minLength
+        self.removeNonCharacter = removeNonCharacter
+        self.caseSensitive = caseSensitive
+        self.ngram = ngram
+        self.negation = negation
+        self.onNegation = False
+
+    def determine(self, word):
+        """Return True when the token `word` passes the configured output filters."""
+        if self.stopNumber and word.isdigit():
+            return False
+        if self.removeSpaces and word.isspace():
+            return False
+
+        if len(word) < self.minLength:
+            return False
+
+        if self.removeNonCharacter:
+            match = re.search(u"[0-9A-Za-z\u0E00-\u0E7F]+", word,re.U)
+            if not match:
+                return False
+
+        return True
+
+    # Return the leading Latin/digit run or the longest dictionary prefix of word; -1 if none
+    def search_trie(self, word):
+        # a leading space ends the current negation scope
+        if(word[0:1] == " "):
+            self.onNegation = False
+
+        # a leading run of Latin letters / digits is returned as one token
+        match = re.search(u"[A-Za-z\d]*", word,re.U)
+        if match.group(0):
+            if not self.caseSensitive:
+                return match.group(0).lower()
+            else:
+                return match.group(0)
+
+        # check number
+        match = re.search(u"[\d]*", word,re.U)
+        if match.group(0):
+            return match.group(0)
+
+        longest = 0
+        max_data = None
+
+        for x in range(20):  # only prefixes of up to 19 characters are tried
+            if word[0:x] in self.trie:
+                longest = len(word[0:x])
+                max_data = word[0:x]
+
+        if longest > 20:  # NOTE(review): never true, the loop above caps longest at 19
+            for data in self.trie.keys(word[0:longest]):
+                if len(data) > longest and data in word[0:len(data)]:
+                    longest = len(data)
+                    max_data = data
+
+
+        if max_data:
+            try:
+                # keep a following repetition mark (ๆ) attached to the word
+                if word[len(max_data)] == 'ๆ':
+                    return word[0:(len(max_data) + 1)]
+                else:
+                    return max_data
+            except IndexError:  # max_data reaches the end of word
+                return max_data
+        else:
+            return -1
+
+    def transform(self, wordArray):
+        """Remove the first occurrence of each stop word from wordArray in place; return it."""
+        for dd in self.stopdict:
+            try:
+                if self.caseSensitive:
+                    wordArray.remove(dd)
+                else:
+                    wordArray.remove(dd.lower())
+            except ValueError:
+                pass
+        return wordArray
+
+    def extract_keyword(self, wordArray):
+        """
+        Return the entries of wordArray that appear in the keyword dictionary.
+        """
+        result_array = []
+        for dd in wordArray:
+            try:
+                if self.caseSensitive and dd in self.keydict:
+                    result_array.append(dd)
+                else:
+                    if dd.lower() in self.keydict:
+                        result_array.append(dd)
+            except ValueError:
+                pass
+
+        return result_array
+    # c = the sentence, indexed character by character
+    # N = number of characters in c
+    def find_segment(self, c):
+        i = 0
+        N = len(c)
+        arr = []
+        while(i < N):
+            j = self.search_trie(c[i:N])
+            if(j == -1):
+                if(self.removeRepeat is False or i == 0 or c[i] != c[i - 1]):  # i == 0: c[-1] would be the LAST char
+                    arr.append(c[i])
+                    i = i + 1
+                else:
+                    i = i + 1
+            else:
+                k = j
+                if self.negation:
+                    if self.onNegation:
+                        k = 'NOT_' + j
+
+                    if j in self.negationDict:
+                        self.onNegation = True
+
+                arr.append(k)
+                i = i + len(j)
+        return arr
+
+    def find_ngrams(self, input_list, n):
+        return zip(*[input_list[i:] for i in range(n)])
+
+    def segment(self, c):
+        '''
+        Segment the text c and return the filtered tokens / n-grams as a list.
+        '''
+        result = self.find_segment(c)
+        if self.stopword:
+            result = self.transform(result)
+
+        result = [x for x in result if self.determine(x)]
+
+        lastresult = []
+        for x in range(self.ngram[0], self.ngram[1]+1):
+            for r in self.find_ngrams(result, x):
+                match = re.search(u"[A-Za-z\d]+", ''.join(r),re.U)
+                if not match:
+                    lastresult.append(''.join(r))
+                else:
+                    if self.negation:
+                        lastresult.append(''.join(r))
+                    else:
+                        lastresult.append(' '.join(r))
+        return lastresult
+def mergelistlen(listdata,lennum):
+    '''
+    Post-process the token list produced by mm: merge stray tokens of length `lennum` into their neighbours (mutates and returns listdata).
+    '''
+    i=0
+    listlen=len(listdata)
+    while i<listlen:
+        if i>(listlen-1) or i+1==listlen:  # on the last element (or past it): nothing left to merge, stop
+            '''
+            ถ้า i เกินความยาว list ให้ออกจากการลูป
+            '''
+            break
+        elif re.search(r'[0-9]',listdata[i]):  # tokens containing a digit are left untouched
+            '''
+            ถ้าหาก listdata[i] เป็นตัวเลขให้ข้ามไป
+            '''
+            pass
+        elif re.search(r'[ะา]',listdata[i]) and (len(listdata[i])==lennum and len(listdata[i+1])==lennum):  # trailing vowel ะ/า: fold into the previous token
+            '''
+            ถ้าหาก listdata[i] คือ ะ/า ซึ่งเปนสระที่ไว้ข้างหลังได้เท่านั้น และ listdata[i] กับ listdata[i+1] ยาวเท่า lennum
+            จึงนำ listdata[i] ไปรวมกับ listdata[i-1] แล้วลบ listdata[i] ออก
+            '''
+            listdata[i-1]+=listdata[i]  # NOTE(review): when i == 0 this index is -1, i.e. the last element
+            del listdata[i]
+            i-=1
+        elif re.search(r'[ก-ฮ]',listdata[i]) and re.search(r'[0-9]',listdata[i+1]):  # consonant followed by a digit token: fold into the previous token
+            '''
+            กันปัญหา ก-ฮ ตัวต่อมาเป็น 0-9 มีความยาวเท่ากัน ให้ ก-ฮ ไปรวมกับตัวก่อนหน้า
+            '''
+            listdata[i-1]+=listdata[i]
+            del listdata[i]
+            i-=1
+        elif len(listdata[i])==lennum and len(listdata[i+1])==lennum:  # two adjacent tokens of length lennum: merge the run into listdata[i]
+            '''
+            ถ้าหาก list มีความยาวเท่ากันอยู่ติดกัน
+            '''
+            #print(listdata,'99')
+            T=True
+            num=1
+            while T==True:
+               if (i+num)>=listlen:  # the run reaches the end of the list
+                   ii=i
+                   num2=1
+                   TT=True
+                   while TT==True:  # append every following token to listdata[i]
+                    if (i+num2)<=(listlen-1):
+                        listdata[i]+=listdata[i+num2]
+                        num2+=1
+                    elif (i+num2)>(listlen-1):
+                        num2-=1
+                        TT=False
+                   TT=True
+                   while TT==True:  # then delete the appended tokens, last one first
+                    if (i+num2) != i:
+                        del listdata[i+num2]
+                        num2-=1
+                    else:
+                        TT=False
+                   T=False
+               elif len(listdata[i+(num-1)])!=len(listdata[i+num]): #and re.search(r'[0-9]',listdata[i+(num-1)])==False:# and isThai(listdata[i+(num-1)])==True:
+                    ii=1+i
+                    while ii<(i+num) and ii<(len(listdata)-1):
+                        listdata[i]+=listdata[ii]
+                        ii+=1
+                    ii=i+num-1
+                    while ii>i:
+                        del listdata[ii]
+                        ii-=1
+                    T=False
+               num+=1
+            del T,ii
+        elif len(listdata[i])==lennum and len(listdata[i+1])!=lennum:  # token of length lennum followed by one of a different length
+            '''
+            ในกรณีที่ list ความยาวที่กำหนด แต่ตัวต่อไปยาวไม่เท่า ให้ยุบรวมกัน
+            '''
+            if re.search(r'[เแโใไ]',listdata[i]):  # leading vowel: prepend it to the next token
+                '''
+                ถ้าหากเป็นสระต้นคำ ให้รวมกัน
+                '''
+                listdata[i]+=listdata[i+1]
+                del listdata[i+1]
+            elif re.search(r'[ก-ฮ]',listdata[i]) or re.search(r'[ะา]',listdata[i]):  # consonant or ะ/า: fold into the previous token
+                '''
+                หากเป็นแค่พยัญชนะให้รวมกับตัวหลัง
+                '''
+                listdata[i-1]+=listdata[i]
+                del listdata[i]
+                i-=1
+        listlen=len(listdata)  # the list may have shrunk; refresh the bound
+        i+=1
+    return listdata
+def segment(text):
+    '''
+    Segment ``text`` (str) with the old Maximum Matching engine and return a list of tokens.
+    '''
+    cutter = wordcut(removeRepeat=True, caseSensitive=False, removeNonCharacter=True, stopNumber=False)
+    return mergelistlen(cutter.segment(text), 1)
\ No newline at end of file
diff --git a/pythainlp/tokenize/newmm.py b/pythainlp/tokenize/newmm.py
new file mode 100644
index 000000000..b300e0e34
--- /dev/null
+++ b/pythainlp/tokenize/newmm.py
@@ -0,0 +1,114 @@
+'''
+ตัดคำภาษาไทยโดยใช้ Maximum Matching algorithm
+เดติดโค้ดต้นฉบับ คุณ Korakot Chaovavanich
+จาก https://www.facebook.com/groups/408004796247683/permalink/431283740586455/ 
+'''
+from marisa_trie import Trie
+from collections import Counter, defaultdict
+from pythainlp.corpus.thaiword import get_data
+class LatticeString(str):
+    """
+    A str that also carries the alternative segmentations of its own text.
+    """
+    def __new__(cls, value, multi=None, in_dict=True):
+        # str is immutable: the text must be supplied at construction time
+        return str.__new__(cls, value)
+
+    def __init__(self, value, multi=None, in_dict=True):
+        # True when the word came from the dictionary
+        self.in_dict = in_dict
+        # candidate '/'-joined splits; with no candidates the word stands alone
+        self.multi = list(multi) if multi else [value]
+        # ambiguous only when more than one candidate split was supplied
+        self.unique = not (multi and len(self.multi) > 1)
+
+    def suggest(self):
+        """Always returns an empty list."""
+        return []
+def serialize(p, p2):  # NOTE(review): `words_at` is not defined at module level in this file, so iterating this generator raises NameError; tcut() uses its own nested copy
+    for w in words_at[p]:  # every word recorded as starting at position p
+        p_ = p + len(w)  # position just past w
+        if p_== p2:  # w ends exactly at the target position: a complete path
+            yield w
+        elif p_ < p2:  # still short of the target: extend with the paths from p_
+            for path in serialize(p_, p2):
+                yield w+'/'+path
+# The building blocks are ready; next, try rewriting tcut.
+def tcut(text):
+    """Yield LatticeString chunks of ``text`` built from the dictionary words found at each position."""
+    trie = Trie(get_data())
+    words_at = defaultdict(list)  # main data structure
+    
+    def serialize(p, p2):    # helper function
+        for w in words_at[p]:
+            p_ = p + len(w)
+            if p_== p2:
+                yield w
+            elif p_ < p2:
+                for path in serialize(p_, p2):
+                    yield w+'/'+path
+                    
+    q = {0}
+    last_p = 0   # last position for yield
+    while min(q) < len(text):
+        p = min(q)
+        q -= {p}  # q.pop, but for set
+        
+        for w in trie.prefixes(text[p:]):
+            words_at[p].append(w)
+            q.add(p+len(w))   
+            
+        if len(q)==1:
+            q0 = min(q)
+            yield LatticeString(text[last_p:q0], serialize(last_p, q0))
+            last_p = q0
+            
+        # len(q) == 0 means: no dictionary word starts here
+        if len(q)==0:
+            # skip as few characters as possible
+            for i in range(p, len(text)):
+                ww = trie.prefixes(text[i:])
+                if ww:
+                    break
+            else:
+                i = len(text)
+            w = text[p:i]
+            w = w.replace(' ','') # remove spaces
+            words_at[p].append(w)
+            yield LatticeString(w, in_dict=False)
+            last_p = i
+            q.add(i)
+def mmcut(text):
+    '''
+    Segment Thai ``text`` with Maximum Matching, taking the fewest-word split of each ambiguous chunk.
+    '''
+    tokens = []
+    for chunk in tcut(text):
+        if not chunk.unique:
+            fewest = min(chunk.multi, key=lambda path: path.count('/'))
+            tokens += fewest.split('/')
+            continue
+        tokens.append(chunk)
+    return tokens
+def combine(ww):
+    """Yield every '|'-terminated segmentation that can be built from the lattice words in ``ww``."""
+    if ww == []:
+        yield ""
+        return
+    head, rest = ww[0], ww[1:]
+    for suffix in combine(rest):
+        # an unambiguous word has exactly one reading: itself
+        readings = [head] if head.unique else [m.replace("/","|") for m in head.multi]
+        for reading in readings:
+            yield reading+"|"+suffix
+                    
+def listcut(text):
+    '''
+    Return a list with every possible segmentation of ``text``, each as a '|'-delimited string.
+    '''
+    lattice = [w for w in tcut(text)]
+    return [path for path in combine(lattice)]
+if __name__ == "__main__":  # manual demo: print the chosen split and all candidate splits of one sentence
+	text='ผมรักคุณนะครับโอเคบ่พวกเราเป็นคนไทยรักภาษาไทยภาษาบ้านเกิด'
+	print(mmcut(text))
+	print(listcut(text))
\ No newline at end of file
diff --git a/pythainlp/segment/pyicu.py b/pythainlp/tokenize/pyicu.py
similarity index 60%
rename from pythainlp/segment/pyicu.py
rename to pythainlp/tokenize/pyicu.py
index 91539b01d..0a1d2a8ef 100644
--- a/pythainlp/segment/pyicu.py
+++ b/pythainlp/tokenize/pyicu.py
@@ -1,36 +1,48 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import,print_function,unicode_literals
 from itertools import groupby
-import PyICU
+from langdetect import detect 
+import re
+import icu
 def isEnglish(s):
-    try:
-        try:
-            s.encode('ascii')
-        except UnicodeEncodeError:
-            return False
-        else:
-            return True
-    except:
-        try:
-            s.decode('ascii')
-        except UnicodeDecodeError:
-            return False
-        else:
-            return True
+	'''
+	Return True when ``s`` is pure ASCII (treated here as English), otherwise False.
+	'''
+	try:
+		try:
+			s.encode('ascii')
+		except UnicodeEncodeError:
+			return False
+		else:
+			return True
+	except:  # encode() failed in some other way: fall back to decode()
+		try:
+			s.decode('ascii')
+		except UnicodeDecodeError:
+			return False
+		else:
+			return True
 def isThai(chr):
-    if isEnglish(chr):
-        return False
-    try:
-        cVal = ord(chr)
-        if(cVal >= 3584 and cVal <= 3711):
-            return True
-        return False
-    except:
-        return False
+	'''
+	Return True when langdetect classifies ``chr`` as Thai ('th'); ASCII input and any error give False.
+	'''
+	if isEnglish(chr):
+		return False
+	try:
+		'''cVal = ord(chr)
+		if(cVal >= 3584 and cVal <= 3711):
+		return True'''
+		if detect(chr)=='th':  # NOTE(review): presumably called with very short strings - confirm detect() is reliable for them
+			return True
+		else:
+			return False
+	except:  # any failure inside detect() counts as "not Thai"
+		return False
 def segment(txt):
     """รับค่า ''str'' คืนค่าออกมาเป็น ''list'' ที่ได้มาจากการตัดคำโดย ICU"""
-    bd = PyICU.BreakIterator.createWordInstance(PyICU.Locale("th"))
-    bd.setText(txt.replace(' ', ''))
+    bd = icu.BreakIterator.createWordInstance(icu.Locale("th"))
+    pattern = re.compile(r'\s+')
+    bd.setText(re.sub(pattern, '', txt))
     breaks = list(bd)
     result=[txt[x[0]:x[1]] for x in zip([0]+breaks, breaks)]
     result1=[]
@@ -62,3 +74,5 @@ def segment(txt):
 	print(segment('ทดสอบระบบตัดคำด้วยไอซียู'))
 	print(segment('ผมชอบพูดไทยคำ English'))
 	print(segment('ผมชอบพูดไทยคำEnglishคำ'))
+	print(segment('ผมชอบพูดไทยคำEnglish540 บาท'))
+	print(segment('ประหยัด ไฟเบอห้า'))
diff --git a/pythainlp/tokenize/pylexto.py b/pythainlp/tokenize/pylexto.py
new file mode 100644
index 000000000..92ecb8158
--- /dev/null
+++ b/pythainlp/tokenize/pylexto.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import,unicode_literals
+import sys
+try:
+    from pylexto import LexTo
+except ImportError:
+    import subprocess  # pip has no supported in-process API, so run it as a child process and retry once
+    subprocess.call([sys.executable, '-m', 'pip', 'install', 'https://github.com/wannaphongcom/pylexto/archive/master.zip'])
+    try:
+        from pylexto import LexTo
+    except ImportError:
+        sys.exit('Error ! using pip install https://github.com/wannaphongcom/pylexto/archive/master.zip')
+def segment(text):
+    """Tokenize ``text`` with LexTo and return only the words (the token types are dropped)."""
+    words, _types = LexTo().tokenize(text)
+    return words
\ No newline at end of file
diff --git a/pythainlp/tokenize/tcc.py b/pythainlp/tokenize/tcc.py
new file mode 100644
index 000000000..59086b0d3
--- /dev/null
+++ b/pythainlp/tokenize/tcc.py
@@ -0,0 +1,70 @@
+"""
+โปรแกรม TCC ภาษาไทย
+เดติด
+TCC : Mr.Jakkrit TeCho
+grammar : คุณ Wittawat Jitkrittum (https://github.com/wittawatj/jtcc/blob/master/TCC.g)
+โค้ด : คุณ Korakot Chaovavanich 
+"""
+import re
+pat_list = """\
+เc็c
+เcctาะ
+เccีtยะ
+เccีtย(?=[เ-ไก-ฮ]|$)
+เccอะ
+เcc็c
+เcิc์c
+เcิtc
+เcีtยะ?
+เcืtอะ?
+เc[ิีุู]tย(?=[เ-ไก-ฮ]|$)
+เctา?ะ?
+cัtวะ
+c[ัื]tc[ุิะ]?
+c[ิุู]์
+c[ะ-ู]t
+c็
+ct[ะาำ]?
+แc็c
+แcc์
+แctะ
+โctะ
+[เ-ไ]ct
+ๆ
+ฯลฯ
+ฯ
+""".replace('c','[ก-ฮ]').replace('t', '[่-๋]?').split()  # one regex per line; placeholder c -> any consonant ก-ฮ, t -> optional mark in the range ่-๋
+'''
+def tcc(w):
+    p = 0 # position
+    while p<len(w):
+        for pat in pat_list:
+            m = re.match(pat, w[p:])
+            if m:
+                n = m.span()[1]
+                break
+            else: # กรณีหาไม่เจอ
+                n = 1
+        yield w[p:p+n]
+        p += n
+'''
+def tcc1(w):
+    """Yield the character clusters of ``w`` one at a time, from left to right."""
+    matcher = re.compile("|".join(pat_list)).match
+    pos = 0
+    end = len(w)
+    while pos < end:
+        hit = matcher(w[pos:])
+        # when no pattern applies here, the single character is its own cluster
+        width = hit.end() if hit else 1
+        yield w[pos:pos+width]
+        pos += width
+def tcc(w, sep='/'):  # join the clusters produced by tcc1(w) into one str, separated by `sep`
+    return sep.join(tcc1(w))
+if __name__ == '__main__':  # manual demo: print the cluster split of a few sample words
+    print(tcc('แมวกิน'))
+    print(tcc('ประชาชน'))
+    print(tcc('ขุด')+'/'+tcc('หลุม'))
+    print(tcc('ยินดี'))
\ No newline at end of file
diff --git a/pythainlp/segment/thai.py b/pythainlp/tokenize/thai.py
similarity index 100%
rename from pythainlp/segment/thai.py
rename to pythainlp/tokenize/thai.py
diff --git a/pythainlp/util/__init__.py b/pythainlp/util/__init__.py
new file mode 100644
index 000000000..578a12ed9
--- /dev/null
+++ b/pythainlp/util/__init__.py
@@ -0,0 +1,11 @@
+﻿# -*- coding: utf-8 -*-
+from nltk.util import ngrams as ngramsdata
+def ngrams(token,num):
+	'''
+	Build n-grams from a list of tokens by delegating to nltk.util.ngrams.
+
+	- token: the list of tokens
+	- num: the n-gram size; coerced with int()
+	'''
+	size = int(num)
+	return ngramsdata(token, size)
\ No newline at end of file
diff --git a/requirements-travis.txt b/requirements-travis.txt
new file mode 100644
index 000000000..73a7a63a9
--- /dev/null
+++ b/requirements-travis.txt
@@ -0,0 +1,6 @@
+pyicu==1.9.3
+nltk>=3.2.2
+future>=0.16.0
+six
+marisa_trie
+langdetect
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..d4f03bec2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+pyicu
+nltk>=3.2.2
+future>=0.16.0
+six
+marisa_trie
+langdetect
diff --git a/setup.py b/setup.py
index 0659102e4..e3eea66a3 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,10 @@
     'pyicu',
     'nltk>=3.2.2',
     'future>=0.16.0',
-    'six'
+    'six',
+    'marisa_trie',
+    'langdetect'
 ]
-if sys.version_info >= (3,4):
-    requirements.append('artagger')
-    requirements.append('dill')
 
 test_requirements = [
     # TODO: put package test requirements here
@@ -18,14 +17,14 @@
 
 setup(
     name='pythainlp',
-    version='1.3',
+    version='1.4',
     description="Thai NLP in python package.",
     author='Wannaphong Phatthiyaphaibun',
     author_email='wannaphong@yahoo.com',
     url='https://github.com/wannaphongcom/pythainlp',
     packages=find_packages(),
     test_suite='pythainlp.test',
-    package_data={'pythainlp.corpus':['thaipos.json','thaiword.txt','LICENSE_THA_WN','tha-wn.db','new-thaidict.txt','negation.txt'],'pythainlp.sentiment':['vocabulary.data','sentiment.data']},
+    package_data={'pythainlp.corpus':['stopwords-th.txt','thaipos.json','thaiword.txt','corpus_license.md','tha-wn.db','new-thaidict.txt','negation.txt'],'pythainlp.sentiment':['vocabulary.data','sentiment.data']},
     include_package_data=True,
     install_requires=requirements,
     license='Apache Software License 2.0',
@@ -38,4 +37,4 @@
         'Natural Language :: Thai',
         'Topic :: Text Processing :: Linguistic',
         'Programming Language :: Python :: Implementation'],
-)
\ No newline at end of file
+)