diff --git a/.buildinfo b/.buildinfo index 689ea68..dcfdbac 100644 --- a/.buildinfo +++ b/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 79634596c65bfaa599ac34c87302859b +config: 9e92a1f4664e6175bb0baeb8e9a47546 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/_images/notebooks_pythainlp_chunk_6_0.svg b/_images/notebooks_pythainlp_chunk_10_0.svg similarity index 100% rename from _images/notebooks_pythainlp_chunk_6_0.svg rename to _images/notebooks_pythainlp_chunk_10_0.svg diff --git a/_images/notebooks_pythainlp_chunk_7_0.svg b/_images/notebooks_pythainlp_chunk_11_0.svg similarity index 100% rename from _images/notebooks_pythainlp_chunk_7_0.svg rename to _images/notebooks_pythainlp_chunk_11_0.svg diff --git a/_images/notebooks_pythainlp_chunk_12_0.svg b/_images/notebooks_pythainlp_chunk_12_0.svg new file mode 100644 index 0000000..2d1ccf7 --- /dev/null +++ b/_images/notebooks_pythainlp_chunk_12_0.svg @@ -0,0 +1 @@ +SNPทำไมNCMNเขาPPRSVPรักVACTคุณPPRS \ No newline at end of file diff --git a/_images/notebooks_pythainlp_chunk_13_0.svg b/_images/notebooks_pythainlp_chunk_13_0.svg new file mode 100644 index 0000000..c3165c9 --- /dev/null +++ b/_images/notebooks_pythainlp_chunk_13_0.svg @@ -0,0 +1 @@ +SNPคนNCMNอะไรPNTRVPอยู่VSTAหลังRPREต้นไม้NCMN \ No newline at end of file diff --git a/_images/notebooks_pythainlp_chunk_4_0.svg b/_images/notebooks_pythainlp_chunk_4_0.svg deleted file mode 100644 index ea9fdb7..0000000 --- a/_images/notebooks_pythainlp_chunk_4_0.svg +++ /dev/null @@ -1 +0,0 @@ -SNPแมวNCMNVPกินVACTปลาNCMN \ No newline at end of file diff --git a/_images/notebooks_pythainlp_chunk_5_0.svg b/_images/notebooks_pythainlp_chunk_5_0.svg deleted file mode 100644 index 953ad04..0000000 --- a/_images/notebooks_pythainlp_chunk_5_0.svg +++ /dev/null @@ -1 +0,0 @@ -SNPคนNCMNหนองคายNCMNVPเป็นVSTAคนNCMNน่ารักVATT \ No newline at end of file diff --git a/_images/notebooks_pythainlp_chunk_8_0.svg b/_images/notebooks_pythainlp_chunk_8_0.svg index 2d1ccf7..ea9fdb7 100644 --- a/_images/notebooks_pythainlp_chunk_8_0.svg +++ b/_images/notebooks_pythainlp_chunk_8_0.svg @@ -1 +1 @@ -SNPทำไมNCMNเขาPPRSVPรักVACTคุณPPRS \ No newline at end of file +SNPแมวNCMNVPกินVACTปลาNCMN \ No newline at end of file diff --git a/_images/notebooks_pythainlp_chunk_9_0.svg b/_images/notebooks_pythainlp_chunk_9_0.svg index c3165c9..953ad04 100644 --- a/_images/notebooks_pythainlp_chunk_9_0.svg +++ b/_images/notebooks_pythainlp_chunk_9_0.svg @@ -1 +1 @@ -SNPคนNCMNอะไรPNTRVPอยู่VSTAหลังRPREต้นไม้NCMN \ No newline at end of file +SNPคนNCMNหนองคายNCMNVPเป็นVSTAคนNCMNน่ารักVATT \ No newline at end of file diff --git a/_sources/notebooks/pythainlp_chunk.ipynb b/_sources/notebooks/pythainlp_chunk.ipynb index 9ea47f9..931095b 100644 --- a/_sources/notebooks/pythainlp_chunk.ipynb +++ b/_sources/notebooks/pythainlp_chunk.ipynb @@ -1,275 +1,328 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "eCfShB9fUSqO" + }, + "source": [ + "# Thai Chunk Parser\n", + "\n", + "This tutorial demonstrates how to use the `chunk_parse` function from the PyThaiNLP library for parsing Thai text into phrases. We will use a chunking model trained on ORCHID++ corpus. \n", + "\n", + "Read more: https://github.com/PyThaiNLP/pythainlp/pull/524" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will need the following libraries and packages: \n", + "- PyThaiNLP\n", + "- NLTK (to preprocess chunk data for visualization)\n", + "- svgling (for visualization)\n", + "- python-crfsuite" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { "colab": { - "provenance": [] + "base_uri": "https://localhost:8080/" }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "id": "JvwrS6MDhitW", + "outputId": "ab197d92-b537-4974-e1b5-6bdaa7b8cefd" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: pythainlp in /usr/local/lib/python3.10/dist-packages (4.0.2)\n", + "Requirement already satisfied: svgling in /usr/local/lib/python3.10/dist-packages (0.3.1)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.8.1)\n", + "Collecting python-crfsuite\n", + " Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m993.5/993.5 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from pythainlp) (2.31.0)\n", + "Requirement already satisfied: svgwrite in /usr/local/lib/python3.10/dist-packages (from svgling) (1.4.3)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.6)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk) (1.3.2)\n", + "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2023.6.3)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk) (4.66.0)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2.0.4)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2023.7.22)\n", + "Installing collected packages: python-crfsuite\n", + "Successfully installed python-crfsuite-0.9.9\n" + ] } + ], + "source": [ + "!pip install pythainlp svgling nltk python-crfsuite" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "eCfShB9fUSqO" - }, - "source": [ - "# Thai Chunk Parser\n", - "\n", - "In PyThaiNLP, We use chunk data from ORCHID++ corpus.\n", - "\n", - "Read more: https://github.com/PyThaiNLP/pythainlp/pull/524" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "JvwrS6MDhitW", - "outputId": "ab197d92-b537-4974-e1b5-6bdaa7b8cefd" - }, - "source": [ - "!pip install pythainlp svgling nltk python-crfsuite" - ], - "execution_count": 5, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requirement already satisfied: pythainlp in /usr/local/lib/python3.10/dist-packages (4.0.2)\n", - "Requirement already satisfied: svgling in /usr/local/lib/python3.10/dist-packages (0.3.1)\n", - "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.8.1)\n", - "Collecting python-crfsuite\n", - " Downloading python_crfsuite-0.9.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (993 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m993.5/993.5 kB\u001b[0m \u001b[31m8.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hRequirement already satisfied: requests>=2.22.0 in /usr/local/lib/python3.10/dist-packages (from pythainlp) (2.31.0)\n", - "Requirement already satisfied: svgwrite in /usr/local/lib/python3.10/dist-packages (from svgling) (1.4.3)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.6)\n", - "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk) (1.3.2)\n", - "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2023.6.3)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from nltk) (4.66.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2.0.4)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.22.0->pythainlp) (2023.7.22)\n", - "Installing collected packages: python-crfsuite\n", - "Successfully installed python-crfsuite-0.9.9\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "ZPRRhKxrhlFA" - }, - "source": [ - "from pythainlp.tokenize import word_tokenize\n", - "from pythainlp.tag import pos_tag\n", - "from pythainlp.tag import chunk_parse\n", - "from nltk.chunk import conlltags2tree\n", - "import svgling" - ], - "execution_count": 1, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "bGD2uxMFhmh4" - }, - "source": [ - "def test(txt):\n", - " m = [(w,t) for w,t in pos_tag(word_tokenize(txt), engine= 'perceptron',corpus = 'orchid')]\n", - " tag = chunk_parse(m)\n", - " p = [(w,t,tag[i]) for i,(w,t) in enumerate(m)]\n", - " return p" - ], - "execution_count": 2, - "outputs": [] + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to import the following modules and functions:\n", + "- `word_tokenize` – this function takes a Thai text and returns a list of tokenized words\n", + "- `pos_tag` – this function takes a list of tokenized words and marks them with part-of-speech (POS) tags\n", + "- `chunk_parse` – this function takes words with their POS tags and marks them with inside-outside-beginning (IOB) tags\n", + "- `conlltags2tree` – this function is part of the NLTK and converts IOB format to a tree\n", + "- `svgling` – this package will be used to visualize the tree in SVG\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "ZPRRhKxrhlFA" + }, + "outputs": [], + "source": [ + "from pythainlp.tokenize import word_tokenize\n", + "from pythainlp.tag import pos_tag\n", + "from pythainlp.tag import chunk_parse\n", + "from nltk.chunk import conlltags2tree\n", + "import svgling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We define a new function `test`, which will first segment the input text into words (`word_tokenize`), tag the words with their parts of speech based on the ORCHID++ corpus (`pos_tag`) and perform chunking (`chunk_parse`). The function then combines the words, POS and IOB tags into a list of triples `p`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "bGD2uxMFhmh4" + }, + "outputs": [], + "source": [ + "def test(txt):\n", + " m = [(w,t) for w,t in pos_tag(word_tokenize(txt), engine= 'perceptron',corpus = 'orchid')]\n", + " tag = chunk_parse(m)\n", + " p = [(w,t,tag[i]) for i,(w,t) in enumerate(m)]\n", + " return p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we call the `test` function to chunk several example sentences. We then use the `svgling.draw_tree` function to visualize the syntactic trees, which were generated from the chunked data by the `conlltags2tree` function." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "ag8oszXfhoAZ", + "outputId": "b789de88-d812-44ca-d0d9-4f031127b68d" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "id": "ag8oszXfhoAZ", - "outputId": "b789de88-d812-44ca-d0d9-4f031127b68d" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"แมวกินปลา\")))" + "data": { + "image/svg+xml": [ + "SNPแมวNCMNVPกินVACTปลาNCMN" ], - "execution_count": 3, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('แมว', 'NCMN')]), Tree('VP', [('กิน', 'VACT'), ('ปลา', 'NCMN')])]))" - ], - "image/svg+xml": "SNPแมวNCMNVPกินVACTปลาNCMN" - }, - "metadata": {}, - "execution_count": 3 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('แมว', 'NCMN')]), Tree('VP', [('กิน', 'VACT'), ('ปลา', 'NCMN')])]))" ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"แมวกินปลา\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "L3COVriThp3B", + "outputId": "27256b8d-f265-49cb-c5f1-85fee90b79e4" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "id": "L3COVriThp3B", - "outputId": "27256b8d-f265-49cb-c5f1-85fee90b79e4" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"คนหนองคายเป็นคนน่ารัก\")))" + "data": { + "image/svg+xml": [ + "SNPคนNCMNหนองคายNCMNVPเป็นVSTAคนNCMNน่ารักVATT" ], - "execution_count": 4, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('หนองคาย', 'NCMN')]), Tree('VP', [('เป็น', 'VSTA'), ('คน', 'NCMN'), ('น่ารัก', 'VATT')])]))" - ], - "image/svg+xml": "SNPคนNCMNหนองคายNCMNVPเป็นVSTAคนNCMNน่ารักVATT" - }, - "metadata": {}, - "execution_count": 4 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('หนองคาย', 'NCMN')]), Tree('VP', [('เป็น', 'VSTA'), ('คน', 'NCMN'), ('น่ารัก', 'VATT')])]))" ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"คนหนองคายเป็นคนน่ารัก\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "YwaQNhLPib6Y", + "outputId": "1ebc2402-90bf-4a37-8b3e-60b62bb52bae" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "YwaQNhLPib6Y", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "outputId": "1ebc2402-90bf-4a37-8b3e-60b62bb52bae" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"ปลาอะไรอยู่ในน้ำ\")))" + "data": { + "image/svg+xml": [ + "SNPปลาNCMNอะไรPNTRVPอยู่VSTAในRPREน้ำNCMN" ], - "execution_count": 5, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('ปลา', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('ใน', 'RPRE'), ('น้ำ', 'NCMN')])]))" - ], - "image/svg+xml": "SNPปลาNCMNอะไรPNTRVPอยู่VSTAในRPREน้ำNCMN" - }, - "metadata": {}, - "execution_count": 5 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('ปลา', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('ใน', 'RPRE'), ('น้ำ', 'NCMN')])]))" ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"ปลาอะไรอยู่ในน้ำ\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "PB7AU2febneD", + "outputId": "32bfea36-c0e1-484a-dbb6-b77536124507" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "id": "PB7AU2febneD", - "outputId": "32bfea36-c0e1-484a-dbb6-b77536124507" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"ในน้ำมีอะไรอยู่\")))" + "data": { + "image/svg+xml": [ + "SNPในRPREน้ำNCMNVPมีVSTAอะไรPNTRอยู่XVAE" ], - "execution_count": 6, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('ใน', 'RPRE'), ('น้ำ', 'NCMN')]), Tree('VP', [('มี', 'VSTA'), ('อะไร', 'PNTR'), ('อยู่', 'XVAE')])]))" - ], - "image/svg+xml": "SNPในRPREน้ำNCMNVPมีVSTAอะไรPNTRอยู่XVAE" - }, - "metadata": {}, - "execution_count": 6 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('ใน', 'RPRE'), ('น้ำ', 'NCMN')]), Tree('VP', [('มี', 'VSTA'), ('อะไร', 'PNTR'), ('อยู่', 'XVAE')])]))" ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"ในน้ำมีอะไรอยู่\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "uu4KZ4OIbqy5", + "outputId": "c49b5cd2-680f-4a44-afe7-8c80368bffa8" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "id": "uu4KZ4OIbqy5", - "outputId": "c49b5cd2-680f-4a44-afe7-8c80368bffa8" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"ทำไมเขารักคุณ\")))" + "data": { + "image/svg+xml": [ + "SNPทำไมNCMNเขาPPRSVPรักVACTคุณPPRS" ], - "execution_count": 7, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('ทำไม', 'NCMN'), ('เขา', 'PPRS')]), Tree('VP', [('รัก', 'VACT'), ('คุณ', 'PPRS')])]))" - ], - "image/svg+xml": "SNPทำไมNCMNเขาPPRSVPรักVACTคุณPPRS" - }, - "metadata": {}, - "execution_count": 7 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('ทำไม', 'NCMN'), ('เขา', 'PPRS')]), Tree('VP', [('รัก', 'VACT'), ('คุณ', 'PPRS')])]))" ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"ทำไมเขารักคุณ\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 188 }, + "id": "xAsZ9PkvbxrG", + "outputId": "1d8c7932-ecf1-4671-a9f7-b2263e3dd80a" + }, + "outputs": [ { - "cell_type": "code", - "metadata": { - "id": "xAsZ9PkvbxrG", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 188 - }, - "outputId": "1d8c7932-ecf1-4671-a9f7-b2263e3dd80a" - }, - "source": [ - "svgling.draw_tree(conlltags2tree(test(\"คนอะไรอยู่หลังต้นไม้\")))" + "data": { + "image/svg+xml": [ + "SNPคนNCMNอะไรPNTRVPอยู่VSTAหลังRPREต้นไม้NCMN" ], - "execution_count": 8, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('หลัง', 'RPRE'), ('ต้นไม้', 'NCMN')])]))" - ], - "image/svg+xml": "SNPคนNCMNอะไรPNTRVPอยู่VSTAหลังRPREต้นไม้NCMN" - }, - "metadata": {}, - "execution_count": 8 - } + "text/plain": [ + "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('หลัง', 'RPRE'), ('ต้นไม้', 'NCMN')])]))" ] - }, - { - "cell_type": "code", - "metadata": { - "id": "SP3ZlCeQJWpq" - }, - "source": [], - "execution_count": 8, - "outputs": [] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } - ] -} \ No newline at end of file + ], + "source": [ + "svgling.draw_tree(conlltags2tree(test(\"คนอะไรอยู่หลังต้นไม้\")))" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} diff --git a/_static/documentation_options.js b/_static/documentation_options.js index 43bd01b..a129e62 100644 --- a/_static/documentation_options.js +++ b/_static/documentation_options.js @@ -1,6 +1,6 @@ var DOCUMENTATION_OPTIONS = { URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), - VERSION: 'thai2plot-18-g321b53d', + VERSION: 'thai2plot-20-gf4ed474', LANGUAGE: 'en', COLLAPSE_INDEX: false, BUILDER: 'html', diff --git a/genindex.html b/genindex.html index 28604cf..f563332 100644 --- a/genindex.html +++ b/genindex.html @@ -3,7 +3,7 @@ - Index — pythainlp-tutorials thai2plot-18-g321b53d documentation + Index — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/index.html b/index.html index d6c2263..b6d7f06 100644 --- a/index.html +++ b/index.html @@ -4,7 +4,7 @@ - Welcome to PyThaiNLP Tutorials — pythainlp-tutorials thai2plot-18-g321b53d documentation + Welcome to PyThaiNLP Tutorials — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/Han-Coref.html b/notebooks/Han-Coref.html index 9c70b62..12ab589 100644 --- a/notebooks/Han-Coref.html +++ b/notebooks/Han-Coref.html @@ -4,7 +4,7 @@ - 🪿 Han-Coref: Thai Coreference resolution by PyThaiNLP — pythainlp-tutorials thai2plot-18-g321b53d documentation + 🪿 Han-Coref: Thai Coreference resolution by PyThaiNLP — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/Thai_Dependency_Parser.html b/notebooks/Thai_Dependency_Parser.html index 3656b10..e62d57d 100644 --- a/notebooks/Thai_Dependency_Parser.html +++ b/notebooks/Thai_Dependency_Parser.html @@ -4,7 +4,7 @@ - Thai Dependency Parser — pythainlp-tutorials thai2plot-18-g321b53d documentation + Thai Dependency Parser — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/find_all_thai_rhyming_words.html b/notebooks/find_all_thai_rhyming_words.html index 78ea381..bec5d34 100644 --- a/notebooks/find_all_thai_rhyming_words.html +++ b/notebooks/find_all_thai_rhyming_words.html @@ -4,7 +4,7 @@ - Find all Thai rhyming words from Thai word — pythainlp-tutorials thai2plot-18-g321b53d documentation + Find all Thai rhyming words from Thai word — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/machine_translation.html b/notebooks/machine_translation.html index 199d2b4..80b4855 100644 --- a/notebooks/machine_translation.html +++ b/notebooks/machine_translation.html @@ -4,7 +4,7 @@ - PyThaiNLP Translate — pythainlp-tutorials thai2plot-18-g321b53d documentation + PyThaiNLP Translate — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/nlpo3ipynb.html b/notebooks/nlpo3ipynb.html index 16f93b4..993ef75 100644 --- a/notebooks/nlpo3ipynb.html +++ b/notebooks/nlpo3ipynb.html @@ -4,7 +4,7 @@ - nlpO3 — pythainlp-tutorials thai2plot-18-g321b53d documentation + nlpO3 — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/pythainlp_chunk.html b/notebooks/pythainlp_chunk.html index 552bf36..e8ebc63 100644 --- a/notebooks/pythainlp_chunk.html +++ b/notebooks/pythainlp_chunk.html @@ -4,7 +4,7 @@ - Thai Chunk Parser — pythainlp-tutorials thai2plot-18-g321b53d documentation + Thai Chunk Parser — pythainlp-tutorials thai2plot-20-gf4ed474 documentation @@ -104,13 +104,14 @@

Thai Chunk Parser

-

In PyThaiNLP, We use chunk data from ORCHID++ corpus.

+

This tutorial demonstrates how to use the chunk_parse function from the PyThaiNLP library for parsing Thai text into phrases. We will use a chunking model trained on ORCHID++ corpus.

Read more: https://github.com/PyThaiNLP/pythainlp/pull/524

+

We will need the following libraries and packages: - PyThaiNLP - NLTK (to preprocess chunk data for visualization) - svgling (for visualization) - python-crfsuite

-
[5]:
+
[1]:
 
-
!pip install pythainlp svgling nltk python-crfsuite
+
!pip install pythainlp svgling nltk python-crfsuite
 
@@ -139,50 +140,39 @@

Thai Chunk Parser -
[1]:
-
-
-
from pythainlp.tokenize import word_tokenize
-from pythainlp.tag import pos_tag
-from pythainlp.tag import chunk_parse
-from nltk.chunk import conlltags2tree
-import svgling
-
-
-

+

We need to import the following modules and functions: - word_tokenize – this function takes a Thai text and returns a list of tokenized words - pos_tag – this function takes a list of tokenized words and marks them with part-of-speech (POS) tags - chunk_parse – this function takes words with their POS tags and marks them with inside-outside-beginning (IOB) tags - conlltags2tree – this function is part of the NLTK and converts IOB format to a tree - svgling – this package +will be used to visualize the tree in SVG

[2]:
 
-
def test(txt):
-    m = [(w,t) for w,t in pos_tag(word_tokenize(txt), engine= 'perceptron',corpus = 'orchid')]
-    tag = chunk_parse(m)
-    p = [(w,t,tag[i]) for i,(w,t) in enumerate(m)]
-    return p
+
from pythainlp.tokenize import word_tokenize
+from pythainlp.tag import pos_tag
+from pythainlp.tag import chunk_parse
+from nltk.chunk import conlltags2tree
+import svgling
 
-
+

We define a new function test, which will first segment the input text into words (word_tokenize), tag the words with their parts of speech based on the ORCHID++ corpus (pos_tag) and perform chunking (chunk_parse). The function then combines the words, POS and IOB tags into a list of triples p.

+
-
-
[8]:
+
+
[9]:
 
-

+
svgling.draw_tree(conlltags2tree(test("คนอะไรอยู่หลังต้นไม้")))
 
+
+
[9]:
+
+
+
+../_images/notebooks_pythainlp_chunk_13_0.svg
+
diff --git a/notebooks/pythainlp_chunk.ipynb b/notebooks/pythainlp_chunk.ipynb index 0d63a0c..931095b 100644 --- a/notebooks/pythainlp_chunk.ipynb +++ b/notebooks/pythainlp_chunk.ipynb @@ -8,14 +8,25 @@ "source": [ "# Thai Chunk Parser\n", "\n", - "In PyThaiNLP, We use chunk data from ORCHID++ corpus.\n", + "This tutorial demonstrates how to use the `chunk_parse` function from the PyThaiNLP library for parsing Thai text into phrases. We will use a chunking model trained on ORCHID++ corpus. \n", "\n", "Read more: https://github.com/PyThaiNLP/pythainlp/pull/524" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will need the following libraries and packages: \n", + "- PyThaiNLP\n", + "- NLTK (to preprocess chunk data for visualization)\n", + "- svgling (for visualization)\n", + "- python-crfsuite" + ] + }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -53,9 +64,21 @@ "!pip install pythainlp svgling nltk python-crfsuite" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We need to import the following modules and functions:\n", + "- `word_tokenize` – this function takes a Thai text and returns a list of tokenized words\n", + "- `pos_tag` – this function takes a list of tokenized words and marks them with part-of-speech (POS) tags\n", + "- `chunk_parse` – this function takes words with their POS tags and marks them with inside-outside-beginning (IOB) tags\n", + "- `conlltags2tree` – this function is part of the NLTK and converts IOB format to a tree\n", + "- `svgling` – this package will be used to visualize the tree in SVG\n" + ] + }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "id": "ZPRRhKxrhlFA" }, @@ -68,9 +91,16 @@ "import svgling" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We define a new function `test`, which will first segment the input text into words (`word_tokenize`), tag the words with their parts of speech based on the ORCHID++ corpus (`pos_tag`) and perform chunking (`chunk_parse`). The function then combines the words, POS and IOB tags into a list of triples `p`." + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "id": "bGD2uxMFhmh4" }, @@ -83,9 +113,16 @@ " return p" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, we call the `test` function to chunk several example sentences. We then use the `svgling.draw_tree` function to visualize the syntactic trees, which were generated from the chunked data by the `conlltags2tree` function." + ] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -104,7 +141,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('แมว', 'NCMN')]), Tree('VP', [('กิน', 'VACT'), ('ปลา', 'NCMN')])]))" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -115,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -134,7 +171,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('หนองคาย', 'NCMN')]), Tree('VP', [('เป็น', 'VSTA'), ('คน', 'NCMN'), ('น่ารัก', 'VATT')])]))" ] }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -145,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -164,7 +201,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('ปลา', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('ใน', 'RPRE'), ('น้ำ', 'NCMN')])]))" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -175,7 +212,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -194,7 +231,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('ใน', 'RPRE'), ('น้ำ', 'NCMN')]), Tree('VP', [('มี', 'VSTA'), ('อะไร', 'PNTR'), ('อยู่', 'XVAE')])]))" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -205,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -224,7 +261,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('ทำไม', 'NCMN'), ('เขา', 'PPRS')]), Tree('VP', [('รัก', 'VACT'), ('คุณ', 'PPRS')])]))" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -235,7 +272,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -254,7 +291,7 @@ "TreeLayout(Tree('S', [Tree('NP', [('คน', 'NCMN'), ('อะไร', 'PNTR')]), Tree('VP', [('อยู่', 'VSTA'), ('หลัง', 'RPRE'), ('ต้นไม้', 'NCMN')])]))" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -262,15 +299,6 @@ "source": [ "svgling.draw_tree(conlltags2tree(test(\"คนอะไรอยู่หลังต้นไม้\")))" ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "id": "SP3ZlCeQJWpq" - }, - "outputs": [], - "source": [] } ], "metadata": { @@ -278,10 +306,23 @@ "provenance": [] }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", + "language": "python", "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/notebooks/pythainlp_get_started.html b/notebooks/pythainlp_get_started.html index 1605f54..ed47ff6 100644 --- a/notebooks/pythainlp_get_started.html +++ b/notebooks/pythainlp_get_started.html @@ -4,7 +4,7 @@ - PyThaiNLP Get Started — pythainlp-tutorials thai2plot-18-g321b53d documentation + PyThaiNLP Get Started — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/pythainlp_wangchanberta.html b/notebooks/pythainlp_wangchanberta.html index df4d321..ed94a32 100644 --- a/notebooks/pythainlp_wangchanberta.html +++ b/notebooks/pythainlp_wangchanberta.html @@ -4,7 +4,7 @@ - Wangchanberta — pythainlp-tutorials thai2plot-18-g321b53d documentation + Wangchanberta — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/sentiment_analysis.html b/notebooks/sentiment_analysis.html index e468001..8ff2d57 100644 --- a/notebooks/sentiment_analysis.html +++ b/notebooks/sentiment_analysis.html @@ -4,7 +4,7 @@ - Wisesight Sentiment Analysis — pythainlp-tutorials thai2plot-18-g321b53d documentation + Wisesight Sentiment Analysis — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/spaCy_PyThaiNLP_demo.html b/notebooks/spaCy_PyThaiNLP_demo.html index b3f7f7f..a0e1c9f 100644 --- a/notebooks/spaCy_PyThaiNLP_demo.html +++ b/notebooks/spaCy_PyThaiNLP_demo.html @@ -4,7 +4,7 @@ - spaCy-PyThaiNLP — pythainlp-tutorials thai2plot-18-g321b53d documentation + spaCy-PyThaiNLP — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/text_classification.html b/notebooks/text_classification.html index 96930cf..0b12206 100644 --- a/notebooks/text_classification.html +++ b/notebooks/text_classification.html @@ -4,7 +4,7 @@ - Wongnai Review Classification — pythainlp-tutorials thai2plot-18-g321b53d documentation + Wongnai Review Classification — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/text_generation.html b/notebooks/text_generation.html index c74e1bf..b74e124 100644 --- a/notebooks/text_generation.html +++ b/notebooks/text_generation.html @@ -4,7 +4,7 @@ - Thai Wiki Language Model for Text Generation — pythainlp-tutorials thai2plot-18-g321b53d documentation + Thai Wiki Language Model for Text Generation — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/thai_wav2vec2_onnx.html b/notebooks/thai_wav2vec2_onnx.html index f66a982..503e149 100644 --- a/notebooks/thai_wav2vec2_onnx.html +++ b/notebooks/thai_wav2vec2_onnx.html @@ -4,7 +4,7 @@ - Thai Wav2vec2 model to ONNX model — pythainlp-tutorials thai2plot-18-g321b53d documentation + Thai Wav2vec2 model to ONNX model — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/wangchanberta_getting_started_aireseach.html b/notebooks/wangchanberta_getting_started_aireseach.html index d1bdb87..dc356d7 100644 --- a/notebooks/wangchanberta_getting_started_aireseach.html +++ b/notebooks/wangchanberta_getting_started_aireseach.html @@ -4,7 +4,7 @@ - WangchanBERTa: Getting Started Notebook — pythainlp-tutorials thai2plot-18-g321b53d documentation + WangchanBERTa: Getting Started Notebook — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/notebooks/word2vec_examples.html b/notebooks/word2vec_examples.html index 9b50b65..ce9433f 100644 --- a/notebooks/word2vec_examples.html +++ b/notebooks/word2vec_examples.html @@ -4,7 +4,7 @@ - Thai2Vec Embeddings Examples — pythainlp-tutorials thai2plot-18-g321b53d documentation + Thai2Vec Embeddings Examples — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/search.html b/search.html index ec723f7..429a6fc 100644 --- a/search.html +++ b/search.html @@ -3,7 +3,7 @@ - Search — pythainlp-tutorials thai2plot-18-g321b53d documentation + Search — pythainlp-tutorials thai2plot-20-gf4ed474 documentation diff --git a/searchindex.js b/searchindex.js index ff1f3fc..5594404 100644 --- a/searchindex.js +++ b/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["index", "notebooks/Han-Coref", "notebooks/Thai_Dependency_Parser", "notebooks/find_all_thai_rhyming_words", "notebooks/machine_translation", "notebooks/nlpo3ipynb", "notebooks/pythainlp_chunk", "notebooks/pythainlp_get_started", "notebooks/pythainlp_wangchanberta", "notebooks/sentiment_analysis", "notebooks/spaCy_PyThaiNLP_demo", "notebooks/text_classification", "notebooks/text_generation", "notebooks/thai_wav2vec2_onnx", "notebooks/wangchanberta_getting_started_aireseach", "notebooks/word2vec_examples"], "filenames": ["index.rst", "notebooks/Han-Coref.ipynb", "notebooks/Thai_Dependency_Parser.ipynb", "notebooks/find_all_thai_rhyming_words.ipynb", "notebooks/machine_translation.ipynb", "notebooks/nlpo3ipynb.ipynb", "notebooks/pythainlp_chunk.ipynb", "notebooks/pythainlp_get_started.ipynb", "notebooks/pythainlp_wangchanberta.ipynb", "notebooks/sentiment_analysis.ipynb", "notebooks/spaCy_PyThaiNLP_demo.ipynb", "notebooks/text_classification.ipynb", "notebooks/text_generation.ipynb", "notebooks/thai_wav2vec2_onnx.ipynb", "notebooks/wangchanberta_getting_started_aireseach.ipynb", "notebooks/word2vec_examples.ipynb"], "titles": ["Welcome to PyThaiNLP Tutorials", "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP", "Thai Dependency Parser", "Find all Thai rhyming words from Thai word", "PyThaiNLP Translate", "nlpO3", "Thai Chunk Parser", "PyThaiNLP Get Started", "Wangchanberta", "Wisesight Sentiment Analysis", "spaCy-PyThaiNLP", "Wongnai Review Classification", "Thai Wiki Language Model for Text Generation", "Thai Wav2vec2 model to ONNX model", "WangchanBERTa: Getting Started Notebook", "Thai2Vec Embeddings Examples"], "terms": {"i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "python": [0, 2, 3, 4, 5, 6, 8, 10, 13, 14], "librari": [0, 5], "thai": [0, 5, 8, 9, 10, 11, 14, 15], "natur": [0, 5], "languag": [0, 5, 7, 8, 14], "process": [0, 5, 7, 11, 13, 15], "han": 0, "coref": 0, "corefer": 0, "resolut": [0, 12], "depend": [0, 9, 10, 14], "parser": 0, "find": [0, 7, 9], "all": [0, 4, 7, 11, 12, 14, 15], "rhyme": 0, "word": [0, 5, 9, 10, 11, 12, 14], "from": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "translat": 0, "nlpo3": 0, "chunk": 0, "get": [0, 2, 9, 11, 12, 15], "start": [0, 1, 2, 12], "wangchanberta": 0, "wisesight": [0, 11], "sentiment": [0, 11], "analysi": [0, 14], "spaci": [0, 1, 2], "wongnai": 0, "review": 0, "classif": [0, 7, 9, 15], "wiki": [0, 11, 14], "model": [0, 4, 7, 8, 10, 15], "text": [0, 1, 4, 5, 7, 11, 13, 15], "gener": [0, 7, 14, 15], "wav2vec2": 0, "onnx": 0, "notebook": [0, 8, 9, 12, 13], "instal": [0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 15], "choos": 0, "pretrain": [0, 8, 9, 11, 12], "mask": 0, "token": [0, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15], "predict": [0, 9, 10, 11, 12, 13], "sequenc": [0, 7, 15], "document": [0, 15], "vector": [0, 15], "thai2vec": 0, "embed": [0, 9, 11, 12], "exampl": [0, 7, 9, 14], "apach": 0, "softwar": 0, "licens": 0, "2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "maintain": 0, "team": 0, "see": [0, 7, 11, 12, 14, 15], "sourc": [0, 2, 10], "code": [0, 7, 11, 13, 15], "http": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "github": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "com": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "main": [0, 7, 12, 13, 14], "develop": [0, 12, 14], "websit": 0, "io": [0, 10, 13], "interact": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "onlin": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "pip": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "q": [1, 9, 12], "fastcoref": 1, "transform": [1, 8, 9, 10, 12, 13, 14], "sentencepiec": [1, 4, 8, 14], "prepar": [1, 5, 10, 14], "metadata": [1, 2, 8, 10, 13, 14], "setup": [1, 2, 4, 7, 8, 10, 14], "py": [1, 2, 4, 7, 8, 9, 10, 13, 14, 15], "done": [1, 2, 4, 7, 8, 10, 12, 14], "13": [1, 2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "4": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "mb": [1, 3, 5, 6, 10, 13, 14, 15], "114": [1, 14, 15], "": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15], "eta": [1, 3, 6, 10, 11, 13, 14], "00": [1, 3, 4, 6, 7, 9, 10, 13, 14], "7": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "119": 1, "474": 1, "6": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "kb": [1, 3, 5, 6, 10, 13, 14, 15], "53": [1, 7, 11, 13], "110": [1, 5, 14, 15], "5": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "14": [1, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15], "9": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "212": 1, "25": [1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "134": 1, "3": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "17": [1, 2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "90": 1, "224": 1, "29": [1, 4, 7, 8, 9, 11, 14, 15], "8": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "95": [1, 4, 8], "268": 1, "32": [1, 7, 9, 10, 11, 14], "149": 1, "19": [1, 2, 3, 4, 7, 8, 9, 10, 13, 15], "build": [1, 2, 4, 8, 10, 14], "wheel": [1, 2, 4, 8, 10, 14], "import": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14], "spacy_compon": 1, "nlp": [1, 2, 10], "blank": [1, 10], "th": [1, 4, 9, 10, 11, 12, 13, 15], "add_pip": [1, 10], "config": [1, 9, 10, 11, 12, 13], "model_architectur": 1, "fcoref": 1, "model_path": [1, 9, 11, 15], "v1": 1, "lt": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14], "fastcorefresolv": 1, "0x7fbd9c2b6560": 1, "gt": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "random": [1, 9, 14], "def": [1, 6, 7, 13, 14, 15], "get2tag": 1, "tag": [1, 6, 8, 10, 12], "titl": [1, 14], "none": [1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "dic_ent": 1, "ent": [1, 10], "_tag": 1, "str": [1, 7], "list": [1, 3, 5, 7, 10, 13, 15], "rang": [1, 11, 14, 15], "len": [1, 3, 7, 9, 11, 12, 13, 15], "enumer": [1, 6, 15], "e": [1, 7, 10, 14], "append": [1, 3, 11, 13, 15], "end": [1, 7, 12, 14], "label": [1, 9, 11, 14, 15], "color": 1, "join": [1, 9, 11, 13], "choic": [1, 12], "0123456789abcdef": 1, "j": [1, 7, 13], "thank": 1, "stackoverflow": [1, 13], "50218895": 1, "return": [1, 6, 7, 13, 14, 15], "displaci": [1, 10], "\u0e2a\u0e32\u0e18": 1, "\u0e15": [1, 3, 5, 7, 8, 9, 11, 14, 15], "\u0e41\u0e08\u0e07\u0e27": 1, "\u0e19": [1, 7, 9, 10, 11, 12, 14, 15], "\u0e20\u0e32\u0e1e\u0e41\u0e04\u0e1b\u0e01\u0e25": 1, "\u0e21\u0e44\u0e25\u0e19": 1, "\u0e17": [1, 3, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e1b\u0e23": [1, 7, 11, 12], "\u0e01\u0e29\u0e32\u0e2f": 1, "\u0e01\u0e25": [1, 3, 9, 11], "\u0e32\u0e27\u0e23": 1, "\u0e32\u0e22": [1, 7, 9, 11], "\u0e1e": [1, 3, 7, 9, 11, 12, 14, 15], "\u0e18\u0e32": 1, "\u0e22": [1, 7, 8, 9, 11, 12, 14, 15], "\u0e44\u0e21": [1, 7, 8, 9, 11, 14, 15], "\u0e43\u0e0a": [1, 7, 9], "\u0e27\u0e40\u0e2d\u0e07": [1, 9], "\u0e41\u0e15": [1, 7, 8, 9, 11, 14], "\u0e40\u0e2b": [1, 9, 11], "\u0e19\u0e14": [1, 7, 9, 15], "\u0e27\u0e22\u0e27": 1, "\u0e32\u0e2d\u0e20": 1, "\u0e1b\u0e23\u0e32\u0e22\u0e14": 1, "\u0e2d\u0e22\u0e04": 1, "\u0e32\u0e1a\u0e33\u0e19\u0e32\u0e0d": 1, "\u0e02\u0e23\u0e01": 1, "doc": [1, 2, 10, 11, 13], "_": [1, 7, 9, 11, 14], "coref_clust": 1, "render": [1, 7, 10], "manual": 1, "true": [1, 7, 8, 9, 10, 11, 12, 13, 14, 15], "style": [1, 10], "option": [1, 10, 12], "jupyt": [1, 10], "\u0e41\u0e21": [1, 9, 11, 14], "\u0e2a": [1, 7, 9, 11, 12, 14, 15], "\u0e07\u0e43\u0e2b": 1, "\u0e25": [1, 3, 7, 11, 14, 15], "\u0e01\u0e0a\u0e32\u0e22\u0e44\u0e1b\u0e0b": 1, "\u0e2d\u0e02\u0e2d\u0e07": [1, 12], "\u0e40\u0e18\u0e2d\u0e01\u0e25": 1, "\u0e1a\u0e25": 1, "\u0e21\u0e40\u0e2d\u0e32\u0e15": 1, "\u0e01": [1, 3, 6, 7, 9, 11, 12, 14, 15], "\u0e01\u0e0a\u0e32\u0e22": 1, "\u0e44\u0e1b\u0e0b": 1, "\u0e40\u0e18\u0e2d": [1, 9], "\u0e2b\u0e21\u0e2d\u0e41\u0e0a\u0e21\u0e1b": 1, "\u0e40\u0e1b": [1, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e14\u0e43\u0e08\u0e17": 1, "\u0e07\u0e19": [1, 7, 11], "\u0e33\u0e15\u0e32": 1, "\u0e40\u0e2a": [1, 7, 11], "\u0e22\u0e43\u0e08\u0e17": 1, "\u0e01\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "\u0e23": [1, 3, 7, 9, 10, 11, 12, 14, 15], "\u0e01\u0e20": 1, "\u0e21": [1, 7, 8, 9, 11, 12, 14, 15], "\u0e43\u0e08\u0e17": 1, "\u0e01\u0e40\u0e2a": 1, "\u0e22\u0e2a\u0e25\u0e30": 1, "\u0e43\u0e2b": [1, 7, 9, 11], "\u0e2d\u0e0a": 1, "\u0e0a": [1, 3, 7, 8, 9, 11, 12, 15], "\u0e1e\u0e0a": 1, "\u0e27\u0e22\u0e40\u0e1e": 1, "\u0e2d\u0e19\u0e17\u0e2b\u0e32\u0e23\u0e23\u0e2d\u0e14": 1, "\u0e27\u0e40\u0e2d\u0e07\u0e40\u0e2a": 1, "\u0e22\u0e0a": [1, 7], "\u0e27": [1, 7, 9, 11, 12, 14, 15], "\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "pythainlp": [2, 3, 6, 8, 9, 11, 12, 13, 14, 15], "doe": [2, 12], "come": [2, 5, 12], "instead": [2, 10, 13, 14], "you": [2, 5, 7, 9, 10, 11, 12, 13, 14, 15], "can": [2, 7, 10, 12, 14, 15], "us": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "which": [2, 5, 7, 9, 12, 13, 15], "wa": [2, 12, 14, 15], "train": [2, 10, 11, 12, 13, 14, 15], "univers": 2, "thi": [2, 5, 7, 8, 9, 10, 12, 13, 14, 15], "tutori": [2, 5, 13, 15], "show": [2, 9, 12, 13, 14], "how": [2, 5, 12, 13, 14, 15], "spacy_thai": [2, 10], "collect": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15], "download": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15], "file": [2, 4, 5, 8, 13, 15], "pythonhost": [2, 4, 8], "org": [2, 4, 8, 10, 13, 14], "packag": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "ca": [2, 10], "2d": [2, 15], "c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303": 2, "py3": [2, 4, 8, 10, 13, 14], "ani": [2, 4, 7, 8, 10, 12, 13, 14], "whl": [2, 3, 4, 6, 8, 10, 13, 14], "1mb": [2, 8], "11": [2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "2mb": [2, 4, 8], "ufal": [2, 10], "udpip": [2, 10], "e5": 2, "72": [2, 9, 14], "2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2": 2, "tar": [2, 4, 8, 10, 14], "gz": [2, 4, 8, 10, 14], "304kb": 2, "307kb": 2, "45": [2, 7, 10, 11], "8mb": [2, 8], "requir": [2, 3, 4, 6, 7, 8, 10, 13, 14], "alreadi": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14], "satisfi": [2, 3, 4, 6, 8, 10, 13, 14], "usr": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "local": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "lib": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "python3": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "dist": [2, 3, 4, 6, 8, 9, 10, 13, 14, 15], "deplaci": [2, 10], "58": [2, 5, 7], "87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa": 2, "c1": 2, "09": 2, "1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc": 2, "10": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "6mb": [2, 8], "28": [2, 10, 14, 15], "7mb": [2, 4], "srsly": [2, 10], "request": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15], "23": [2, 4, 7, 8, 9, 10, 13, 14, 15], "thinc": [2, 10], "presh": [2, 10], "wasabi": [2, 10], "plac": 2, "cymem": [2, 10], "bli": [2, 10], "tqdm": [2, 4, 6, 8, 9, 10, 11, 12, 13, 14], "38": [2, 7, 8, 10], "41": [2, 4, 7, 8, 9], "murmurhash": [2, 10], "numpi": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "15": [2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "catalogu": [2, 10], "setuptool": [2, 10], "54": [2, 7, 8, 13], "tinydb": [2, 4, 8, 13], "af": [2, 8], "cd": [2, 8, 9], "1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c": [2, 8], "crfsuit": [2, 3, 4, 6, 8, 10, 13], "79": [2, 4, 7, 8, 9, 10], "47": [2, 7, 8, 9, 14], "58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429": [2, 8], "python_crfsuit": [2, 3, 6, 8, 10, 13], "cp37": [2, 4, 8, 13], "cp37m": [2, 4, 8, 13], "manylinux1_x86_64": [2, 4, 8, 13], "743kb": [2, 8], "747kb": [2, 8], "68": [2, 7, 13], "5mb": [2, 4], "chardet": [2, 4, 8, 10, 13], "urllib3": [2, 3, 4, 6, 8, 10, 13, 14], "26": [2, 4, 8, 10, 11, 13, 14, 15], "21": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "24": [2, 4, 7, 8, 10, 13, 14, 15], "certifi": [2, 3, 4, 6, 8, 10, 13, 14], "2017": [2, 3, 4, 6, 8, 10, 13, 14], "2020": [2, 4, 8], "12": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "idna": [2, 3, 4, 6, 8, 10, 13, 14], "importlib": [2, 4, 8, 13], "20": [2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "python_vers": [2, 4, 8], "34": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "type": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15], "extens": [2, 4, 8, 9, 10, 13, 14], "zipp": [2, 4, 8, 13], "creat": [2, 4, 7, 8, 10, 14, 15], "filenam": [2, 4, 8, 10, 13, 14, 15], "linux_x86_64": [2, 10, 13], "size": [2, 4, 8, 10, 12, 14, 15], "5626703": 2, "sha256": [2, 4, 8, 10, 14], "a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967": 2, "store": [2, 4, 8, 10, 13, 14], "directori": [2, 4, 8, 10, 14], "root": [2, 4, 8, 10, 12, 14], "cach": [2, 4, 8, 10, 14], "0c": 2, "9d": 2, "db": 2, "6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb": 2, "successfulli": [2, 3, 4, 5, 6, 8, 10, 13, 14], "built": [2, 4, 8, 10, 14], "load": [2, 5, 10, 11, 12, 13, 14, 15], "do": [2, 7, 9, 11, 12, 14, 15], "pars": [2, 10], "call": [2, 5, 7, 13, 14], "sentenc": [2, 5, 10, 14, 15], "\u0e1e\u0e27\u0e01\u0e40\u0e23\u0e32\u0e43\u0e0a": 2, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": [2, 5], "visual": [2, 9, 12, 15], "tree": 2, "graphviz": 2, "dot": 2, "pre": [3, 5, 7, 15], "0b4": 3, "22": [3, 4, 5, 6, 7, 8, 10, 11, 13, 14, 15], "31": [3, 6, 7, 9, 13, 14, 15], "charset": [3, 6, 14], "normal": [3, 6, 13, 14], "2023": [3, 6, 14], "cp310": [3, 6, 14], "manylinux_2_17_x86_64": [3, 6, 10, 13, 14], "manylinux2014_x86_64": [3, 4, 6, 8, 10, 13, 14], "993": [3, 6, 14], "16": [3, 7, 8, 9, 11, 13, 15], "corpu": [3, 4, 5, 6, 7, 9, 10, 11, 15], "thai_word": [3, 7], "syllable_token": [3, 7], "all_thai_words_dict": 3, "18": [3, 4, 7, 9, 11, 13, 15], "khave": 3, "khaveeverifi": 3, "kv": 3, "39": [3, 5, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e40\u0e17\u0e2d\u0e0d": 3, "\u0e08": [3, 7, 9, 11, 12, 14, 15], "\u0e1a": [3, 7, 9, 10, 11, 12, 14, 15], "list_sumpu": 3, "try": [3, 5, 7, 10, 12, 14], "is_sumpu": 3, "except": [3, 12], "pass": [3, 7, 13, 15], "print": [3, 4, 7, 9, 11, 12, 15], "\u0e2d": [3, 7, 9, 11, 12, 14, 15], "\u0e1f": [3, 7], "\u0e16": [3, 7, 9, 11], "\u0e2b\u0e25": [3, 6, 12, 14], "\u0e17\u0e27": 3, "\u0e1b": [3, 7, 9, 11, 14, 15], "\u0e07": [3, 7, 9, 11, 12, 14, 15], "\u0e2b": [3, 11], "\u0e04": [3, 7, 8, 9, 11, 12, 14], "\u0e2b\u0e19": [3, 7, 9, 14], "\u0e04\u0e23": [3, 5, 7, 9, 11, 12], "we": [4, 5, 6, 7, 9, 11, 12, 13, 14, 15], "machin": 4, "The": [4, 7, 9, 10, 11, 14, 15], "vistec": [4, 14], "depa": 4, "thailand": 4, "artifici": 4, "intellig": [4, 12], "research": [4, 10, 14], "institut": 4, "fairseq": 4, "ab": 4, "92c6efb05ffdfe16fbdc9e463229d9af8c3b74dc943ed4b4857a87b223c2": 4, "dataclass": 4, "2f": 4, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 4, "cython": 4, "hydra": 4, "core": [4, 10], "52": [4, 7], "e3": [4, 10], "fbd70dd0d3ce4d1d75c22d56c0c9f895cfa7ed6587a9ffb821d6812d6a60": 4, "hydra_cor": 4, "123kb": 4, "133kb": 4, "cffi": [4, 13], "sacrebleu": 4, "7e": 4, "57": [4, 7, 9, 10, 11], "0c7ca4e31a126189dab99c19951910bd081dea5bbd25f24b77107750eae7": 4, "54kb": 4, "61kb": 4, "3mb": [4, 8], "torch": [4, 9, 10, 12, 13, 14], "cu101": 4, "regex": [4, 6, 8, 10, 13, 14], "2019": [4, 7, 8, 10, 13, 14], "omegaconf": 4, "d0": 4, "eb": [4, 10], "9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684": 4, "antlr4": 4, "runtim": 4, "56": [4, 7, 9], "02": [4, 9, 11, 12], "789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d": 4, "112kb": 4, "4mb": [4, 8], "resourc": [4, 13], "pycpars": [4, 13], "portalock": 4, "89": [4, 10], "a6": 4, "3814b7107e0788040870e8825eebf214d72166adf656ba7d4bf14759a06a": 4, "py2": [4, 10], "pyyaml": [4, 10, 13, 14], "7a": 4, "a5": 4, "393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a": 4, "636kb": 4, "645kb": 4, "0mb": [4, 8], "antlr4_python3_runtim": 4, "141231": 4, "7443fbcc47b93d3b320b897cf91d8b947b6fdc6a0795dcce01ed16fd31c8ab6d": 4, "e2": [4, 15], "fa": 4, "b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8": 4, "found": [4, 5, 10, 13, 15], "exist": [4, 10, 13], "uninstal": [4, 10, 13], "sacremos": [4, 8, 13], "43": [4, 7, 8, 9, 10, 11, 14], "f5": [4, 8], "99": [4, 8, 9, 11, 12], "e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577": [4, 8], "click": [4, 6, 8, 10, 13], "joblib": [4, 6, 8, 10, 13], "six": [4, 8, 9, 10, 13], "archiv": [4, 8, 9, 11, 15], "dev": [4, 5, 7, 9, 10, 11, 14, 15], "zip": [4, 8, 9, 11, 15], "upgrad": 4, "dev0": [4, 8], "11003566": 4, "b64ebc4010c51f2644c15473edd0c49540644725a367c28baa0d3f3e19edcccb": 4, "tmp": 4, "ephem": 4, "zkojv2_o": 4, "4e": 4, "1e": [4, 9, 11, 13], "26f3198c6712ecfbee92928ed1dde923a078da3d222401cc78": 4, "download_model_al": 4, "scb_1m_en": 4, "th_mose": 4, "100": [4, 5, 7, 9, 11, 12, 14, 15], "1174648148": 4, "81506882": 4, "14it": 4, "scb_1m_th": 4, "en_spm": 4, "703780432": 4, "08": [4, 7, 10, 11, 13], "78234386": 4, "81it": 4, "enthtransl": 4, "thentransl": 4, "en": [4, 13], "have": [4, 12, 14, 15], "bpe": 4, "want": [4, 10, 12, 14], "fri": 4, "chicken": 4, "\u0e44\u0e01": [4, 7, 9], "\u0e17\u0e2d\u0e14\u0e04": 4, "\u0e30": [4, 9, 11, 15], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e01": 4, "\u0e19\u0e44\u0e01": 4, "\u0e17\u0e2d\u0e14": [4, 9], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e02": 4, "\u0e22\u0e19\u0e42\u0e1b\u0e23\u0e41\u0e01\u0e23\u0e21\u0e04\u0e2d\u0e21\u0e1e": 4, "\u0e27\u0e40\u0e15\u0e2d\u0e23": 4, "write": [4, 11, 12], "comput": 4, "program": 4, "rust": 5, "node": 5, "bind": 5, "similarli": 5, "newmm": [5, 7, 10, 14], "maxim": 5, "match": [5, 7], "base": [5, 7, 8, 9, 10, 11, 14, 15], "honor": [5, 12], "charact": [5, 12], "cluster": [5, 14], "boundari": 5, "howev": [5, 12], "compar": 5, "pure": 5, "implement": 5, "much": [5, 12], "faster": 5, "For": [5, 7, 9, 10, 13, 14, 15], "comparison": 5, "refer": 5, "benchmark": [5, 11], "segment": [5, 10], "lern": 5, "more": [5, 6, 7, 9, 10, 12, 14, 15], "about": [5, 7, 9, 12], "here": [5, 7, 12, 14], "In": [5, 6, 11, 14], "learn": [5, 9, 11, 12], "serv": 5, "first": [5, 11, 14], "without": [5, 7, 12], "specifi": [5, 7, 14], "paramet": [5, 7, 13], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e15": [5, 8], "\u0e14\u0e04\u0e33\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": 5, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a": [5, 7, 8], "\u0e14": [5, 7, 8, 9, 11, 14, 15], "\u0e04\u0e33": 5, "now": [5, 12], "enhanc": 5, "countri": [5, 12, 14], "wget": [5, 9, 11, 13, 15], "command": 5, "It": [5, 7, 8, 9, 10, 11, 14, 15], "plain": 5, "contain": [5, 9, 11], "one": [5, 7, 12, 15], "per": [5, 8], "line": [5, 9, 12], "raw": [5, 9, 11, 13, 14, 15], "countries_th": 5, "txt": [5, 6, 9, 11], "2021": [5, 6, 8, 13, 14], "06": [5, 7, 11], "05": [5, 9, 13], "resolv": [5, 15], "140": [5, 9, 15], "82": [5, 11, 15], "112": [5, 7], "connect": [5, 15], "443": [5, 15], "sent": [5, 7, 10, 15], "await": [5, 15], "respons": [5, 15], "302": [5, 15], "locat": [5, 7, 10, 15], "githubusercont": [5, 15], "follow": [5, 7, 9, 15], "185": [5, 15], "199": [5, 15], "108": [5, 15], "133": [5, 15], "109": 5, "200": [5, 9, 12, 15], "ok": [5, 7, 15], "length": [5, 8, 13, 15], "7622": 5, "4k": 5, "save": [5, 9, 11, 12, 13, 15], "44k": 5, "70": [5, 7, 9, 11, 13], "load_dict": 5, "function": [5, 7, 9, 12, 14], "content": [5, 11], "success": [5, 12], "name": [5, 9, 10, 11, 12, 13, 14], "ha": [5, 8, 12, 14, 15], "been": [5, 12, 14], "final": 5, "method": [5, 14], "\u0e2a\u0e27": [5, 9, 11], "\u0e2a\u0e14": [5, 9, 11], "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 5, "\u0e40\u0e01\u0e32\u0e2b\u0e25": 5, "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 5, "\u0e44\u0e17\u0e22": [5, 11], "data": [6, 10, 11, 12, 13, 14], "orchid": 6, "read": [6, 7, 10, 11, 12, 13], "pull": [6, 15], "524": 6, "svgling": 6, "nltk": [6, 10], "svgwrite": 6, "66": [6, 7], "word_token": [6, 7, 9, 11, 15], "pos_tag": [6, 7, 8], "chunk_pars": 6, "conlltags2tre": 6, "test": [6, 7, 9, 11, 12], "m": [6, 7, 9], "w": [6, 7, 11, 15], "t": [6, 7, 8, 9, 10, 12, 14], "engin": [6, 7, 8, 10], "perceptron": [6, 10], "p": [6, 7, 9, 13, 15], "draw_tre": 6, "\u0e41\u0e21\u0e27\u0e01": 6, "\u0e19\u0e1b\u0e25\u0e32": 6, "\u0e04\u0e19\u0e2b\u0e19\u0e2d\u0e07\u0e04\u0e32\u0e22\u0e40\u0e1b": 6, "\u0e19\u0e04\u0e19\u0e19": 6, "\u0e32\u0e23": [6, 7], "\u0e1b\u0e25\u0e32\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e43\u0e19\u0e19": 6, "\u0e33": [6, 9, 15], "\u0e33\u0e21": 6, "\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e17\u0e33\u0e44\u0e21\u0e40\u0e02\u0e32\u0e23": 6, "\u0e01\u0e04": 6, "\u0e13": [6, 7, 9, 11], "\u0e04\u0e19\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e07\u0e15": [6, 7, 12], "\u0e19\u0e44\u0e21": [6, 7, 15], "basic": 7, "modul": [7, 9, 10, 12], "uncom": [7, 9, 11, 12, 15], "run": [7, 9, 10, 11, 12, 13, 15], "colab": [7, 9, 10, 11, 12, 13, 14, 15], "extra": 7, "blob": [7, 13], "epitran": 7, "__version__": 7, "provid": [7, 8, 11, 12, 14], "some": [7, 10, 12, 14], "readi": 7, "set": [7, 9, 10, 11, 12, 13, 14, 15], "g": [7, 10, 14], "conson": 7, "vowel": 7, "tonemark": 7, "symbol": 7, "conveni": 7, "There": 7, "ar": [7, 9, 10, 11, 12, 13, 14, 15], "also": [7, 9, 12, 15], "few": [7, 12], "util": [7, 13], "thai_charact": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e24\u0e26\u0e30": 7, "\u0e32\u0e33": [7, 15], "\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45": 7, "\u0e2f": 7, "\u0e46": [7, 9, 11], "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59": 7, "88": [7, 10], "thai_conson": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e": 7, "44": 7, "\u0e54": 7, "thai_digit": 7, "isthai": 7, "fals": [7, 8, 9, 10, 11, 12, 13], "ignore_char": 7, "counthai": 7, "proport": 7, "ignor": [7, 11], "non": [7, 15], "alphabet": 7, "default": [7, 8, 9, 10, 11, 14], "countthai": 7, "\u0e19\u0e2d\u0e32\u0e17": [7, 11, 14], "\u0e15\u0e22": [7, 11, 14], "\u0e19\u0e32\u0e04\u0e21": 7, "2562": [7, 14], "67": 7, "85714285714286": 7, "sort": [7, 12], "accord": 7, "\u0e2d\u0e19": [7, 9, 11, 14], "\u0e01\u0e23\u0e30\u0e14\u0e32\u0e29": 7, "\u0e01\u0e23\u0e23\u0e44\u0e01\u0e23": 7, "\u0e44\u0e02": [7, 11], "\u0e1c": [7, 9, 11, 12, 15], "\u0e32\u0e44\u0e2b\u0e21": 7, "revers": 7, "dai": [7, 11, 12, 15], "month": 7, "buddhist": 7, "era": 7, "b": [7, 8, 9, 11, 12], "direct": [7, 12], "similar": [7, 12, 14], "datetim": 7, "strftime": 7, "thai_strftim": 7, "fmt": [7, 9, 14], "a\u0e17": 7, "d": [7, 9, 13], "\u0e28": [7, 9, 11, 12, 14], "y": [7, 9, 11, 12, 15], "\u0e40\u0e27\u0e25\u0e32": 7, "h": 7, "1976": 7, "40": [7, 9, 10, 11, 13], "\u0e19\u0e1e": [7, 12, 14], "\u0e18\u0e17": 7, "\u0e25\u0e32\u0e04\u0e21": 7, "2519": 7, "01": [7, 9, 11, 12, 13], "modifi": 7, "appli": [7, 9, 11], "right": [7, 11, 14, 15], "befor": [7, 9, 11, 12, 13], "minu": 7, "pad": [7, 13, 14], "numer": [7, 11], "result": [7, 10, 12, 13, 14], "avail": [7, 14], "underscor": 7, "space": 7, "zero": [7, 13], "convert": [7, 13], "upper": 7, "case": [7, 10, 11, 12, 14], "swap": 7, "o": [7, 8, 13, 14], "letter": [7, 15], "altern": 7, "note": [7, 10, 14, 15], "thai_tim": 7, "renam": 7, "time_to_thaiword": 7, "\u0e19\u0e22": 7, "\u0e19\u0e32\u0e2c": 7, "\u0e01\u0e32\u0e2a": 7, "\u0e1a\u0e2a": 7, "\u0e19\u0e32\u0e17": [7, 9], "\u0e1a\u0e40\u0e01": 7, "\u0e32\u0e27": [7, 9], "wai": [7, 14], "chosen": 7, "24h": 7, "6h": 7, "m6h": 7, "yourself": [7, 12], "\u0e40\u0e17": 7, "\u0e22\u0e07\u0e04": 7, "\u0e19\u0e2a": [7, 11], "precis": 7, "well": [7, 14], "minut": [7, 9, 12], "second": [7, 15], "onli": [7, 10, 12], "valu": [7, 12], "30": [7, 9, 14, 15], "\u0e2a\u0e2d\u0e07\u0e42\u0e21\u0e07\u0e40\u0e0a": 7, "\u0e32\u0e2a": 7, "\u0e1a\u0e40\u0e08": 7, "\u0e14\u0e19\u0e32\u0e17": 7, "\u0e41\u0e1b\u0e14\u0e42\u0e21\u0e07\u0e2a": 7, "\u0e2b\u0e01\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "object": [7, 13], "\u0e1a\u0e2a\u0e32\u0e21\u0e19\u0e32\u0e2c": 7, "\u0e1a\u0e2b": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e2a": 7, "At": 7, "sub": 7, "crfcut": [7, 10], "uss": 7, "sent_token": 7, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e1a": 7, "\u0e0d\u0e0d": 7, "\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e01\u0e32\u0e23\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07\u0e41\u0e1c": 7, "\u0e19\u0e2a\u0e22\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e04\u0e23\u0e32\u0e27": 7, "\u0e17\u0e18\u0e28": 7, "\u0e01\u0e23\u0e32\u0e0a": 7, "\u0e52\u0e54\u0e57\u0e55": 7, "\u0e19\u0e23": [7, 11], "\u0e10\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e09\u0e1a": 7, "\u0e1a\u0e0a": 7, "\u0e0b": [7, 9, 11, 12, 14], "\u0e07\u0e16": 7, "\u0e2d\u0e27": [7, 8, 9], "\u0e32\u0e40\u0e1b": [7, 11], "\u0e1a\u0e41\u0e23\u0e01\u0e41\u0e2b": 7, "\u0e07\u0e23\u0e32\u0e0a\u0e2d\u0e32\u0e13\u0e32\u0e08": 7, "\u0e01\u0e23\u0e2a\u0e22\u0e32\u0e21": 7, "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e43\u0e0a": 7, "\u0e40\u0e21": [7, 9, 11], "\u0e19\u0e17": [7, 9, 10, 11, 12, 14], "27": [7, 8, 10, 11, 13, 14, 15], "\u0e19\u0e32\u0e22\u0e19": 7, "2475": 7, "\u0e42\u0e14\u0e22\u0e40\u0e1b": 7, "\u0e19\u0e1c\u0e25\u0e1e\u0e27\u0e07\u0e2b\u0e25": 7, "\u0e07\u0e01\u0e32\u0e23\u0e1b\u0e0f": 7, "\u0e42\u0e14\u0e22\u0e04\u0e13\u0e30\u0e23\u0e32\u0e29\u0e0e\u0e23": 7, "nwhitespac": 7, "newlin": 7, "whitespac": 7, "maximum": [7, 8], "algorithm": 7, "\u0e08\u0e30\u0e23": 7, "\u0e04\u0e27\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e23": 7, "\u0e32\u0e22\u0e17": 7, "\u0e17\u0e33\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30\u0e04\u0e07\u0e08\u0e30\u0e44\u0e21": 7, "\u0e22\u0e2d\u0e21\u0e43\u0e2b": 7, "\u0e17\u0e33\u0e19\u0e32\u0e1a\u0e19\u0e2b\u0e25": 7, "\u0e07\u0e04\u0e19": 7, "nnewmm": 7, "keep_whitespac": 7, "\u0e08\u0e30": [7, 9, 11], "\u0e04\u0e27\u0e32\u0e21": [7, 9], "\u0e17\u0e33": [7, 9], "\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30": [7, 9, 11, 12, 15], "\u0e04\u0e07\u0e08\u0e30": 7, "other": [7, 12, 14, 15], "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e09\u0e1a": 7, "\u0e1a\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2b\u0e21": 7, "\u0e41\u0e25": [7, 9, 11, 14], "longest": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "custom_token": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "\u0e09\u0e1a": 7, "\u0e43\u0e2b\u0e21": [7, 9, 11], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22": 7, "common": [7, 15], "add": [7, 8, 14], "remov": [7, 13, 14], "new": [7, 9, 12, 15], "\u0e22\u0e32\u0e22\u0e27": 7, "\u0e17\u0e22\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23": [7, 12], "\u0e02\u0e2d\u0e07\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "\u0e2d\u0e2a": 7, "\u0e21\u0e2d\u0e1f": 7, "frozenset": 7, "\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "isaac": 7, "asimov": 7, "\u0e22\u0e32\u0e22": 7, "\u0e02\u0e2d\u0e07": [7, 9, 11, 15], "\u0e21\u0e2d": 7, "trie": 7, "ilo87": 7, "\u0e32\u0e14": 7, "\u0e27\u0e22\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e04": 7, "\u0e21\u0e04\u0e23\u0e2d\u0e07\u0e2a": 7, "\u0e17\u0e18": [7, 11, 14], "\u0e43\u0e19\u0e01\u0e32\u0e23\u0e23\u0e27\u0e21\u0e15": 7, "ilo98": 7, "\u0e27\u0e22\u0e2a": 7, "\u0e27\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e23": 7, "\u0e27\u0e21\u0e40\u0e08\u0e23\u0e08\u0e32\u0e15": 7, "\u0e2d\u0e23\u0e2d\u0e07": 7, "new_word": 7, "\u0e01\u0e32\u0e23\u0e23": 7, "\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e2a": 7, "\u0e21\u0e1e": 7, "\u0e19\u0e18": [7, 15], "union": 7, "custom_dictionary_tri": 7, "custom_dict": 7, "ilo": 7, "87": 7, "\u0e27\u0e22": [7, 9, 11], "\u0e20\u0e32\u0e1e": 7, "\u0e43\u0e19": [7, 9, 11], "\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e01\u0e32\u0e23": [7, 9, 11, 15], "\u0e21\u0e04\u0e23\u0e2d\u0e07": 7, "\u0e23\u0e27\u0e21\u0e15": 7, "98": [7, 11], "\u0e27\u0e21": [7, 9], "\u0e40\u0e08\u0e23\u0e08\u0e32": 7, "differ": [7, 15], "speedtest_text": 7, "\u0e04\u0e23\u0e1a\u0e23\u0e2d\u0e1a": 7, "\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e40\u0e0a": [7, 11, 14], "\u0e19\u0e19": [7, 9, 11], "2547": 7, "\u0e21\u0e19": [7, 15], "\u0e21\u0e0a\u0e32\u0e22\u0e01\u0e27": 7, "\u0e32": [7, 8, 9, 11, 12, 14, 15], "370": 7, "\u0e04\u0e19": [7, 9, 11, 14], "\u0e01\u0e42\u0e22\u0e19\u0e02": 7, "\u0e19\u0e23\u0e16\u0e22": 7, "\u0e40\u0e2d": [7, 9], "\u0e21\u0e0b": 7, "\u0e2b\u0e23": [7, 9, 11, 14], "\u0e19\u0e2d\u0e19\u0e0b": 7, "\u0e2d\u0e19\u0e01": [7, 9], "\u0e19\u0e04": 7, "\u0e19\u0e25\u0e30": 7, "\u0e40\u0e14": [7, 11, 12], "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e16\u0e32\u0e19": 7, "\u0e15\u0e33\u0e23\u0e27\u0e08\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e44\u0e1b\u0e44\u0e01\u0e25": 7, "150": [7, 9], "\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23": [7, 14], "\u0e44\u0e1b\u0e16": 7, "\u0e07\u0e04": 7, "\u0e32\u0e22\u0e2d": 7, "\u0e07\u0e04\u0e22": 7, "\u0e17\u0e18\u0e1a\u0e23": 7, "\u0e2b\u0e32\u0e23": 7, "\u0e40\u0e27\u0e25\u0e32\u0e01\u0e27": 7, "\u0e27\u0e42\u0e21\u0e07": 7, "\u0e43\u0e19\u0e2d": [7, 14], "\u0e01\u0e04\u0e14": 7, "\u0e0d\u0e32\u0e15": 7, "\u0e2d\u0e07\u0e23": [7, 9], "\u0e10": 7, "\u0e04\u0e14": 7, "\u0e08\u0e1a\u0e25\u0e07\u0e17": 7, "\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e19": 7, "\u0e1b\u0e23\u0e30\u0e19\u0e2d\u0e21\u0e22\u0e2d\u0e21\u0e04\u0e27\u0e32\u0e21": 7, "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21\u0e08": 7, "\u0e32\u0e22\u0e04": 7, "\u0e19\u0e44\u0e2b\u0e21\u0e17\u0e14\u0e41\u0e17\u0e19\u0e23\u0e27\u0e21": 7, "42": [7, 8, 9, 10, 13], "\u0e32\u0e19\u0e1a\u0e32\u0e17\u0e43\u0e2b": 7, "\u0e1a\u0e0d\u0e32\u0e15": 7, "\u0e22\u0e2b\u0e32\u0e22": 7, "\u0e23\u0e32\u0e22": 7, "\u0e14\u0e2b": 7, "\u0e1a\u0e41\u0e25\u0e30\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e2a\u0e23": 7, "\u0e08\u0e41\u0e25": 7, "\u0e27\u0e22\u0e40\u0e25": 7, "\u0e2d\u0e01\u0e15": 7, "\u0e07\u0e17": [7, 9], "\u0e40\u0e02\u0e15": 7, "\u0e41\u0e02\u0e27\u0e07\u0e2b": 7, "\u0e27\u0e2b\u0e21\u0e32\u0e01": 7, "\u0e40\u0e02\u0e15\u0e1a\u0e32\u0e07\u0e01\u0e30\u0e1b": 7, "\u0e01\u0e23": [7, 11], "\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23": [7, 11], "\u0e2a\u0e21": [7, 12], "\u0e41\u0e25\u0e30\u0e15": 7, "\u0e27\u0e41\u0e17\u0e19\u0e1e\u0e23\u0e23\u0e04\u0e01\u0e32\u0e23\u0e40\u0e21": 7, "\u0e2d\u0e07\u0e08\u0e32\u0e01\u0e2b\u0e25\u0e32\u0e22\u0e1e\u0e23\u0e23\u0e04\u0e15": 7, "\u0e32\u0e07\u0e21\u0e32\u0e40\u0e1d": 7, "\u0e07\u0e40\u0e01\u0e15\u0e01\u0e32\u0e23\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e43\u0e01\u0e25": 7, "\u0e42\u0e14\u0e22": [7, 9, 11], "\u0e20": [7, 8], "\u0e2a\u0e23": [7, 9, 14], "\u0e42\u0e0a\u0e15": [7, 9], "\u0e40\u0e14\u0e0a\u0e32\u0e0a": 7, "\u0e22\u0e19": [7, 8, 9, 10, 11, 14], "\u0e19\u0e15": [7, 9, 14, 15], "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1e\u0e25": 7, "\u0e07\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e23": 7, "\u0e41\u0e25\u0e30\u0e1e\u0e23": 7, "\u0e29\u0e10": 7, "\u0e0a\u0e23\u0e2a": 7, "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e18": 7, "\u0e44\u0e14": [7, 9, 11, 12, 14, 15], "\u0e04\u0e30\u0e41\u0e19\u0e19": 7, "96": 7, "\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e17": 7, "\u0e32\u0e01": [7, 14], "\u0e40\u0e21\u0e29\u0e32\u0e22\u0e19": [7, 11], "\u0e07\u0e40\u0e1b": 7, "\u0e19\u0e27": 7, "\u0e19\u0e2d": [7, 11], "\u0e2a\u0e40\u0e15\u0e2d\u0e23": 7, "\u0e19\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e02\u0e2d\u0e07\u0e0a\u0e32\u0e27\u0e04\u0e23": 7, "\u0e2a\u0e15": 7, "\u0e40\u0e01": [7, 9, 11, 14], "\u0e14\u0e40\u0e2b\u0e15": 7, "\u0e23\u0e30\u0e40\u0e1a": 7, "\u0e14\u0e15": 7, "\u0e2d\u0e40\u0e19": 7, "\u0e2d\u0e07\u0e43\u0e19\u0e42\u0e1a\u0e2a\u0e16": 7, "\u0e41\u0e25\u0e30\u0e42\u0e23\u0e07\u0e41\u0e23\u0e21\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e19": 7, "\u0e2d\u0e22": [7, 8, 9, 11, 14], "\u0e41\u0e2b": [7, 15], "\u0e07\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e28\u0e23": 7, "\u0e07\u0e01\u0e32": 7, "\u0e15\u0e41\u0e25": 7, "\u0e27\u0e2d\u0e22": 7, "156": 7, "\u0e41\u0e25\u0e30\u0e1a\u0e32\u0e14\u0e40\u0e08": 7, "\u0e1a\u0e2b\u0e25\u0e32\u0e22\u0e23": 7, "\u0e2d\u0e22\u0e04\u0e19": 7, "\u0e07\u0e44\u0e21": 7, "\u0e02": [7, 9, 11, 14], "\u0e2d\u0e21": [7, 11, 14], "\u0e25\u0e27": 7, "\u0e32\u0e1c": 7, "\u0e2d\u0e40\u0e2b\u0e15": 7, "\u0e21\u0e32\u0e08\u0e32\u0e01\u0e1d": 7, "\u0e32\u0e22\u0e43\u0e14": 7, "\u0e19\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e08": 7, "\u0e14\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a": 7, "\u0e21\u0e02": [7, 11, 14], "\u0e2d\u0e23": [7, 9], "\u0e40\u0e23": [7, 10, 11], "\u0e21\u0e2a\u0e32\u0e22\u0e41\u0e16\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e2a": 7, "\u0e19\u0e17\u0e32\u0e07\u0e43\u0e19\u0e0a": 7, "\u0e27\u0e07\u0e1b\u0e25\u0e32\u0e22\u0e2a": 7, "\u0e1b\u0e14\u0e32\u0e2b": [7, 11], "\u0e01\u0e01": [7, 9, 15], "\u0e07\u0e22": 7, "\u0e2d\u0e20": 7, "\u0e21\u0e2b\u0e32\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e40\u0e0a": 7, "\u0e2d\u0e21\u0e42\u0e25\u0e01\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e40\u0e04\u0e23": [7, 9, 11], "\u0e2d\u0e07\u0e21": 7, "\u0e2d\u0e41\u0e1c": 7, "\u0e1e\u0e25": 7, "\u0e1a\u0e1f": 7, "\u0e07\u0e02": [7, 9], "\u0e08\u0e32\u0e23\u0e13": 7, "\u0e1b\u0e23\u0e30\u0e40\u0e14": [7, 11], "\u0e19\u0e01": [7, 14], "\u0e1a\u0e14": [7, 15], "\u0e01\u0e2b\u0e19": 7, "\u0e41\u0e25\u0e30\u0e04\u0e27\u0e32\u0e21\u0e44\u0e21": 7, "\u0e42\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2a": 7, "\u0e10\u0e1a\u0e32\u0e25\u0e1b": 7, "\u0e07\u0e1a\u0e2d\u0e01\u0e27": 7, "\u0e40\u0e27\u0e17": 7, "\u0e1b\u0e23\u0e30\u0e0a": 7, "belt": 7, "road": 7, "forum": 7, "\u0e43\u0e19\u0e0a": [7, 12], "\u0e27\u0e07\u0e27": 7, "\u0e2d\u0e40\u0e1b": [7, 12], "\u0e19\u0e07\u0e32\u0e19\u0e01\u0e32\u0e23\u0e17": 7, "\u0e15\u0e17": 7, "\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e17": 7, "\u0e14\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e19\u0e43\u0e19\u0e1b": 7, "speed": 7, "through": [7, 12], "wrapper": 7, "cpu": [7, 9], "user": [7, 9, 10, 11], "253": 7, "sy": [7, 9], "total": [7, 9, 11, 12], "256": 7, "wall": [7, 9], "255": 7, "60": [7, 9], "\u00b5": 7, "46": [7, 10, 13, 15], "safe": 7, "33": [7, 9, 14], "attacut": [7, 10], "833": 7, "174": [7, 11], "576": 7, "possibl": [7, 15], "multi_cut": 7, "find_all_seg": 7, "mmcut": 7, "\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": [7, 9], "\u0e19\u0e44\u0e1b\u0e44\u0e14": 7, "\u0e32\u0e07\u0e44\u0e23\u0e1a": 7, "\u0e32\u0e07": [7, 9, 11], "\u0e44\u0e1b": [7, 9, 10, 11], "\u0e44\u0e23": [7, 14], "\u0e19\u0e44\u0e1b": [7, 9], "\u0e32\u0e07\u0e44\u0e23": 7, "either": 7, "ssg": [7, 10, 14], "ponrawe": 7, "__": [7, 11], "crf": 7, "prasertsom": 7, "smaller": [7, 14], "than": [7, 12, 15], "inform": [7, 9], "retriev": 7, "theeramunkong": 7, "et": 7, "al": 7, "2004": 7, "unit": 7, "35": [7, 9, 10, 14, 15], "subword_token": [7, 8], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": [7, 9], "\u0e23\u0e30": [7, 9], "\u0e44\u0e17": 7, "dict": [7, 9, 11, 12, 13], "known": [7, 15], "36": [7, 9, 10, 11, 13, 14, 15], "\u0e25\u0e40\u0e25\u0e32\u0e30": 7, "\u0e0b\u0e2d\u0e21": 7, "\u0e0b\u0e2d": 7, "\u0e2a\u0e21\u0e2d\u0e07\u0e1a\u0e27\u0e21\u0e23": 7, "\u0e19\u0e41\u0e23\u0e07": 7, "\u0e40\u0e25\u0e32\u0e30": 7, "\u0e2a\u0e21\u0e2d\u0e07": 7, "\u0e1a\u0e27\u0e21": 7, "\u0e41\u0e23\u0e07": 7, "extern": 7, "ommit": 7, "output": [7, 11, 13, 14], "37": [7, 11, 13], "These": 7, "task": [7, 10, 14], "like": [7, 9, 11, 12, 14], "cut": 7, "certain": [7, 12], "point": [7, 12, 15], "typo": 7, "tcc_po": 7, "posit": [7, 15], "ch": 7, "two": [7, 11, 12, 15], "roman": 7, "latin": 7, "royal": 7, "system": [7, 10, 12], "transcript": 7, "rtg": 7, "support": [7, 8, 15], "simpl": [7, 10, 14, 15], "royin": 7, "accur": 7, "thai2rom": 7, "context": 7, "mean": [7, 9, 12, 13], "sound": [7, 13], "represent": 7, "ipa": 7, "intern": 7, "phonet": 7, "icu": 7, "compon": 7, "unicod": 7, "pyicu": 7, "\u0e41\u0e21\u0e27": [7, 10, 15], "maeo": 7, "\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": [7, 11], "phapn": 7, "obvious": 7, "wrong": [7, 12], "m\u025b\u02d0w": 7, "updat": [7, 9, 14], "g2p": 7, "up": [7, 12], "\u025b\u02d0": 7, "p\u02b0a\u02d0pjanot": 7, "p\u02b0": 7, "a\u02d0": 7, "n": [7, 8, 11], "width": 7, "zwsp": 7, "zwnj": 7, "duplic": 7, "repeat": [7, 9], "dangl": 7, "reorder": 7, "tone": 7, "mark": 7, "dure": 7, "\u0e40\u0e40\u0e1b\u0e25\u0e01": 7, "\u0e41\u0e1b\u0e25\u0e01": 7, "\u0e40": 7, "v": [7, 13], "\u0e41": 7, "below": 7, "standard": 7, "order": [7, 9, 11, 15], "sara": 7, "aa": 7, "mai": [7, 12, 14], "ek": 7, "\u0e40\u0e01\u0e32": 7, "includ": [7, 9, 15], "\u0e1a\u0e27": 7, "\u0e1e\u0e23": 7, "immedi": 7, "nnormal": 7, "multipl": [7, 13], "A": 7, "row": [7, 11, 15], "keep": 7, "them": [7, 11, 12], "reduc": 7, "variat": 7, "48": [7, 9, 10, 13], "\u0e40\u0e01\u0e30\u0e30\u0e30": 7, "\u0e40\u0e01\u0e30": 7, "just": [7, 12], "seri": [7, 12], "remove_zw": 7, "remove_dup_spac": 7, "remove_repeat_vowel": 7, "remove_dangl": 7, "If": [7, 10, 14], "don": [7, 12], "behavior": 7, "those": [7, 12], "shown": 7, "abov": 7, "remove_tonemark": 7, "reorder_vowel": 7, "individu": 7, "your": [7, 12, 13], "own": [7, 12], "sometim": 7, "perform": [7, 9, 11, 12, 14], "search": [7, 14], "pythainp": 7, "deal": [7, 12], "49": 7, "arabic_digit_to_thai_digit": 7, "thai_digit_to_arabic_digit": 7, "digit_to_text": 7, "\u0e09": [7, 14], "\u0e01\u0e40\u0e09": 7, "\u0e42\u0e23\u0e1b\u0e40\u0e23": 7, "\u0e22\u0e01": 7, "\u0e51\u0e51\u0e52": 7, "50": [7, 9, 11], "51": [7, 11, 15], "\u0e07\u0e2b\u0e19": [7, 9, 12], "\u0e07\u0e2a\u0e2d\u0e07": 7, "index": [7, 9, 10, 11, 14, 15], "wikipedia": [7, 11, 12, 14], "three": 7, "kind": [7, 12], "lk82": 7, "metasound": 7, "udom83": 7, "equival": 7, "\u0e23\u0e16": [7, 9, 11], "\u0e23\u0e14": 7, "\u0e27\u0e23\u0e23": 7, "\u0e19\u0e20": 7, "\u0e23\u0e13\u0e30": 7, "\u0e23\u0e13\u0e01\u0e32\u0e23": 7, "\u0e21\u0e23\u0e23\u0e04": 7, "\u0e01\u0e29": [7, 15], "\u0e1ae400": 7, "\u0e1a930000": 7, "\u0e1a550": 7, "\u0e1ae419": 7, "\u0e1a931900": 7, "\u0e1a551": 7, "\u0e211000": 7, "\u0e21100000": 7, "\u0e21100": 7, "\u0e21310000": 7, "\u0e21551": 7, "\u0e231000": 7, "\u0e23100000": 7, "\u0e25100": 7, "\u0e23100": 7, "peter": 7, "norvig": 7, "togeth": 7, "nation": 7, "tnc": 7, "\u0e40\u0e2b\u0e25": [7, 9], "\u0e22\u0e21": 7, "correct": [7, 15], "most": [7, 12, 15], "55": [7, 11], "when": [7, 9, 10, 12, 14], "norvigspellcheck": 7, "kei": [7, 15], "int": [7, 11], "tupl": [7, 13, 15], "assign": 7, "everi": [7, 9, 12], "user_dict": 7, "1000": [7, 9, 11, 15], "\u0e22\u0e27": [7, 9, 11, 14, 15], "1000000": 7, "checker": [7, 15], "As": 7, "our": [7, 14], "give": [7, 9, 12], "edit": [7, 12, 15], "distanc": 7, "prioriti": 7, "over": 7, "textbook": 7, "By": 7, "ttc": 7, "word_freq": 7, "To": [7, 9], "current": [7, 14], "59": [7, 9, 13], "\u0e18": [7, 9, 14], "\u0e44\u0e2a": 7, "\u0e01\u0e23\u0e2d\u0e01": 7, "\u0e1b\u0e25": [7, 11], "\u0e40\u0e15": [7, 9, 11], "\u0e02\u0e2d\u0e1a\u0e04": [7, 14], "356": 7, "\u0e1b\u0e23\u0e30\u0e2a\u0e32\u0e19": 7, "84": [7, 14], "\u0e23\u0e33\u0e44\u0e23": 7, "\u0e27\u0e21\u0e17": 7, "\u0e2d\u0e07": [7, 9, 11, 14], "\u0e1d": 7, "\u0e01\u0e21\u0e30\u0e02\u0e32\u0e21": 7, "condit": 7, "filter": 7, "39963": 7, "61": [7, 11], "min_freq": [7, 9, 11, 12], "min_len": 7, "max_len": [7, 9], "30376": 7, "62": [7, 13], "checker_no_filt": 7, "dict_filt": 7, "66209": 7, "63": [7, 10], "remove_yamok": 7, "els": [7, 12, 14], "checker_custom_filt": 7, "66204": 7, "64": [7, 10, 11, 12, 13], "pos_tag_s": 7, "\u0e19\u0e17\u0e32\u0e07": 7, "fixn": 7, "vact": 7, "65": [7, 14], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e2a\u0e33\u0e19": 7, "\u0e01\u0e19\u0e32\u0e22\u0e01\u0e2f": 7, "\u0e2a\u0e23\u0e23\u0e40\u0e2a\u0e23": 7, "\u0e0d": [7, 11, 15], "\u0e41\u0e01": 7, "\u0e27\u0e01\u0e33\u0e40\u0e19": 7, "\u0e19\u0e08\u0e32\u0e01\u0e15\u0e33\u0e41\u0e2b\u0e19": 7, "\u0e17\u0e23\u0e07\u0e04": 7, "\u0e13\u0e27": 7, "\u0e12": 7, "\u0e40\u0e28\u0e29": [7, 9], "\u0e01\u0e2d\u0e07\u0e17": 7, "\u0e1e\u0e1a\u0e01": [7, 12], "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21": 7, "\u0e2d\u0e18": 7, "\u0e01\u0e23\u0e21\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2a": 7, "ncmn": 7, "punc": 7, "jsbr": 7, "jcrg": 7, "vsta": 7, "tagger": [7, 14], "bio": 7, "scheme": 7, "begin": [7, 12], "insid": [7, 13], "outsid": [7, 12], "pip3": 7, "ner": [7, 8, 10, 14], "named_ent": 7, "thainametagg": [7, 8], "get_ner": [7, 8], "2563": 7, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a\u0e40\u0e27\u0e25\u0e32": 7, "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e02\u0e19\u0e2a": 7, "\u0e07\u0e01\u0e23": 7, "\u0e07\u0e40\u0e17\u0e1e\u0e43\u0e01\u0e25": 7, "\u0e16\u0e19\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e44\u0e1b\u0e08": 7, "\u0e07\u0e2b\u0e27": [7, 14], "\u0e14\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e27\u0e23\u0e32\u0e04\u0e32": 7, "297": [7, 15], "\u0e1a\u0e32\u0e17": [7, 9], "num": [7, 13], "punct": 7, "noun": [7, 10], "verb": [7, 10, 15], "\u0e23\u0e30\u0e1a\u0e1a": [7, 9], "\u0e08\u0e32\u0e01": [7, 9, 11, 12], "adp": 7, "\u0e02\u0e19\u0e2a": 7, "organ": [7, 8, 14], "\u0e07\u0e40\u0e17\u0e1e": 7, "\u0e43\u0e01\u0e25": 7, "adj": 7, "\u0e16\u0e19\u0e19": 7, "\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "aux": [7, 10, 13], "\u0e23\u0e32\u0e04\u0e32": 7, "monei": [7, 12], "word_vector": [7, 15], "\u0e29\u0e22": [7, 15], "2504981": 7, "doesnt_match": [7, 15], "\u0e04\u0e04\u0e25": 7, "\u0e40\u0e08": [7, 9, 11, 12, 15], "\u0e32\u0e2b\u0e19": 7, "\u0e32\u0e17": 7, "site": 7, "gensim": [7, 15], "keyedvector": [7, 15], "877": 7, "futurewarn": [7, 15], "arrai": [7, 9, 11, 13, 14, 15], "stack": [7, 15], "must": [7, 12, 13, 15], "iter": [7, 11, 15], "deprec": [7, 10, 13, 14, 15], "rais": [7, 15], "an": [7, 12, 14, 15], "error": [7, 15], "futur": [7, 15], "vstack": [7, 15], "self": [7, 15], "word_vec": [7, 15], "use_norm": [7, 15], "used_word": [7, 15], "astyp": [7, 9, 13, 15], "real": [7, 12, 15], "69": [7, 13], "bahttext": 7, "1234567890123": 7, "\u0e07\u0e25": 7, "\u0e32\u0e19\u0e2a\u0e2d\u0e07\u0e41\u0e2a\u0e19\u0e2a\u0e32\u0e21\u0e2b\u0e21": 7, "\u0e19\u0e2b": 7, "\u0e2d\u0e22\u0e2b\u0e01\u0e2a": 7, "\u0e14\u0e25": 7, "\u0e32\u0e19\u0e41\u0e1b\u0e14\u0e41\u0e2a\u0e19\u0e40\u0e01": 7, "\u0e32\u0e2b\u0e21": 7, "\u0e19\u0e2b\u0e19": 7, "\u0e07\u0e23": [7, 11], "\u0e2d\u0e22\u0e22": 7, "\u0e1a\u0e2a\u0e32\u0e21\u0e1a\u0e32\u0e17\u0e2a": 7, "\u0e32\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "round": [7, 13], "satang": 7, "909": 7, "\u0e07\u0e1a\u0e32\u0e17\u0e40\u0e01": 7, "\u0e1a\u0e40\u0e2d": 7, "\u0e14\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "lowphansirikul": 8, "l": [8, 9, 11, 15], "polpanuma": 8, "c": [8, 9, 11, 15], "jantrakulchai": 8, "nutanong": 8, "arxiv": 8, "preprint": 8, "2101": 8, "09635": 8, "jan": 8, "full": [8, 12], "thai2transform": [8, 14], "11006400": 8, "f89b594cbbebbc1940c16b0957a74182f2ea8169de8270e33f0c6bac5d1d4fcd": 8, "9a": 8, "9e": 8, "b2ab1db5c70b14b8d5d8a402e36ed915c2ec906df5c4f4b089": 8, "f9": 8, "5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23": 8, "9mb": 8, "71": 8, "2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97": 8, "manylinux2010_x86_64": [8, 13], "filelock": [8, 10, 13, 14], "7d": 8, "09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10": 8, "883kb": 8, "890kb": 8, "pypars": [8, 10, 13], "893262": 8, "26dd1871c98e4cd5fe1938dbeba7086606c31e80a945ec9f752859e252fe7068": 8, "3c": 8, "fd": 8, "7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45": 8, "dataset": [8, 10, 14], "thainer": [8, 10], "lst20": [8, 10], "dataset_nam": [8, 14], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e1c\u0e21\u0e21": 8, "\u0e19\u0e32\u0e22\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e17\u0e17": 8, "\u0e22\u0e44\u0e1e\u0e1a": 8, "\u0e25\u0e22": 8, "ask": 8, "truncat": 8, "max_length": 8, "predefin": 8, "person": [8, 12], "\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 10, 14], "\u0e22\u0e19\u0e2a\u0e27\u0e19\u0e01": [8, 14], "\u0e2b\u0e25\u0e32\u0e1a\u0e40\u0e1b": [8, 14], "\u0e19\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 14], "\u0e22\u0e19\u0e17": [8, 14], "\u0e2a\u0e27\u0e19\u0e01": [8, 14], "\u0e2b\u0e25\u0e32\u0e1a": [8, 14], "t2": [8, 14], "grouped_ent": [8, 14], "ttl": 8, "\u0e19\u0e32\u0e22": [8, 12], "\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e1c\u0e21\u0e21": 8, "\u0e1c\u0e21": [8, 10, 11], "pr": 8, "nn": [8, 10], "\u0e27\u0e23\u0e23\u0e13": 8, "\u0e1e\u0e07\u0e29": 8, "\u0e44\u0e1e\u0e1a": 8, "grouped_word": 8, "\u0e14\u0e04\u0e33\u0e22": 8, "detail": [9, 12], "step": [9, 11], "taken": 9, "analyz": [9, 11], "evalu": 9, "metric": [9, 11, 12, 14], "overal": 9, "accuraci": [9, 11, 12], "across": [9, 12], "neg": [9, 14, 15], "ativ": 9, "po": [9, 10], "itiv": 9, "neu": 9, "tral": 9, "uestion": 9, "class": [9, 11, 12], "fasttext": [9, 15], "semi": 9, "supervis": [9, 11], "public": [9, 10, 14], "privat": 9, "72781": 9, "7499": 9, "63144": 9, "6131": 9, "71259": 9, "74194": 9, "73119": 9, "75859": 9, "One": 9, "time": [9, 11, 12], "73372": 9, "75968": 9, "kaggl": [9, 11, 15], "competit": 9, "upon": 9, "1st": 9, "place": 9, "solut": 9, "googl": [9, 11, 12, 13, 14, 15], "sklearn_crfsuit": [9, 11, 15], "emoji": [9, 10, 12, 14, 15], "fastai": [9, 11, 12, 15], "master": [9, 11, 15], "unzip": [9, 11], "mkdir": [9, 11], "wisesight_data": 9, "snippet": 9, "font": [9, 15], "matplotlib": [9, 11, 12, 15], "gist": 9, "korakot": 9, "9d7f5db632351dc92607fdec72a4953f": 9, "phonbopit": 9, "sarabun": [9, 15], "webfont": 9, "thsarabunnew": 9, "ttf": [9, 15], "cp": 9, "mpl": 9, "share": [9, 12], "truetyp": 9, "font_manag": [9, 15], "_rebuild": 9, "rc": 9, "famili": [9, 12], "load_ext": 9, "autoreload": [9, 15], "np": [9, 11, 12, 13, 14, 15], "panda": [9, 11, 12, 15], "pd": [9, 11, 12, 15], "tqdm_notebook": [9, 11, 12], "process_thai": [9, 11], "viz": [9, 11], "pyplot": [9, 11, 12, 15], "plt": [9, 11, 12, 15], "seaborn": [9, 11, 12, 14], "sn": [9, 11, 12, 14], "reload": 9, "reload_ext": [9, 15], "clean": [9, 11, 12], "rule": [9, 11], "aim": [9, 11], "spars": [9, 11], "bag": [9, 11], "pre_rul": [9, 11, 12], "post_rul": [9, 11, 12], "after": [9, 11], "\u0e32\u0e19\u0e19\u0e19\u0e19\u0e19": 9, "\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19": 9, "amp": [9, 12], "www": [9, 10, 13], "\u0e32\u0e19": [9, 11], "xxrep": [9, 11], "xxwrep": 9, "\u0e19\u0e32\u0e19": 9, "xxurl": 9, "open": [9, 10, 11, 12, 13, 15], "f": [9, 11, 12, 13, 14, 15], "strip": [9, 11, 12], "readlin": 9, "train_label": 9, "categori": 9, "all_df": [9, 11], "datafram": [9, 11, 15], "to_csv": [9, 11], "shape": [9, 11, 13, 15], "24063": 9, "test_df": [9, 11], "2674": 9, "map": 9, "lambda": 9, "x": [9, 12, 13, 15], "wc": 9, "uwc": 9, "preval": 9, "value_count": [9, 11], "544612": 9, "255164": 9, "178698": 9, "021527": 9, "dtype": [9, 11, 13], "float64": [9, 11], "85": 9, "under": [9, 14], "oversampl": 9, "balanc": [9, 11], "out": [9, 12], "littl": 9, "hyperparamet": 9, "sklearn": [9, 11, 14, 15], "model_select": 9, "train_test_split": 9, "train_df": [9, 11], "valid_df": 9, "test_siz": 9, "random_st": [9, 11], "1412": [9, 11], "reset_index": [9, 11], "drop": [9, 11], "actual": 9, "copi": [9, 11], "read_csv": [9, 11, 12], "head": [9, 11, 12, 15], "\u0e19\u0e04\u0e19\u0e25\u0e1a\u0e41\u0e2d\u0e1e": 9, "viu": 9, "\u0e19\u0e43\u0e08\u0e41\u0e25\u0e30\u0e40\u0e02": 9, "\u0e32\u0e43\u0e08\u0e40\u0e02\u0e32\u0e19\u0e30\u0e04\u0e30": 9, "\u0e41\u0e1c\u0e25\u0e21": 9, "\u0e25\u0e1a": 9, "\u0e41\u0e2d": 9, "\u0e19\u0e43\u0e08": 9, "\u0e40\u0e02": [9, 11], "\u0e32\u0e43\u0e08": 9, "\u0e40\u0e02\u0e32": 9, "\u0e44\u0e1b\u0e0a\u0e21\u0e44\u0e21": 9, "\u0e27\u0e02\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e41\u0e25\u0e30\u0e23\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e19\u0e08": [9, 14], "\u0e0a\u0e21": 9, "\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e23\u0e2d\u0e07": 9, "\u0e21\u0e23\u0e16\u0e0b": 9, "\u0e04\u0e40\u0e1b": 9, "\u0e19\u0e01\u0e25": 9, "\u0e21\u0e17": [9, 12], "\u0e32\u0e23\u0e33\u0e04\u0e32\u0e19\u0e21\u0e32\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01": 9, "\u0e23\u0e33": 9, "\u0e04\u0e32\u0e19": 9, "\u0e21\u0e32\u0e01": [9, 11], "\u0e2d\u0e22\u0e32\u0e01\u0e2a\u0e27\u0e22\u0e40\u0e2b\u0e21": 9, "\u0e2d\u0e19\u0e40\u0e08": 9, "\u0e32\u0e02\u0e2d\u0e07\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e04\u0e30": 9, "\u0e40\u0e19\u0e22": 9, "\u0e01\u0e32": [9, 11], "\u0e43\u0e1a\u0e2b\u0e19": 9, "\u0e2d\u0e22\u0e32\u0e01": 9, "\u0e2a\u0e27\u0e22": 9, "\u0e40\u0e2b\u0e21": 9, "\u0e32\u0e02\u0e2d\u0e07": 9, "\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e32\u0e27\u0e42\u0e16\u0e25\u0e30\u0e23": 9, "\u0e41\u0e1e\u0e07": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30\u0e15": 9, "\u0e01\u0e40\u0e1b": 9, "\u0e19\u0e08\u0e32\u0e19\u0e46\u0e25\u0e3015": 9, "\u0e42\u0e16": 9, "\u0e25\u0e30": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30": 9, "\u0e08\u0e32\u0e19": 9, "381": 9, "218": 9, "544957": 9, "253557": 9, "180071": 9, "021415": 9, "542659": 9, "264266": 9, "170914": 9, "022161": 9, "variabl": [9, 13], "y_train": [9, 11], "y_valid": 9, "faetur": 9, "feature_extract": [9, 11], "tfidfvector": 9, "linear_model": 9, "logisticregress": 9, "tfidf": [9, 11], "ngram_rang": [9, 11], "min_df": [9, 11], "sublinear_tf": 9, "tfidf_fit": 9, "text_train": 9, "text_valid": 9, "text_test": 9, "20453": 9, "4614": 9, "3610": 9, "top_feats_al": 9, "plot_top_feat": 9, "get_feature_nam": 9, "toarrai": 9, "448": 9, "492": 9, "940": 9, "938": 9, "rank": [9, 15], "score": [9, 11, 14], "ngram": 9, "029990": 9, "022852": 9, "020252": 9, "\u0e40\u0e25\u0e22": [9, 11], "019493": 9, "018153": 9, "852": 9, "862": 9, "73": [9, 14], "count": 9, "uniqu": [9, 12], "might": [9, 12, 13], "so": [9, 12], "preprocess": [9, 11, 14], "standardscal": 9, "scaler": 9, "scaler_fit": 9, "float": [9, 13], "mean_": 9, "var_": 9, "num_train": 9, "num_valid": 9, "num_test": 9, "96529942": 9, "22744462": 9, "1151": 9, "47512883": 9, "513": 9, "46009207": 9, "74": 9, "concaten": [9, 13, 14], "x_train": [9, 11], "axi": [9, 13, 15], "x_valid": 9, "x_test": [9, 11], "4616": 9, "75": 9, "penalti": [9, 11], "l2": [9, 11], "solver": 9, "liblinear": 9, "dual": 9, "multi_class": [9, 11], "ovr": [9, 11], "7324099722991689": 9, "76": 9, "prob": [9, 11], "predict_proba": 9, "probs_df": 9, "column": [9, 11, 15], "classes_": 9, "pred": [9, 11], "hit": 9, "probs_df_linear": 9, "77": 9, "confusion_matrix": 9, "conf_mat": 9, "heatmap": [9, 14], "annot": [9, 14, 15], "xticklabel": [9, 14], "yticklabel": [9, 14], "ylabel": 9, "xlabel": 9, "callback": [9, 11, 12], "csvlogger": [9, 11, 12], "savemodelcallback": 9, "tt": [9, 11, 12], "tok_func": [9, 11, 12], "thaitoken": [9, 11, 12], "lang": [9, 11, 12], "pre_rules_th": [9, 11, 12], "post_rules_th": [9, 11, 12], "tokenizeprocessor": [9, 11, 12], "chunksiz": [9, 11, 12], "10000": [9, 11, 12], "mark_field": [9, 11, 12], "numericalizeprocessor": [9, 11, 12], "vocab": [9, 11, 12, 13, 15], "max_vocab": [9, 11, 12], "60000": [9, 11, 12], "data_lm": [9, 11, 12], "textlist": [9, 11, 12], "from_df": [9, 11, 12], "col": [9, 11, 12], "split_by_rand_pct": [9, 12], "valid_pct": [9, 11], "seed": [9, 11], "label_for_lm": [9, 11, 12], "databunch": [9, 11, 12], "sanity_check": [9, 11, 12], "wisesight_lm": 9, "pkl": [9, 11, 15], "train_d": [9, 11], "valid_d": [9, 11], "23823": 9, "240": [9, 10], "emb_sz": [9, 11, 12], "400": [9, 11, 12], "n_hid": [9, 11, 12], "1550": [9, 11, 12], "n_layer": [9, 11, 12], "pad_token": [9, 11, 12], "qrnn": [9, 11, 12], "tie_weight": [9, 11, 12], "out_bia": [9, 11, 12], "output_p": [9, 11, 12], "hidden_p": [9, 11, 12], "input_p": [9, 11, 12], "embed_p": [9, 11, 12], "weight_p": [9, 11, 12], "trn_arg": [9, 11, 12], "drop_mult": [9, 11, 12], "clip": [9, 11, 12], "alpha": [9, 11, 12], "beta": [9, 11, 12], "language_model_learn": [9, 11, 12], "awd_lstm": [9, 11, 12], "load_pretrain": [9, 11, 12], "_thwiki_lstm": [9, 11, 12], "languagelearn": [9, 12], "textlmdatabunch": [9, 12], "labellist": [9, 12], "item": [9, 12, 13], "lmtextlist": [9, 12], "xxbo": [9, 11, 12], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e40\u0e23\u0e32": [9, 11], "\u0e1c\u0e25": 9, "\u0e07\u0e2d\u0e2d\u0e01": 9, "\u0e22\u0e32\u0e2a": 9, "\u0e40\u0e22\u0e2d\u0e30": [9, 11], "\u0e42\u0e25\u0e01": 9, "\u0e2d\u0e2d\u0e21": 9, "\u0e40\u0e04": [9, 15], "\u0e41\u0e19\u0e19": 9, "\u0e2d\u0e30\u0e44\u0e23": [9, 11], "\u0e19\u0e30": 9, "lmlabellist": [9, 12], "path": [9, 12, 13], "\u0e19\u0e30\u0e04\u0e30": [9, 11, 15], "\u0e41\u0e1c\u0e25": 9, "\u0e41\u0e16\u0e21": 9, "\u0e2d\u0e32\u0e23\u0e21\u0e13": 9, "\u0e42\u0e14\u0e19": 9, "xxunk": [9, 11, 12], "\u0e40\u0e19\u0e2d\u0e30": 9, "\u0e27\u0e19": [9, 11], "\u0e17\u0e32\u0e07": [9, 11], "\u0e01\u0e2d\u0e14": 9, "netflix": 9, "\u0e41\u0e19": [9, 11], "\u0e17\u0e33\u0e23": 9, "\u0e19\u0e2d\u0e19": 9, "\u0e1a\u0e15\u0e01": 9, "\u0e01\u0e32\u0e23\u0e41\u0e02": 9, "\u0e41\u0e2a\u0e07\u0e42\u0e2a\u0e21": 9, "\u0e2a\u0e19": 9, "\u0e01\u0e40\u0e01\u0e2d\u0e23": 9, "\u0e41\u0e14\u0e07": [9, 11], "\u0e42\u0e2d\u0e40\u0e1e": 9, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e1b": 9, "2560": 9, "\u0e2a\u0e19\u0e32\u0e21": 9, "\u0e04\u0e25": 9, "\u0e0b\u0e2d\u0e22": 9, "\u0e42\u0e0a\u0e04": 9, "\u0e25\u0e32\u0e14\u0e1e\u0e23": 9, "\u0e2d\u0e27\u0e14": 9, "\u0e17\u0e33\u0e44\u0e21": 9, "\u0e01\u0e04\u0e19": 9, "\u0e1e\u0e27\u0e01": 9, "\u0e1a\u0e2d": 9, "\u0e01\u0e27": [9, 11], "\u0e19\u0e21": [9, 14], "\u0e40\u0e1a\u0e25\u0e2d": 9, "\u0e43\u0e2a": 9, "\u0e02\u0e19\u0e32\u0e14": 9, "\u0e13\u0e41\u0e21": 9, "\u0e19\u0e30\u0e40\u0e19": 9, "\u0e40\u0e1b\u0e25": 9, "\u0e40\u0e2d\u0e07": 9, "\u0e27\u0e22\u0e15": 9, "\u0e21\u0e32\u0e2a": 9, "\u0e01\u0e42\u0e0a": 9, "\u0e32\u0e21\u0e04": 9, "cho": 9, "cosmet": 9, "daradaili": 9, "\u0e14\u0e32\u0e23\u0e32": 9, "\u0e40\u0e14\u0e25": 9, "\u0e04\u0e19\u0e44\u0e17\u0e22": 9, "\u0e19\u0e02": 9, "\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e2b\u0e21": [9, 11, 15], "\u0e19\u0e25\u0e21": 9, "\u0e09\u0e30": 9, "\u0e42\u0e25": 9, "\u0e21\u0e30\u0e25": 9, "\u0e2d\u0e40\u0e1b\u0e25": 9, "250": 9, "\u0e02\u0e32\u0e22": [9, 11], "160": 9, "\u0e40\u0e22\u0e2d\u0e30\u0e41\u0e22\u0e30": 9, "\u0e01\u0e33\u0e44\u0e23": 9, "\u0e04\u0e27\u0e23": 9, "\u0e32\u0e27\u0e1c": 9, "\u0e43\u0e2b\u0e0d": 9, "300": [9, 11, 15], "\u0e16\u0e32\u0e14": 9, "\u0e32\u0e19\u0e1a\u0e19": 9, "80": [9, 11], "\u0e0a\u0e32\u0e40\u0e22": 9, "\u0e02\u0e27\u0e14": 9, "\u0e19\u0e41\u0e01": 9, "\u0e1e\u0e2d\u0e41\u0e25": 9, "\u0e40\u0e1a": [9, 11], "\u0e22\u0e23": 9, "120": 9, "\u0e32\u0e40\u0e01\u0e25": 9, "\u0e22\u0e14": 9, "\u0e21\u0e32": [9, 11], "360": [9, 11], "\u0e33\u0e41\u0e02": 9, "\u0e1e\u0e2d\u0e44\u0e14": 9, "\u0e2d\u0e32\u0e01\u0e32\u0e28": 9, "\u0e25\u0e30\u0e25\u0e32\u0e22": 9, "\u0e1e\u0e2d": 9, "\u0e17\u0e30\u0e40\u0e25": 9, "\u0e40\u0e1c\u0e32": 9, "\u0e25\u0e27\u0e01": 9, "\u0e32\u0e15\u0e32": 9, "\u0e01\u0e25\u0e32\u0e07\u0e46": [9, 11], "\u0e15\u0e33": 9, "\u0e41\u0e1b": 9, "\u0e21\u0e22\u0e33": 9, "\u0e2b\u0e23\u0e2d\u0e01": 9, "\u0e15\u0e23\u0e07": 9, "\u0e44\u0e1f": 9, "\u0e19\u0e43\u0e19": 9, "\u0e41\u0e17\u0e1a": 9, "\u0e41\u0e15\u0e30": 9, "\u0e19\u0e2d\u0e01": 9, "\u0e41\u0e22": 9, "\u0e40\u0e2d\u0e32\u0e40\u0e1b\u0e23": 9, "\u0e22\u0e1a": 9, "\u0e19\u0e40\u0e2d\u0e07": [9, 12], "\u0e2d\u0e32\u0e2b\u0e32\u0e23": [9, 11], "\u0e1a\u0e02": 9, "\u0e15\u0e32\u0e21": 9, "\u0e41\u0e04": [9, 11, 14], "\u0e40\u0e08\u0e2d": [9, 11], "\u0e41\u0e1a\u0e1a\u0e19": 9, "\u0e2a\u0e07\u0e2a": 9, "\u0e2d\u0e04": 9, "\u0e15\u0e32\u0e22": 9, "\u0e04\u0e32": 9, "\u0e43\u0e04\u0e23": [9, 11], "\u0e21\u0e2d\u0e07": 9, "\u0e32\u0e41\u0e23\u0e07": 9, "\u0e27\u0e19\u0e21\u0e32\u0e01": 9, "\u0e04\u0e19\u0e43\u0e19": 9, "\u0e32\u0e41\u0e23\u0e07\u0e02": 9, "\u0e02\u0e22": 9, "\u0e40\u0e25": [9, 10, 11, 15], "\u0e01\u0e19": [9, 11], "\u0e04\u0e19\u0e08\u0e19": 9, "\u0e04\u0e19\u0e23\u0e27\u0e22": 9, "\u0e01\u0e16": [9, 11], "\u0e2a\u0e21\u0e04\u0e27\u0e23": 9, "\u0e19\u0e41\u0e25\u0e30\u0e01": 9, "\u0e0a\u0e2d\u0e1a": [9, 10, 11], "\u0e19\u0e08\u0e23": 9, "\u0e0a\u0e32\u0e27\u0e15": 9, "\u0e32\u0e07\u0e0a\u0e32\u0e15": 9, "\u0e40\u0e16\u0e2d\u0e30": 9, "\u0e42\u0e2d\u0e01\u0e32\u0e2a": 9, "sequentialrnn": [9, 12], "encod": [9, 11, 12, 13, 14], "15000": 9, "padding_idx": [9, 12], "encoder_dp": [9, 12], "embeddingdropout": [9, 12], "emb": [9, 12], "rnn": [9, 10, 12], "modulelist": [9, 12, 13], "weightdropout": [9, 12], "lstm": [9, 12], "batch_first": [9, 12], "input_dp": [9, 12], "rnndropout": [9, 12], "hidden_dp": [9, 12], "lineardecod": [9, 12], "decod": [9, 12, 14], "linear": [9, 12, 13], "in_featur": [9, 12, 13], "out_featur": [9, 12, 13], "bia": [9, 10, 12, 13, 14], "output_dp": [9, 12], "opt_func": [9, 11, 12], "functool": [9, 12, 14], "partial": [9, 11, 12, 14], "optim": [9, 11, 12, 13], "adam": [9, 11, 12], "loss_func": [9, 12], "flattenedloss": [9, 12], "crossentropyloss": [9, 12], "0x7f51be568268": 9, "true_wd": [9, 12], "bn_wd": [9, 12], "wd": [9, 12], "train_bn": [9, 12], "posixpath": [9, 12], "model_dir": [9, 12], "callback_fn": [9, 12], "basic_train": [9, 12], "record": [9, 11, 12], "add_tim": [9, 12], "silent": [9, 12], "gradientclip": [9, 12], "rnntrainer": [9, 12], "layer_group": [9, 12], "sequenti": [9, 12], "cb_fns_regist": 9, "frozen": [9, 11], "freeze_to": [9, 11], "fit_one_cycl": [9, 11], "mom": [9, 11], "epoch": [9, 11], "train_loss": [9, 11], "valid_loss": [9, 11], "841187": 9, "462714": 9, "319742": 9, "unfrozen": [9, 11], "unfreez": [9, 11], "411834": 9, "205552": 9, "341766": 9, "03": 9, "178030": 9, "037095": 9, "361508": 9, "970388": 9, "930919": 9, "370139": 9, "756190": 9, "890398": 9, "376191": 9, "671704": 9, "890232": 9, "375595": 9, "save_encod": [9, 11], "wisesight_enc": 9, "lm": 9, "load_data": [9, 11], "data_cl": [9, 11], "itemlist": 9, "label_from_df": [9, 11], "ito": [9, 11, 12], "bptt": [9, 11], "500": [9, 15], "text_classifier_learn": [9, 11], "load_encod": [9, 11], "rnnlearner": 9, "textclasdatabunch": 9, "\u0e19\u0e41\u0e14\u0e14": 9, "\u0e40\u0e1e\u0e25\u0e2a": 9, "\u0e27\u0e43\u0e2b\u0e21": 9, "\u0e08\u0e23": [9, 11, 15], "\u0e42\u0e0b\u0e19": 9, "\u0e40\u0e27": 9, "\u0e2b\u0e25\u0e2d\u0e14": 9, "\u0e22\u0e32\u0e27": 9, "\u0e1d\u0e32": 9, "\u0e40\u0e2d\u0e32": [9, 11], "\u0e1e\u0e1a": 9, "\u0e25\u0e1b": 9, "soul": [9, 12], "pop": 9, "\u0e2a\u0e32\u0e21": 9, "\u0e2a\u0e44\u0e15\u0e25": 9, "\u0e07\u0e32\u0e19": [9, 11], "jamnight": 9, "\u0e19\u0e33": 9, "parkinson": 9, "xxup": 9, "toi": 9, "\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01": 9, "\u0e42\u0e0a\u0e27": 9, "\u0e41\u0e1a\u0e1a": 9, "\u0e1b\u0e41\u0e1a\u0e1a": 9, "\u0e27\u0e07": 9, "\u0e41\u0e08\u0e21": 9, "\u0e1e\u0e25\u0e32\u0e14": 9, "\u0e40\u0e08\u0e2d\u0e01": 9, "\u0e19\u0e22\u0e32\u0e22\u0e19": 9, "\u0e1b\u0e23\u0e30\u0e15": 9, "\u0e2a\u0e32\u0e21\u0e32\u0e23\u0e16": 9, "\u0e15\u0e23": [9, 14, 15], "event": 9, "go": [9, 12], "eventpop": 9, "me": [9, 12, 15], "\u0e08\u0e33\u0e01": 9, "\u0e2d\u0e32\u0e22": 9, "jamnightbyjameson": 9, "jamesonthailand": 9, "soulaftersix": 9, "theparkinson": 9, "thetoi": 9, "\u0e21\u0e30": 9, "\u0e1a\u0e2d\u0e01\u0e15": 9, "\u0e41\u0e1e": [9, 11], "\u0e40\u0e22": 9, "\u0e1e\u0e2d\u0e19": 9, "\u0e41\u0e15\u0e07\u0e42\u0e21": 9, "\u0e25\u0e14": 9, "\u0e2a\u0e07\u0e01\u0e23\u0e32\u0e19\u0e15": 9, "\u0e23\u0e2d\u0e14": 9, "555": 9, "categorylist": 9, "multibatchencod": 9, "poolinglinearclassifi": 9, "layer": [9, 13, 14], "batchnorm1d": 9, "1200": 9, "ep": [9, 13], "momentum": 9, "affin": 9, "track_running_stat": 9, "dropout": [9, 13], "27999999999999997": 9, "relu": 9, "inplac": [9, 13], "2e": [9, 11], "slice": [9, 11], "5e": [9, 11], "improv": 9, "monitor": 9, "bestmodel": 9, "take": [9, 12, 15], "script": [9, 12, 14], "train_model": 9, "812156": 9, "753478": 9, "687532": 9, "740403": 9, "699093": 9, "714394": 9, "727394": 9, "668807": 9, "723011": 9, "722163": 9, "675351": 9, "723517": 9, "675266": 9, "654477": 9, "738723": 9, "669178": 9, "641070": 9, "737962": 9, "612528": 9, "637456": 9, "744551": 9, "618259": 9, "635149": 9, "749366": 9, "572621": 9, "651169": 9, "749873": 9, "561985": 9, "661739": 9, "747593": 9, "534753": 9, "673563": 9, "738469": 9, "530844": 9, "688871": 9, "746072": 9, "522788": 9, "670024": 9, "743031": 9, "y_true": 9, "loss": [9, 11], "get_pr": [9, 11], "ds_type": 9, "datasettyp": [9, 11], "with_loss": 9, "argmax": [9, 11, 13], "to_df": 9, "8392661555312158": 9, "u": [10, 12, 13, 14], "look": [10, 12, 13, 14], "pypi": [10, 14], "pkg": [10, 14], "attempt": [10, 13], "dependency_pars": 10, "esupar": 10, "chu": 10, "liu": 10, "edmond": 10, "chu_liu_edmond": 10, "cp38": 10, "107": 10, "supar": 10, "93": 10, "2022": [10, 14, 15], "304": 10, "dill": [10, 15], "cu116": 10, "stanza": 10, "691": 10, "huggingfac": [10, 13, 14], "hub": [10, 13, 14], "huggingface_hub": [10, 13, 14], "182": 10, "jinja2": 10, "smart": 10, "pathi": 10, "langcod": 10, "pydant": 10, "logger": 10, "legaci": 10, "typer": 10, "protobuf": [10, 13], "confect": 10, "markupsaf": 10, "5626945": 10, "6613dcb188f57561a00a2e40eca1bbafe6203936b8d9c387facd79de3f06fa62": 10, "6f": 10, "3475485c7d991ca5698d39603e22a99bd6904dcac7d0a5855a": 10, "234926": 10, "e3b7a3e928e5e81053b9f869cfef5382b49f133284c6abbd718496ff11e8ee67": 10, "a1": 10, "b0bb1f7683d20b75b34ceeb56ee83a585e9b065a5fef0b2cb1": 10, "warn": [10, 13, 14], "broken": 10, "permiss": 10, "conflict": 10, "behaviour": 10, "manag": 10, "recommend": [10, 14], "virtual": 10, "environ": 10, "pypa": 10, "venv": 10, "spacy_pythainlp": 10, "dev6": 10, "nptype": 10, "473": 10, "docopt": 10, "fire": 10, "termcolor": 10, "13723": 10, "cd282751c98736c79933ed4265624e65891888bb9fdd01dc5d6fcf978d76431f": 10, "cc": 10, "f1e272f628fdb013d969acc99cfe2e031ea15b3efb74ffe842": 10, "116949": 10, "bc82a0082e9931af28c40d49e4494ce66a1f80f929b30ae4e7e1eff347b37c5c": 10, "86": 10, "88e8603bd3b1a9bff9d02d820c7431c47ad032865632657bb9": 10, "cuda": [10, 11], "__init__": 10, "497": 10, "userwarn": [10, 13, 14], "initi": [10, 11, 13, 14], "nvml": 10, "pos_engin": 10, "pos_corpu": 10, "orchid_ud": 10, "sent_engin": 10, "ner_engin": 10, "tokenize_engin": 10, "dependency_parsing_engin": 10, "dependency_parsing_model": 10, "bool": 10, "chang": [10, 12], "turn": [10, 12], "part": [10, 14], "speech": [10, 13], "off": [10, 12], "0x7f9c02410a90": 10, "\u0e1c\u0e21\u0e40\u0e1b": 10, "\u0e19\u0e41\u0e21\u0e27": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a\u0e44\u0e1b\u0e40\u0e25": 10, "\u0e22\u0e19\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e21\u0e22": 10, "free": [10, 15], "commerci": 10, "pleas": 10, "contract": 10, "nectec": 10, "facebook": [10, 14, 15], "dancearmi": 10, "post": [10, 13], "10157641945708284": 10, "pos_lst20_perceptron": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a": 10, "\u0e42\u0e23\u0e07": 10, "\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e44\u0e1b\u0e40\u0e25": 10, "0x7f9c0146e880": 10, "weight": [10, 13, 14], "checkpoint": [10, 14], "koichiyasuoka": 10, "roberta": [10, 14], "spm": [10, 14], "upo": 10, "were": [10, 12, 14], "robertamodel": [10, 14], "classifi": [10, 11], "expect": [10, 14, 15], "anoth": [10, 14], "architectur": [10, 14], "bertforsequenceclassif": [10, 14], "bertforpretrain": [10, 14], "NOT": [10, 14], "exactli": [10, 14], "ident": [10, 14], "newli": [10, 14], "pooler": [10, 14], "dens": [10, 14], "should": [10, 12, 14], "probabl": [10, 14, 15], "down": [10, 12, 14], "stream": [10, 14, 15], "abl": [10, 14], "infer": [10, 14], "info": 10, "n_sentenc": 10, "n_batch": 10, "n_bucket": 10, "make": [10, 11, 12, 14], "apply_permut": 10, "tensor": [10, 11], "index_select": 10, "dim": [10, 11, 14], "permut": 10, "204603": 10, "elaps": 10, "dep": 10, "pron": 10, "sconj": 10, "nsubj": 10, "cop": 10, "acl": 10, "xcomp": 10, "obl": 10, "flat": 10, "star": [11, 12], "multi": 11, "both": [11, 12, 14], "number": 11, "micro": 11, "averag": 11, "f1": 11, "challeng": [11, 12], "micro_f1_publ": 11, "micro_f1_priv": 11, "59313": 11, "60322": 11, "5145": 11, "5109": 11, "5022": 11, "4976": 11, "59139": 11, "58139": 11, "bert": [11, 14], "56612": 11, "57057": 11, "review_dataset": 11, "wongnai_data": 11, "ast": [11, 12], "literal_ev": [11, 12], "counter": [11, 12], "re": [11, 12, 13, 15], "ft_data": 11, "respect": 11, "w_review_train": 11, "csv": [11, 12], "sep": [11, 12], "header": 11, "drop_dupl": 11, "rate": 11, "test_fil": 11, "concat": 11, "469282": 11, "304328": 11, "169880": 11, "046133": 11, "010377": 11, "two_df": 11, "one_df": 11, "train_bal": 11, "392365": 11, "254448": 11, "142036": 11, "115715": 11, "095436": 11, "dump": [11, 12, 14, 15], "skipgram": 11, "df_txt": 11, "df": 11, "ft_line": 11, "iterrow": 11, "ft_lab": 11, "__label__": 11, "ft_text": 11, "replace_newlin": 11, "close": [11, 14], "__label__0": 11, "df_all": 11, "home": 11, "charin": 11, "pretrainedvector": 11, "vec": 11, "input": [11, 13, 14], "1m": 11, "18176": 11, "progress": 11, "sec": 11, "thread": 11, "24858": 11, "lr": 11, "000000": 11, "309402": 11, "0h0m": 11, "wongnai_b": 11, "wordngram": 11, "731006": 11, "391282": 11, "764689": 11, "81": 11, "bin": [11, 15], "pred_lab": 11, "split": [11, 13, 15], "submit_df": 11, "reviewid": 11, "submit_fastttext_b": 11, "lukkiddd": 11, "train_split": 11, "test_split": 11, "pipelin": [11, 14], "countvector": 11, "tfidftransform": 11, "svm": 11, "text_clf": 11, "vect": 11, "clf": 11, "fit": 11, "memori": [11, 12], "binari": [11, 15], "decode_error": 11, "strict": 11, "int64": 11, "utf": [11, 13], "lowercas": 11, "max_df": 11, "max_featur": 11, "preprocessor": 11, "stop_word": 11, "ax_it": 11, "tol": 11, "0001": 11, "verbos": 11, "onehotencod": 11, "enc": 11, "handle_unknown": 11, "submit_linearsvc": 11, "59590": 11, "59731": 11, "processor": [11, 12, 13], "random_split_by_pct": 11, "wongnai_lm": 11, "45735": 11, "461": 11, "show_batch": 11, "idx": 11, "\u0e14\u0e32\u0e27": 11, "\u0e2b\u0e21\u0e14": 11, "\u0e0b\u0e30": 11, "\u0e32\u0e27\u0e2a\u0e27\u0e22": 11, "\u0e21\u0e32\u0e13": 11, "\u0e1e\u0e2d\u0e14": 11, "\u0e18\u0e22\u0e32\u0e28": 11, "\u0e1a\u0e23\u0e2d\u0e07": 11, "\u0e1a\u0e21\u0e32": 11, "\u0e2d\u0e22\u0e46": 11, "\u0e41\u0e16\u0e27": 11, "\u0e25\u0e2d\u0e07": 11, "\u0e41\u0e27\u0e30": 11, "\u0e2a\u0e33\u0e2b\u0e23": 11, "\u0e23\u0e2a": 11, "\u0e2d\u0e07\u0e14": 11, "\u0e21\u0e32\u0e01\u0e21\u0e32\u0e22": 11, "\u0e04\u0e07": 11, "\u0e42\u0e01\u0e42\u0e01": 11, "top": [11, 12], "\u0e22\u0e14\u0e32\u0e22": 11, "\u0e2b\u0e32": 11, "\u0e15\u0e2d\u0e19": 11, "\u0e27\u0e22\u0e40\u0e15": 11, "\u0e40\u0e19": 11, "\u0e17\u0e32\u0e19": 11, "\u0e2d\u0e19\u0e02": 11, "\u0e22\u0e32\u0e01": 11, "\u0e27\u0e32": 11, "\u0e2a\u0e32\u0e02\u0e32": 11, "\u0e12\u0e19\u0e32\u0e01\u0e32\u0e23": 11, "\u0e1d\u0e32\u0e01": 11, "\u0e2d\u0e01": [11, 14], "\u0e2b\u0e25\u0e32\u0e22\u0e2d\u0e22": 11, "\u0e1a\u0e23": 11, "\u0e01\u0e30": 11, "\u0e01\u0e2a\u0e32\u0e27": 11, "\u0e32\u0e02\u0e2d\u0e07\u0e23": 11, "\u0e08\u0e32": 11, "\u0e04\u0e27\u0e32\u0e21\u0e04": 11, "\u0e14\u0e40\u0e2b": 11, "\u0e27\u0e19\u0e15": 11, "\u0e2d\u0e2d\u0e01": 11, "\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07": 11, "\u0e1a\u0e27\u0e01": 11, "\u0e27\u0e19\u0e43\u0e2b\u0e0d": 11, "\u0e1a\u0e23\u0e23\u0e22\u0e32\u0e01\u0e32\u0e28": 11, "\u0e23\u0e16\u0e40\u0e02": 11, "\u0e42\u0e15": 11, "\u0e15\u0e01\u0e41\u0e15": 11, "\u0e19\u0e41\u0e19\u0e27": 11, "\u0e1a\u0e32\u0e23": 11, "\u0e42\u0e14\u0e22\u0e23\u0e2d\u0e1a": 11, "\u0e19\u0e23\u0e32": 11, "\u0e40\u0e21\u0e19": [11, 14], "next": [11, 12], "train_dl": 11, "414": 11, "3408": 11, "135": 11, "409": 11, "1325": 11, "1185": 11, "9903": 11, "368": 11, "870": 11, "254": 11, "3448": 11, "429": 11, "devic": 11, "193": 11, "10074": 11, "258": 11, "456": 11, "270": 11, "\u0e1a\u0e1e": 11, "\u0e2d\u0e07\u0e40\u0e2a": 11, "temperatur": [11, 12], "\u0e44\u0e2b\u0e21": 11, "mr": [11, 12], "\u0e04\u0e0a": 11, "\u0e09\u0e32\u0e22": 11, "2557": 11, "\u0e01\u0e33\u0e01": [11, 15], "\u0e1b\u0e1b": 11, "\u0e20\u0e32\u0e04": 11, "\u0e42\u0e23\u0e07\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 11, "2558": 11, "\u0e2d\u0e2b\u0e32": 11, "\u0e22\u0e27\u0e01": [11, 14], "lr_find": 11, "plot": [11, 12, 15], "finder": 11, "complet": 11, "learner_nam": 11, "graph": [11, 13], "min": 11, "gradient": [11, 14], "58e": 11, "04": [11, 13, 15], "22562": 11, "659182": 11, "493942": 11, "342857": 11, "375606": 11, "252919": 11, "385714": 11, "165419": 11, "013862": 11, "371429": 11, "034220": 11, "802707": 11, "357143": 11, "879111": 11, "712463": 11, "823682": 11, "624331": 11, "784611": 11, "580608": 11, "753532": 11, "553170": 11, "719396": 11, "516521": 11, "699165": 11, "513339": 11, "696516": 11, "512542": 11, "wongnai_enc": 11, "\u0e32\u0e19\u0e19": 11, "\u0e08\u0e30\u0e2d\u0e22": 11, "\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07": 11, "\u0e2d\u0e2d\u0e19": 11, "\u0e40\u0e25\u0e22\u0e41\u0e22\u0e01\u0e1a": 11, "\u0e07\u0e44\u0e1b2": [11, 14], "\u0e0a\u0e09\u0e30\u0e25\u0e32\u0e40\u0e15": [11, 14], "\u0e44\u0e2d\u0e28\u0e04\u0e23": [11, 14], "\u0e21\u0e0a\u0e32\u0e40\u0e02": [11, 14], "\u0e27\u0e27\u0e07\u0e40\u0e14": 11, "n\u0e2b": 11, "\u0e27\u0e14": [11, 14], "\u0e01\u0e46": 11, "\u0e15\u0e23\u0e30\u0e40\u0e27\u0e19\u0e2b\u0e32\u0e23": 11, "\u0e32\u0e19\u0e17\u0e32\u0e19": 11, "\u0e21\u0e32\u0e40\u0e08\u0e2d": 11, "\u0e08\u0e30\u0e27": 11, "\u0e19\u0e40\u0e08": 11, "\u0e32\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e01": 11, "\u0e04\u0e07\u0e44\u0e21": 11, "\u0e32\u0e04": 11, "\u0e14\u0e16": 11, "\u0e07\u0e2a\u0e25": 11, "\u0e14\u0e1c\u0e21\u0e04": 11, "\u0e32\u0e19\u0e41\u0e23\u0e01\u0e46\u0e40\u0e25\u0e22\u0e04\u0e23": 11, "add_test": 11, "wongnai_cl": 11, "sure": [11, 14], "got": [11, 12], "target": 11, "\u0e1e\u0e32": 11, "\u0e2d\u0e32\u0e40\u0e0b": 11, "\u0e23\u0e23": 11, "\u0e32\u0e1e\u0e23\u0e30\u0e22\u0e32\u0e1b\u0e32\u0e23": 11, "\u0e0a\u0e14\u0e32\u0e20": 11, "\u0e40\u0e29\u0e01": 11, "\u0e19\u0e01\u0e32\u0e23": 11, "\u0e0a\u0e27\u0e19": 11, "\u0e32\u0e40\u0e14": 11, "\u0e19\u0e40\u0e04\u0e22": 11, "\u0e07\u0e46": 11, "\u0e23\u0e16\u0e15": 11, "\u0e1e\u0e24\u0e28\u0e08": 11, "\u0e01\u0e32\u0e22\u0e19": 11, "\u0e32\u0e19\u0e21\u0e32": 11, "\u0e27\u0e07\u0e43\u0e19": 11, "\u0e14\u0e01": 11, "\u0e08\u0e01\u0e23\u0e23\u0e21": 11, "xxmaj": 11, "relax": 11, "night": [11, 12], "phothalai": 11, "\u0e21\u0e15": 11, "tast": 11, "\u0e2d\u0e07\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e2d\u0e19\u0e23": 11, "group": [11, 13, 15], "\u0e0d\u0e2b\u0e32": 11, "\u0e27\u0e16": 11, "\u0e01\u0e32\u0e23\u0e2a": 11, "\u0e2d\u0e2a\u0e32\u0e23": 11, "\u0e1e\u0e19": 11, "\u0e01\u0e07\u0e32\u0e19": 11, "\u0e21\u0e32\u0e16": 11, "terrac": 11, "\u0e2d\u0e07\u0e08\u0e32\u0e01": 11, "\u0e08\u0e19": 11, "\u0e17\u0e19": 11, "\u0e01\u0e23\u0e30\u0e41\u0e2a": 11, "\u0e04\u0e27\u0e32\u0e21\u0e41\u0e23\u0e07": 11, "shibuya": 11, "shabu": 11, "\u0e44\u0e2b\u0e27": 11, "\u0e02\u0e2d": 11, "\u0e15\u0e32\u0e21\u0e23\u0e2d\u0e22": 11, "\u0e2d\u0e07\u0e2b\u0e32": 11, "\u0e42\u0e2d": 11, "\u0e2a\u0e21\u0e32\u0e17\u0e32\u0e19": 11, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e2a": 11, "\u0e0a\u0e32": 11, "\u0e40\u0e1e": [11, 12], "\u0e0a\u0e32\u0e27": 11, "\u0e01\u0e04\u0e23": 11, "pednoii": 11, "ahha": 11, "\u0e32\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e41\u0e23\u0e01": 11, "\u0e19\u0e33\u0e40\u0e2a\u0e19\u0e2d": 11, "\u0e19\u0e32\u0e07\u0e43\u0e19": 11, "31e": 11, "07": 11, "gradual": 11, "187845": 11, "158394": 11, "472803": 11, "889035": 11, "828990": 11, "629707": 11, "760357": 11, "751162": 11, "656904": 11, "628719": 11, "721673": 11, "669456": 11, "submit_ulmfit": 11, "ulmfit": 12, "thwiki_lstm": 12, "dummi": 12, "imdb": 12, "untar_data": 12, "url": 12, "imdb_sampl": 12, "dummy_df": 12, "thwiki_ito": 12, "pickl": [12, 15], "itos_fnam": 12, "rb": [12, 15], "thwiki_vocab": 12, "check": 12, "60005": 12, "800": 12, "film": 12, "act": 12, "music": 12, "good": 12, "too": [12, 14], "though": 12, "mostli": 12, "earli": 12, "thing": 12, "still": 12, "realli": 12, "superstar": 12, "cast": 12, "sever": 12, "face": [12, 13], "entir": 12, "excel": 12, "job": 12, "hard": 12, "watch": 12, "becaus": [12, 15], "situat": 12, "present": 12, "british": 12, "against": 12, "each": [12, 14, 15], "merit": 12, "view": 12, "forc": 12, "region": 12, "thei": [12, 14], "did": 12, "around": 12, "partit": 12, "simpli": [12, 14], "saw": 12, "between": [12, 14], "enough": 12, "veri": 12, "rememb": 12, "screen": 12, "never": 12, "paint": 12, "side": 12, "hope": 12, "younger": 12, "redempt": 12, "man": 12, "who": 12, "her": 12, "life": 12, "truli": 12, "love": 12, "later": 12, "she": 12, "great": 12, "pain": 12, "carri": 12, "messag": 12, "grave": 12, "peopl": 12, "realiti": 12, "sinc": [12, 15], "india": 12, "pakistan": 12, "border": 12, "sens": 12, "glad": 12, "seen": 12, "even": 12, "uk": 12, "could": [12, 15], "would": [12, 15], "better": 12, "onc": 12, "long": 12, "while": [12, 15], "movi": 12, "along": 12, "feel": 12, "labor": 12, "my": 12, "joi": 12, "where": [12, 13], "five": 12, "stereotyp": 12, "had": 12, "gui": 12, "fat": 12, "foreign": 12, "etc": 12, "being": [12, 13], "written": 12, "shot": 12, "product": 12, "low": 12, "junior": 12, "high": [12, 14], "video": 12, "director": 12, "produc": [12, 13], "ever": 12, "wors": 12, "entri": 12, "concept": 12, "funni": 12, "gari": 12, "coleman": 12, "actor": 12, "trust": 12, "sai": [12, 14], "went": 12, "dad": 12, "came": 12, "korea": 12, "he": 12, "short": [12, 14], "period": 12, "made": 12, "epic": 12, "imagin": 12, "cost": 12, "cheap": 12, "theme": 12, "duti": 12, "lip": 12, "offic": 12, "deep": 12, "declar": 12, "hi": 12, "peck": 12, "liber": 12, "understand": 12, "fearless": 12, "human": 12, "ve": 12, "fact": 12, "tail": 12, "mess": 12, "almost": 12, "walk": 12, "paid": 12, "ll": 12, "sit": 12, "bit": 12, "lose": 12, "its": 12, "someth": [12, 14], "ed": 12, "wood": 12, "dialogu": 12, "heard": 12, "viewer": 12, "cannot": [12, 13], "meet": 12, "oper": 12, "soon": 12, "stephen": 12, "best": 12, "ultim": 12, "tara": 12, "reid": 12, "plai": 12, "role": 12, "oh": 12, "help": 12, "talent": 12, "actress": 12, "stick": 12, "american": 12, "pie": 12, "know": 12, "kick": 12, "clich": 12, "\u00e9": 12, "typic": 12, "member": 12, "william": 12, "benton": 12, "believ": 12, "bias": 12, "toward": 12, "thief": 12, "born": 12, "bad": 12, "neither": 12, "slate": 12, "societi": 12, "parent": 12, "educ": 12, "what": [12, 14], "somewher": 12, "isn": [12, 15], "back": 12, "track": 12, "bet": 12, "wast": 12, "piec": 12, "valid": 12, "late": 12, "penn": 12, "teller": 12, "joe": 12, "bob": 12, "fridai": [12, 14], "school": 12, "year": 12, "doubt": 12, "televis": 12, "didn": 12, "stai": 12, "miss": 12, "john": 12, "bloom": 12, "live": 12, "belong": [12, 14], "question": [12, 14], "anyon": 12, "hour": 12, "moral": 12, "disast": 12, "david": 12, "care": 12, "purpos": 12, "singl": 12, "qualiti": 12, "treat": 12, "afternoon": 12, "budget": 12, "project": [12, 13], "stori": 12, "eva": 12, "tv": 12, "ideal": 12, "mani": 12, "cours": 12, "special": 12, "effect": 12, "gun": 12, "scene": 12, "move": 12, "although": 12, "problem": 12, "rent": 12, "student": 12, "ye": 12, "nake": 12, "emperor": 12, "speak": 12, "big": 12, "someon": 12, "state": [12, 14], "truth": 12, "old": 12, "bodi": 12, "nude": 12, "artist": 12, "front": 12, "audienc": 12, "ev": 12, "poor": 12, "wanna": 12, "ladi": 12, "sensit": 12, "becam": 12, "petti": 12, "satisfact": 12, "alarm": 12, "signal": [12, 13], "degre": 12, "work": [12, 14], "art": [12, 14], "cross": 12, "mix": 12, "ordinari": 12, "rural": 12, "pacif": 12, "northwest": 12, "solid": 12, "fine": 12, "dan": 12, "same": [12, 14], "highli": 12, "crash": 12, "paul": 12, "pace": 12, "action": 12, "urban": 12, "lo": 12, "angel": 12, "apart": 12, "relationship": [12, 15], "jim": 12, "0x7f5215ef6ea0": 12, "\u0e01\u0e32\u0e25\u0e04\u0e23": 12, "\u0e07\u0e19\u0e32\u0e19\u0e21\u0e32\u0e41\u0e25": 12, "min_p": 12, "005": 12, "\u0e27\u0e07\u0e2a\u0e2d\u0e07\u0e2b\u0e19": 12, "\u0e10\u0e32\u0e19\u0e30\u0e23": 12, "\u0e33\u0e23\u0e27\u0e22": 12, "\u0e41\u0e25\u0e30\u0e40\u0e1b": 12, "\u0e19\u0e25": 12, "\u0e01\u0e2a\u0e32\u0e27\u0e02\u0e2d\u0e07": 12, "\u0e14\u0e23": 12, "\u0e42\u0e04\u0e25": 12, "\u0e1a\u0e1a\u0e17\u0e42\u0e14\u0e22": 12, "\u0e2d\u0e25": 12, "\u0e01\u0e0a\u0e32\u0e22\u0e04\u0e19\u0e42\u0e15\u0e02\u0e2d\u0e07": 12, "\u0e42\u0e2d\u0e25": 12, "\u0e40\u0e27\u0e2d\u0e23": [12, 14], "\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "\u0e27\u0e07\u0e41\u0e23\u0e01": 12, "\u0e40\u0e02\u0e32\u0e40\u0e1b": 12, "\u0e42\u0e2d\u0e25\u0e25": 12, "\u0e40\u0e02\u0e32\u0e21": 12, "\u0e41\u0e25\u0e30\u0e41\u0e21": 12, "\u0e19\u0e04\u0e19\u0e17": 12, "\u0e15\u0e43\u0e08\u0e2d": 12, "\u0e2d\u0e19\u0e42\u0e22\u0e19": 12, "\u0e19\u0e40\u0e1e": 12, "\u0e2d\u0e19\u0e2a\u0e19": 12, "\u0e17\u0e01": 12, "\u0e04\u0e32\u0e25": 12, "\u0e42\u0e23\u0e2a": 12, "\u0e25\u0e2a": 12, "\u0e2d\u0e02\u0e2d\u0e07\u0e40\u0e18\u0e2d\u0e19": 12, "\u0e43\u0e19\u0e1b": 12, "1967": 12, "\u0e18\u0e44\u0e14": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e40\u0e25": 12, "\u0e22\u0e07\u0e14": 12, "\u0e08\u0e2d\u0e23": 12, "\u0e2a\u0e1b": 12, "\u0e25\u0e40\u0e1a": 12, "\u0e0b\u0e32\u0e23": 12, "\u0e2d\u0e21\u0e32\u0e01": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e14": 12, "\u0e41\u0e25\u0e08\u0e32\u0e01\u0e41\u0e21": 12, "\u0e07\u0e17\u0e33\u0e43\u0e2b": [12, 14], "\u0e01\u0e29\u0e30\u0e14": 12, "\u0e32\u0e19\u0e27": 12, "\u0e41\u0e25\u0e30\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 12, "\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e33\u0e40\u0e23": 12, "\u0e08\u0e01\u0e32\u0e23\u0e28": 12, "\u0e01\u0e29\u0e32\u0e08\u0e32\u0e01\u0e21\u0e2b\u0e32\u0e27": 12, "\u0e17\u0e22\u0e32\u0e25": 12, "\u0e22\u0e41\u0e25": 12, "\u0e19\u0e17\u0e32\u0e07\u0e44\u0e1b\u0e17": 12, "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e2d\u0e2d\u0e2a\u0e40\u0e15\u0e23\u0e40\u0e25": 12, "\u0e01\u0e29\u0e32": 12, "\u0e41\u0e25\u0e30\u0e43\u0e19\u0e0a": 12, "\u0e27\u0e07\u0e19": 12, "\u0e19\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "airesearch": [13, 14], "larg": 13, "xlsr": 13, "cu113": 13, "torchvis": 13, "torchaudio": 13, "pytorch": 13, "torch_stabl": 13, "html": 13, "link": 13, "2bcu113": 13, "1821": 13, "834": 13, "43tcmalloc": 13, "alloc": 13, "1147494400": 13, "byte": 13, "0x55bf21ac6000": 13, "0x7faf12d1b615": 13, "0x55bf1efac4cc": 13, "0x55bf1f08c47a": 13, "0x55bf1efaf2": 13, "0x55bf1f0a0e1d": 13, "0x55bf1f022e99": 13, "0x55bf1f01d9ee": 13, "0x55bf1efb0bda": 13, "0x55bf1f022d00": 13, "0x55bf1f01f737": 13, "0x55bf1f0a1c66": 13, "0x55bf1f01edaf": 13, "0x55bf1efb1039": 13, "0x55bf1eff4409": 13, "0x55bf1efafc52": 13, "0x55bf1f022c25": 13, "0x55bf1f01e915": 13, "0x55bf1efb0afa": 13, "0x55bf1f01ec0d": 13, "1055": 13, "37tcmalloc": 13, "1434370048": 13, "0x55bf6611c000": 13, "1336": 13, "39tcmalloc": 13, "1792966656": 13, "0x55bfbb908000": 13, "1691": 13, "38tcmalloc": 13, "2241208320": 13, "01tcmalloc": 13, "1821458432": 13, "0x55bfa7428000": 13, "0x7faf12d1a1e7": 13, "0x55bf1efe2067": 13, "tcmalloc": 13, "2276827136": 13, "0x55c013d3c000": 13, "0x55bf1efb1271": 13, "pillow": 13, "cu111": 13, "onnxruntim": 13, "soundfil": 13, "manylinux_2_12_x86_64": 13, "91": 13, "post1": 13, "895": [13, 15], "manylinux_2_5_x86_64": 13, "596": 13, "flatbuff": 13, "743": 13, "wav2vec2model": 13, "hug": 13, "autotoken": [13, 14], "wav2vec2forctc": 13, "import_huggingface_model": 13, "origin": [13, 15], "from_pretrain": [13, 14], "correspond": 13, "audio": 13, "stabl": 13, "hubert": 13, "configuration_util": 13, "341": 13, "gradient_checkpoint": 13, "v5": [13, 14], "gradient_checkpointing_en": 13, "trainer": [13, 14], "api": 13, "trainingargu": [13, 14], "eval": 13, "mode": 13, "feature_extractor": [13, 14], "featureextractor": 13, "conv_lay": 13, "convlayerblock": 13, "layer_norm": [13, 14], "layernorm": 13, "512": 13, "elementwise_affin": 13, "conv": 13, "conv1d": 13, "kernel_s": 13, "stride": 13, "feature_project": 13, "featureproject": 13, "1024": 13, "pos_conv_emb": 13, "convolutionalpositionalembed": 13, "128": 13, "encoderlay": 13, "attent": 13, "selfattent": 13, "k_proj": 13, "v_proj": 13, "q_proj": 13, "out_proj": 13, "feed_forward": 13, "feedforward": 13, "intermediate_dens": 13, "4096": 13, "intermediate_dropout": 13, "output_dens": 13, "output_dropout": 13, "final_layer_norm": 13, "microsoft": 13, "window": 13, "ai": [13, 14], "ml": 13, "input_s": 13, "100000": 13, "audio_maxlen": 13, "dummy_input": 13, "randn": 13, "requires_grad": 13, "export": 13, "asr3": 13, "export_param": 13, "opset_vers": 13, "do_constant_fold": 13, "whether": 13, "execut": 13, "constant": 13, "fold": 13, "input_nam": 13, "modelinput": 13, "output_nam": 13, "modeloutput": 13, "dynamic_ax": 13, "batch_siz": 13, "ax": [13, 14], "symbolic_help": 13, "325": 13, "caus": 13, "incorrect": 13, "dropbox": 13, "9kpeh8eodshcqhj": 13, "common_voice_th_23646850": 13, "wav": 13, "dl": 13, "mv": 13, "json": 13, "co": [13, 14], "r": [13, 15], "sig": 13, "sf": 13, "scipi": 13, "wavfil": 13, "sp": 13, "new_rat": 13, "16000": 13, "ort_sess": 13, "inferencesess": 13, "k": [13, 14], "unk": 13, "_normal": 13, "vasudevgupta7": 13, "gsoc": 13, "src": 13, "l101": 13, "fork": [13, 14], "tf": 13, "seqlen": 13, "keepdim": 13, "var": 13, "squeez": 13, "sqrt": 13, "remove_adjac": 13, "3460423": 13, "asr": 13, "wav2vec2_onnx": 13, "ipynb": [13, 15], "sampling_r": 13, "sampl": [13, 15], "new_data": 13, "resampl": 13, "float32": 13, "ort_input": 13, "ort_out": 13, "_t1": 13, "easili": 14, "finetun": 14, "drive": 14, "1kbk6sbspzlwcnoe61adaqo30xxqoq9ko": 14, "scrollto": 14, "n5iacot9b3cf": 14, "specif": [14, 15], "thaixtransform": 14, "236": 14, "106": 14, "safetensor": 14, "fsspec": 14, "355": 14, "seqev": 14, "28115": 14, "d0f182fee94a7c129f5bd1265a3e0d2a52893384d6783d11c8bbd770ef695fac": 14, "2c": 14, "4b": 14, "b2": 14, "a90368d80567249f258a9c58240512046afb5563d794eda4b2": 14, "auto": 14, "camemberttoken": 14, "automodel": 14, "automodelformaskedlm": 14, "automodelforsequenceclassif": 14, "automodelfortokenclassif": 14, "process_transform": 14, "xlmr": 14, "mbert": 14, "downstream": 14, "att": 14, "uncas": 14, "largest": 14, "78": 14, "5gb": 14, "assort": 14, "subword": 14, "xlm": 14, "multilingu": 14, "104": 14, "level": 14, "syllabl": 14, "syllabel": 14, "sefr": 14, "model_nam": [14, 15], "thaiwordsnewmmtoken": 14, "thaiwordssyllabletoken": 14, "fakesefrcuttoken": 14, "thairobertatoken": 14, "public_model": 14, "param": 14, "revis": 14, "model_max_length": 14, "416": 14, "unexpect": 14, "robertatoken": 14, "simplest": 14, "given": 14, "\u0e07\u0e08": 14, "\u0e19\u0e17\u0e23": 14, "\u0e25\u0e40\u0e25\u0e22": 14, "\u0e07\u0e2d\u0e22": 14, "\u0e1a\u0e19\u0e1e": 14, "454": 14, "\u0e02\u0e2d\u0e07\u0e2d\u0e33\u0e40\u0e20\u0e2d\u0e27": 14, "\u0e14\u0e23\u0e30\u0e22\u0e2d\u0e07": 14, "answer": [14, 15], "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e40\u0e21\u0e15\u0e23": 14, "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e27\u0e32": 14, "\u0e44\u0e21\u0e25": 14, "substitut": 14, "instanc": [14, 15], "000": 14, "trane": 14, "proven": 14, "increas": 14, "aug": 14, "english": 14, "fill_mask": 14, "fill": 14, "input_text": 14, "u0e02": 14, "u0e2d": 14, "u0e40": 14, "u0e07": 14, "u0e34": 14, "u0e19": 14, "u0e01": 14, "u0e39": 14, "u0e49": 14, "u003cmask": 14, "u0e2b": 14, "u0e48": 14, "u0e22": 14, "\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e21": 14, "\u0e23\u0e30\u0e22\u0e30\u0e17\u0e32\u0e07\u0e17": 14, "\u0e07\u0e2b\u0e21\u0e14": 14, "\u0e08\u0e33\u0e19\u0e27\u0e19\u0e2a\u0e16\u0e32\u0e19": 14, "\u0e2a\u0e16\u0e32\u0e19": 14, "\u0e19\u0e40\u0e2a": 14, "\u0e19\u0e17\u0e32\u0e07\u0e2b\u0e25": 14, "\u0e01\u0e43\u0e19\u0e41\u0e19\u0e27\u0e40\u0e2b\u0e19": 14, "\u0e43\u0e15": 14, "\u0e15\u0e32\u0e21\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07\u0e23\u0e16\u0e44\u0e1f\u0e40\u0e14": 14, "\u0e21\u0e02\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e23\u0e16\u0e44\u0e1f\u0e41\u0e2b": 14, "\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 14, "\u0e32\u0e27\u0e2b\u0e19": 14, "\u0e32\u0e40\u0e19": 14, "\u0e2d\u0e40\u0e23": 14, "\u0e22\u0e01\u0e40\u0e1b": 14, "\u0e19\u0e20\u0e32\u0e29\u0e32": 14, "gy\u016bdon": 14, "\u0e08\u0e30\u0e44\u0e1b\u0e40\u0e1b": 14, "\u0e42\u0e14\u0e14\u0e40\u0e14": 14, "\u0e19\u0e1a\u0e19\u0e1f\u0e32\u0e01\u0e1f": 14, "\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e02\u0e27": 14, "\u0e02\u0e27": 14, "\u0e32\u0e40\u0e2d\u0e32\u0e21\u0e32\u0e14": 14, "\u0e07\u0e43\u0e08\u0e1d": 14, "\u0e04\u0e22\u0e2d\u0e14": 14, "\u0e02\u0e2d\u0e40\u0e07": 14, "\u0e01\u0e14": [14, 15], "allow": 14, "preprocess_input_text": 14, "boolean": 14, "need": 14, "fill_mask_pad": 14, "513759434223175": 14, "4263": 14, "token_str": 14, "\u0e23\u0e32\u0e21": 14, "\u0e23\u0e32\u0e21\u0e2b\u0e19": 14, "05489557236433029": 14, "552": 14, "0474877767264843": 14, "125": 14, "037654660642147064": 14, "5901": 14, "\u0e2a\u0e30\u0e14\u0e27\u0e01": 14, "\u0e2a\u0e30\u0e14\u0e27\u0e01\u0e2b\u0e19": 14, "026551486924290657": 14, "1913": 14, "\u0e19\u0e32": 14, "\u0e19\u0e32\u0e2b\u0e19": 14, "wisesight_senti": 14, "social": 14, "media": 14, "wongnai_review": 14, "awai": [14, 15], "classify_multiclass": 14, "u0e04": 14, "u0e1a": 14, "u0e32": 14, "u0e47": 14, "u0e21": 14, "u0e31": 14, "u0e41": 14, "u0e17": 14, "u0e15": 14, "u0e4c": 14, "u0e25": 14, "u0e303": 14, "u0e27": 14, "u0e14": 14, "u0e42": 14, "u0e23": 14, "u0e30": 14, "u0e1b": 14, "u0e37": 14, "\u0e2d\u0e22\u0e32\u0e01\u0e01": 14, "\u0e19\u0e27\u0e30\u0e41\u0e01": 14, "\u0e2d\u0e21\u0e32\u0e43\u0e2b": 14, "\u0e2d\u0e22\u0e08": 14, "\u0e13\u0e41\u0e01\u0e21\u0e32\u0e01": 14, "\u0e42\u0e04\u0e15\u0e23\u0e1a": 14, "\u0e32\u0e40\u0e25\u0e22": 14, "\u0e1f\u0e2d\u0e23": 14, "\u0e01\u0e15\u0e25\u0e32\u0e14": 14, "\u0e19\u0e40\u0e14": 14, "prachachat": 14, "\u0e15\u0e25\u0e32\u0e14\u0e23\u0e16\u0e22\u0e19\u0e15": 14, "\u0e23\u0e2a\u0e0a\u0e32\u0e40\u0e02": 14, "\u0e22\u0e27\u0e40\u0e02": 14, "\u0e2b\u0e2d\u0e21": 14, "\u0e01\u0e25\u0e21\u0e01\u0e25": 14, "\u0e14\u0e41\u0e1a\u0e1a\u0e08": 14, "\u0e14\u0e2a\u0e19": 14, "\u0e27\u0e19\u0e44\u0e2d\u0e28\u0e04\u0e23": 14, "\u0e17\u0e32\u0e19\u0e41\u0e25": 14, "\u0e27\u0e23\u0e2a\u0e21": 14, "\u0e19\u0e2d\u0e2d\u0e01\u0e43\u0e1a\u0e44\u0e21": 14, "\u0e46\u0e21\u0e32\u0e01\u0e01\u0e27": 14, "\u0e32\u0e0a\u0e32\u0e40\u0e02": 14, "\u0e27\u0e01": 14, "\u0e2b\u0e27\u0e32\u0e19\u0e44\u0e1b": 14, "\u0e42\u0e14\u0e22\u0e23\u0e27\u0e21\u0e41\u0e25": 14, "\u0e27\u0e40\u0e09\u0e22\u0e21\u0e32\u0e01\u0e01": 14, "\u0e33\u0e40\u0e1b\u0e25": 14, "\u0e32\u0e1a\u0e23": 14, "\u0e01\u0e32\u0e23\u0e1f\u0e23": 14, "\u0e40\u0e04\u0e22\u0e1a": 14, "\u0e32\u0e40\u0e2d": 14, "\u0e21\u0e40\u0e04\u0e01": 14, "\u0e1a\u0e41\u0e21": 14, "\u0e25\u0e303": 14, "\u0e42\u0e04\u0e15\u0e23\u0e2b\u0e19": 14, "\u0e01\u0e41\u0e25\u0e30\u0e42\u0e04\u0e15\u0e23\u0e40\u0e1b\u0e25": 14, "\u0e2d\u0e07\u0e07\u0e07\u0e07": 14, "892067551612854": 14, "entiti": 14, "recognit": 14, "classify_token": 14, "ignore_label": 14, "token_classif": 14, "169": 14, "aggregation_strategi": 14, "u0e35": 14, "u0e2a": 14, "u0e38": 14, "u0e44": 14, "\u0e41\u0e14\u0e07\u0e40\u0e14": 14, "\u0e2d\u0e14\u0e23\u0e2d\u0e1a\u0e2a\u0e2d\u0e07": 14, "\u0e01\u0e40\u0e22": 14, "\u0e41\u0e21\u0e19\u0e2f": 14, "\u0e44\u0e19\u0e40\u0e15": 14, "\u0e22\u0e40\u0e2a": 14, "\u0e22\u0e07\u0e2a": 14, "\u0e0d\u0e40\u0e2a": 14, "\u0e22\u0e08\u0e32\u0e01\u0e20": 14, "\u0e22\u0e18\u0e23\u0e23\u0e21\u0e0a\u0e32\u0e15": 14, "\u0e21\u0e32\u0e01\u0e2a": 14, "\u0e17\u0e33\u0e43\u0e2b": 14, "\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19\u0e01\u0e27": 14, "\u0e2d\u0e07\u0e2d\u0e1e\u0e22\u0e1e\u0e2d\u0e2d\u0e01\u0e08\u0e32\u0e01\u0e1e": 14, "\u0e1e\u0e25\u0e02\u0e2d\u0e07\u0e1e\u0e32\u0e22": 14, "\u0e32\u0e19\u0e40\u0e23": 14, "\u0e2d\u0e19\u0e40\u0e01": 14, "\u0e2d\u0e1a": 14, "700": 14, "\u0e07\u0e1e": 14, "\u0e07\u0e16\u0e25": 14, "\u0e21\u0e25\u0e07\u0e21\u0e32": 14, "\u0e32\u0e07\u0e04\u0e27\u0e32\u0e21\u0e40\u0e2a": 14, "\u0e22\u0e2b\u0e32\u0e22\u0e04": 14, "\u0e14\u0e40\u0e1b": 14, "\u0e25\u0e04": 14, "450": 14, "\u0e32\u0e19\u0e2b\u0e22\u0e27\u0e19": 14, "\u0e01\u0e17\u0e0a": 14, "\u0e40\u0e15\u0e23": 14, "\u0e22\u0e21\u0e17\u0e14\u0e25\u0e2d\u0e07\u0e1b\u0e23\u0e30\u0e21": 14, "3\u0e08": 14, "entity_group": 14, "97664016": 14, "99976474": 14, "less": 14, "tradit": 14, "logist": 14, "regress": 14, "forest": 14, "boost": 14, "imag": 14, "mrpeerat": 14, "bramvanroi": 14, "extract_last_k_token": 14, "last_k": 14, "hidden_st": 14, "last_k_token": 14, "concatenated_hidden_st": 14, "sum": 14, "_extract_last_k_lay": 14, "aggregator_fn": 14, "return_tensor": 14, "pt": 14, "no_grad": 14, "output_hidden_st": 14, "select": 14, "hidden": 14, "cat": 14, "aggregated_hidden_st": 14, "extract_last_k_lay": 14, "pretrained_model_name_or_path": 14, "lm_head": 14, "japanes": 14, "food": [14, 15], "gyudon": 14, "italian": 14, "macaroni": 14, "cosin": 14, "consid": 14, "last": 14, "markdown": 14, "obtain": 14, "aggreg": 14, "via": 14, "summat": 14, "represnetaiton": 14, "text1": 14, "\u0e19\u0e0a\u0e2d\u0e1a\u0e01": 14, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e0d": 14, "text2": 14, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2d": 14, "\u0e15\u0e32\u0e40\u0e25": 14, "text3": 14, "text4": 14, "\u0e01\u0e01\u0e30\u0e42\u0e23\u0e19": 14, "t1": 14, "t3": 14, "t4": 14, "pairwis": 14, "cosine_similar": 14, "sim_matrix": 14, "cmap": 14, "blue": 14, "bo": 14, "fanci": 14, "mayb": 14, "march": 14, "releas": [14, 15], "wanchanberta": 14, "xnli": 14, "pair": 14, "branch": 14, "xnli_th": 14, "repositori": 14, "zero_classifi": 14, "u0e0d": 14, "u0e0a": 14, "u0e1": 14, "u0e18": 14, "scb": 14, "10x": 14, "u0e43": 14, "blockfi": 14, "startup": 14, "digit": 14, "asset": 14, "u0e13": 14, "u0e10": 14, "u201c": 14, "u201d": 14, "u0e1c": 14, "u0e20": 14, "u0e29": 14, "u201cwher": 14, "u0e08": 14, "u0e16": 14, "u0e1f": 14, "u0e28": 14, "u0e33": 14, "u0e11": 14, "u0e1d": 14, "candidate_label": 14, "\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01": 14, "\u0e23\u0e01": 14, "\u0e01\u0e32\u0e23\u0e40\u0e21": 14, "\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 14, "\u0e25\u0e1b\u0e30": 14, "\u0e19\u0e40\u0e17": 14, "hypothesis_templ": 14, "\u0e1e\u0e32\u0e14\u0e2b": 14, "\u0e27\u0e02": 14, "\u0e32\u0e27\u0e19": 14, "\u0e21\u0e44\u0e1a\u0e40\u0e14\u0e19\u0e2b\u0e32\u0e23": 14, "\u0e1a\u0e0d": 14, "\u0e01\u0e23\u0e30\u0e0a": 14, "\u0e1a\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": 14, "\u0e19\u0e18\u0e21": 14, "34431710839271545": 14, "3195861279964447": 14, "18645761907100677": 14, "14963914453983307": 14, "v0": 15, "word2vec": 15, "oppos": 15, "latter": 15, "garner": 15, "556": 15, "dimens": 15, "descend": 15, "frequenc": 15, "format": 15, "readabl": 15, "applic": 15, "thwiki_lm": 15, "word2vec_exampl": 15, "inlin": 15, "manifold": 15, "tsne": 15, "fm": 15, "load_word2vec_format": 15, "wordvector": 15, "thai2fit_wv": 15, "get_model": 15, "thai2dict": 15, "index2word": 15, "from_dict": 15, "orient": 15, "290": 15, "291": 15, "292": 15, "293": 15, "294": 15, "295": 15, "296": 15, "298": 15, "299": 15, "308956": 15, "097699": 15, "116745": 15, "215612": 15, "015768": 15, "064163": 15, "062168": 15, "039649": 15, "864940": 15, "846904": 15, "142418": 15, "033241": 15, "171581": 15, "624864": 15, "009358": 15, "449131": 15, "120130": 15, "122195": 15, "450617": 15, "071318": 15, "010751": 15, "618971": 15, "129665": 15, "035460": 15, "007560": 15, "027607": 15, "397824": 15, "026543": 15, "254075": 15, "168328": 15, "105786": 15, "180930": 15, "101630": 15, "070885": 15, "037263": 15, "183606": 15, "049088": 15, "672288": 15, "293044": 15, "592576": 15, "015736": 15, "258926": 15, "052953": 15, "153728": 15, "005985": 15, "021081": 15, "041088": 15, "057312": 15, "633230": 15, "442729": 15, "009408": 15, "252576": 15, "305512": 15, "372542": 15, "049151": 15, "568470": 15, "266586": 15, "400800": 15, "784650": 15, "197369": 15, "189711": 15, "174774": 15, "171124": 15, "186771": 15, "054294": 15, "114150": 15, "109456": 15, "094466": 15, "447015": 15, "042377": 15, "168676": 15, "148738": 15, "680404": 15, "097702": 15, "020270": 15, "182967": 15, "083949": 15, "006287": 15, "707434": 15, "070234": 15, "156962": 15, "231863": 15, "080312": 15, "323157": 15, "215695": 15, "055145": 15, "420794": 15, "016842": 15, "256759": 15, "832864": 15, "044267": 15, "147186": 15, "105424": 15, "907078": 15, "009299": 15, "550953": 15, "139337": 15, "031696": 15, "670379": 15, "008048": 15, "428813": 15, "031194": 15, "041922": 15, "036608": 15, "008106": 15, "076470": 15, "782270": 15, "033361": 15, "606864": 15, "440520": 15, "024458": 15, "025031": 15, "103389": 15, "078255": 15, "034323": 15, "459774": 15, "748643": 15, "337775": 15, "487408": 15, "511535": 15, "287710": 15, "064193": 15, "205076": 15, "146356": 15, "071343": 15, "039451": 15, "845461": 15, "163763": 15, "018096": 15, "272786": 15, "051024": 15, "532856": 15, "131856": 15, "090323": 15, "058895": 15, "151262": 15, "420358": 15, "055971": 15, "930814": 15, "163908": 15, "239587": 15, "303620": 15, "079953": 15, "453045": 15, "528826": 15, "161692": 15, "235725": 15, "099673": 15, "691668": 15, "536159": 15, "110436": 15, "297495": 15, "217414": 15, "045158": 15, "066647": 15, "190095": 15, "304333": 15, "724927": 15, "995488": 15, "716609": 15, "120522": 15, "355783": 15, "168180": 15, "377733": 15, "158624": 15, "047249": 15, "361140": 15, "161460": 15, "913314": 15, "345037": 15, "116285": 15, "318218": 15, "356664": 15, "519889": 15, "130475": 15, "125772": 15, "101328": 15, "382658": 15, "205359": 15, "340139": 15, "086848": 15, "155231": 15, "133015": 15, "039913": 15, "183761": 15, "115142": 15, "940854": 15, "066565": 15, "399744": 15, "146722": 15, "019406": 15, "181474": 15, "099863": 15, "516092": 15, "201697": 15, "249139": 15, "252957": 15, "138815": 15, "018209": 15, "232265": 15, "sne": 15, "compress": 15, "plane": 15, "thai2plot": 15, "tnse": 15, "n_compon": 15, "init": 15, "pca": 15, "n_iter": 15, "fit_transform": 15, "wb": 15, "jeffmcneil": 15, "dip": 15, "sipa": 15, "regular": 15, "111": 15, "479628": 15, "468k": 15, "octet": 15, "regu": 15, "468": 15, "39k": 15, "stolen": 15, "blog": 15, "manash": 15, "a71e6d55f27": 15, "plot_with_label": 15, "low_dim_emb": 15, "figsiz": 15, "axis_lim": 15, "assert": 15, "figur": 15, "inch": 15, "scatter": 15, "prop": 15, "fontproperti": 15, "fname": 15, "xy": 15, "xytext": 15, "textcoord": 15, "offset": 15, "va": 15, "bottom": 15, "savefig": 15, "\u0e2b\u0e0d": 15, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e32": 15, "\u0e0a\u0e32\u0e22": 15, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a": 15, "\u0e19\u0e32\u0e22\u0e01\u0e23": 15, "\u0e10\u0e21\u0e19\u0e15\u0e23": 15, "\u0e2d\u0e33\u0e19\u0e32\u0e08": 15, "\u0e1b\u0e23\u0e30\u0e18\u0e32\u0e19\u0e32\u0e18": 15, "\u0e07\u0e01": 15, "\u0e42\u0e1a\u0e23\u0e32\u0e13": 15, "\u0e44\u0e14\u0e42\u0e19\u0e40\u0e2a\u0e32\u0e23": 15, "most_similar_cosmul": 15, "7954867482185364": 15, "7382755279541016": 15, "\u0e1e\u0e23\u0e30\u0e40\u0e08": 15, "7046602368354797": 15, "\u0e32\u0e0a\u0e32\u0e22": 15, "6979373097419739": 15, "\u0e1e\u0e23\u0e30\u0e21\u0e2b\u0e32\u0e01\u0e29": 15, "6972416639328003": 15, "\u0e32\u0e1f": 15, "\u0e32\u0e2b\u0e0d": 15, "6871017217636108": 15, "\u0e32\u0e41\u0e1c": 15, "6827988624572754": 15, "\u0e1e\u0e23\u0e30\u0e1e": 15, "\u0e17\u0e18\u0e40\u0e08": 15, "671796977519989": 15, "\u0e21\u0e01": 15, "\u0e0e\u0e23\u0e32\u0e0a\u0e01": 15, "\u0e21\u0e32\u0e23": 15, "6711805462837219": 15, "\u0e19\u0e32\u0e22\u0e1e\u0e25": 15, "6694187521934509": 15, "sample_word": 15, "sample_idx": 15, "sample_plot": 15, "\u0e23\u0e2d\u0e07\u0e19\u0e32\u0e22\u0e01\u0e23": 15, "4945054054260254": 15, "400755763053894": 15, "3626699447631836": 15, "\u0e19\u0e40\u0e2d\u0e01": 15, "3437265157699585": 15, "\u0e0d\u0e0a\u0e32\u0e01\u0e32\u0e23\u0e17\u0e2b\u0e32\u0e23\u0e1a\u0e01": 15, "3405414819717407": 15, "\u0e1a\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 15, "3339321613311768": 15, "\u0e01\u0e1f": 15, "\u0e15\u0e1a\u0e2d\u0e25": 15, "331659197807312": 15, "\u0e40\u0e2d\u0e01\u0e2d": 15, "\u0e04\u0e23\u0e23\u0e32\u0e0a\u0e17": 15, "3306005001068115": 15, "3243674039840698": 15, "\u0e20\u0e32\u0e1e\u0e2a\u0e15\u0e23": 15, "3231494426727295": 15, "\u0e15\u0e27": 15, "\u0e07\u0e21": 15, "537461519241333": 15, "\u0e22\u0e07\u0e25": 15, "\u0e27\u0e22\u0e19\u0e21": 15, "5080005526542664": 15, "\u0e41\u0e21\u0e25\u0e07": 15, "5048903226852417": 15, "\u0e1c\u0e25\u0e44\u0e21": 15, "4839756190776825": 15, "47641509771347046": 15, "46431201696395874": 15, "45941096544265747": 15, "45185261964797974": 15, "4504697620868683": 15, "44425833225250244": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e0a": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2a": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e22": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e01\u0e25\u0e32\u0e07\u0e27": 15, "wherea": 15, "meal": 15, "\u0e25\u0e32\u0e01": 15, "push": 15, "rest": 15, "eat": 15, "reli": 15, "\u0e01\u0e40\u0e02\u0e22": 15, "associ": 15, "male": 15, "gender": 15, "\u0e2b\u0e21\u0e32": 15, "\u0e2b\u0e21\u0e2d": 15, "china": 15, "beij": 15, "itali": 15, "rome": 15, "\u0e42\u0e23\u0e21": 15, "\u0e15\u0e32\u0e25": 15, "3135956": 15, "42819628": 15, "27347285": 15, "17900795": 15, "02666693": 15, "24352394": 15, "\u0e42\u0e15\u0e40\u0e01": 15, "contribut": 15, "sakar": 15, "atv": 15, "adapt": 15, "spell": 15, "cpmp": 15, "w_rank": 15, "thai_lett": 15, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e24\u0e45\u0e25\u0e26\u0e26\u0e45\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e30": 15, "\u0e40\u0e41\u0e42\u0e43\u0e44": 15, "findal": 15, "lower": 15, "invers": 15, "proxi": 15, "dictionari": 15, "max": 15, "candid": 15, "edits1": 15, "edits2": 15, "subset": 15, "appear": 15, "delet": 15, "transpos": 15, "replac": 15, "insert": 15, "e1": 15, "\u0e14\u0e19\u0e32": 15, "\u0e12\u0e19\u0e32": 15, "\u0e02\u0e23": 15, "\u0e08\u0e22": 15, "\u0e19\u0e30\u0e04": 15}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"welcom": 0, "pythainlp": [0, 1, 4, 5, 7, 10], "tutori": 0, "han": 1, "coref": 1, "thai": [1, 2, 3, 4, 6, 7, 12, 13], "corefer": 1, "resolut": 1, "depend": 2, "parser": [2, 6], "find": 3, "all": 3, "rhyme": 3, "word": [3, 7, 15], "from": 3, "translat": 4, "instal": [4, 5, 13, 14], "import": [4, 7, 15], "list": 4, "languag": [4, 9, 11, 12], "english": 4, "nlpo3": 5, "dictionari": [5, 7], "custom": [5, 7], "chunk": 6, "get": [7, 14], "start": [7, 14], "charact": 7, "check": 7, "string": 7, "contain": 7, "how": 7, "mani": 7, "collat": 7, "date": 7, "time": 7, "format": 7, "spellout": 7, "token": [7, 14], "segment": 7, "sentenc": 7, "subword": [7, 8], "syllabl": 7, "cluster": 7, "tcc": 7, "low": 7, "level": 7, "oper": 7, "transliter": 7, "normal": 7, "digit": 7, "convers": 7, "soundex": 7, "spellcheck": [7, 15], "frequenc": 7, "part": [7, 8], "speech": [7, 8], "tag": 7, "name": [7, 8], "entiti": [7, 8], "vector": [7, 14], "number": 7, "spell": 7, "out": 7, "wangchanberta": [8, 14], "recognit": 8, "wisesight": [9, 14], "sentiment": [9, 14], "analysi": 9, "text": [9, 12, 14], "processor": 9, "logist": 9, "regress": 9, "process": 9, "file": 9, "csv": 9, "load": 9, "data": 9, "train": 9, "valid": 9, "split": 9, "creat": 9, "featur": [9, 14], "fit": 9, "model": [9, 11, 12, 13, 14], "see": 9, "result": 9, "ulmfit": [9, 11], "finetun": [9, 11], "classifi": [9, 14], "spaci": 10, "wongnai": [11, 14], "review": [11, 14], "classif": [11, 14], "oversampl": 11, "fasttext": 11, "linearsvc": 11, "submiss": 11, "wiki": 12, "gener": 12, "wav2vec2": 13, "onnx": 13, "build": 13, "infer": 13, "notebook": 14, "choos": 14, "pretrain": 14, "mask": 14, "predict": 14, "sequenc": 14, "multi": 14, "class": 14, "thainer": 14, "lst20": 14, "document": 14, "extract": 14, "zero": 14, "shot": 14, "thai2vec": 15, "embed": 15, "exampl": 15, "arithmet": 15, "doesn": 15, "t": 15, "match": 15, "cosin": 15, "similar": 15}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"Welcome to PyThaiNLP Tutorials": [[0, "welcome-to-pythainlp-tutorials"]], "Tutorials:": [[0, null]], "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP": [[1, "\ud83e\udebf-Han-Coref:-Thai-Coreference-resolution-by-PyThaiNLP"]], "Thai Dependency Parser": [[2, "Thai-Dependency-Parser"]], "Find all Thai rhyming words from Thai word": [[3, "Find-all-Thai-rhyming-words-from-Thai-word"]], "PyThaiNLP Translate": [[4, "PyThaiNLP-Translate"]], "Install": [[4, "Install"], [13, "Install"]], "Translate": [[4, "Translate"]], "Import": [[4, "Import"]], "List language": [[4, "List-language"]], "English to Thai": [[4, "English-to-Thai"]], "Thai to English": [[4, "Thai-to-English"]], "nlpO3": [[5, "nlpO3"]], "Installation": [[5, "Installation"], [14, "Installation"]], "PyThaiNLP dictionary": [[5, "PyThaiNLP-dictionary"]], "Custom dictionary": [[5, "Custom-dictionary"]], "Thai Chunk Parser": [[6, "Thai-Chunk-Parser"]], "PyThaiNLP Get Started": [[7, "PyThaiNLP-Get-Started"]], "Import PyThaiNLP": [[7, "Import-PyThaiNLP"]], "Thai Characters": [[7, "Thai-Characters"]], "Checking if a string contains Thai character or not, or how many": [[7, "Checking-if-a-string-contains-Thai-character-or-not,-or-how-many"]], "Collation": [[7, "Collation"]], "Date/Time Format and Spellout": [[7, "Date/Time-Format-and-Spellout"]], "Date/Time Format": [[7, "Date/Time-Format"]], "Time Spellout": [[7, "Time-Spellout"]], "Tokenization and Segmentation": [[7, "Tokenization-and-Segmentation"]], "Sentence": [[7, "Sentence"]], "Word": [[7, "Word"]], "Subword, syllable, and Thai Character Cluster (TCC)": [[7, "Subword,-syllable,-and-Thai-Character-Cluster-(TCC)"]], "Subword tokenization": [[7, "Subword-tokenization"]], "Syllable tokenization": [[7, "Syllable-tokenization"]], "Low-level subword operations": [[7, "Low-level-subword-operations"]], "Transliteration": [[7, "Transliteration"]], "Normalization": [[7, "Normalization"]], "Digit conversion": [[7, "Digit-conversion"]], "Soundex": [[7, "Soundex"]], "Spellchecking": [[7, "Spellchecking"], [15, "Spellchecking"]], "Spellchecking - Custom dictionary and word frequency": [[7, "Spellchecking---Custom-dictionary-and-word-frequency"]], "Part-of-Speech Tagging": [[7, "Part-of-Speech-Tagging"]], "Named-Entity Tagging": [[7, "Named-Entity-Tagging"]], "Word Vector": [[7, "Word-Vector"]], "Number Spell Out": [[7, "Number-Spell-Out"]], "Wangchanberta": [[8, "Wangchanberta"]], "Named Entity Recognition": [[8, "Named-Entity-Recognition"]], "Part of speech": [[8, "Part-of-speech"]], "Subword": [[8, "Subword"]], "Wisesight Sentiment Analysis": [[9, "Wisesight-Sentiment-Analysis"]], "Text Processor for Logistic Regression": [[9, "Text-Processor-for-Logistic-Regression"]], "Process Text Files to CSVs": [[9, "Process-Text-Files-to-CSVs"]], "Load Data": [[9, "Load-Data"]], "Train-validation Split": [[9, "Train-validation-Split"]], "Logistic Regression": [[9, "Logistic-Regression"]], "Create Features": [[9, "Create-Features"]], "Fit Model": [[9, "Fit-Model"]], "See Results": [[9, "See-Results"], [9, "id1"]], "ULMFit Model": [[9, "ULMFit-Model"], [11, "ULMFit-Model"]], "Finetune Language Model": [[9, "Finetune-Language-Model"], [11, "Finetune-Language-Model"]], "Train Text Classifier": [[9, "Train-Text-Classifier"]], "spaCy-PyThaiNLP": [[10, "spaCy-PyThaiNLP"]], "Wongnai Review Classification": [[11, "Wongnai-Review-Classification"]], "Oversampling": [[11, "Oversampling"]], "fastText Model": [[11, "fastText-Model"]], "LinearSVC Model": [[11, "LinearSVC-Model"]], "Classification": [[11, "Classification"]], "Submission": [[11, "Submission"]], "Thai Wiki Language Model for Text Generation": [[12, "Thai-Wiki-Language-Model-for-Text-Generation"]], "Thai Wav2vec2 model to ONNX model": [[13, "Thai-Wav2vec2-model-to-ONNX-model"]], "Build ONNX Model": [[13, "Build-ONNX-Model"]], "Inference": [[13, "Inference"]], "WangchanBERTa: Getting Started Notebook": [[14, "WangchanBERTa:-Getting-Started-Notebook"]], "Choose Pretrained Model": [[14, "Choose-Pretrained-Model"]], "Masked Token Prediction": [[14, "Masked-Token-Prediction"]], "Sequence Classification": [[14, "Sequence-Classification"]], "Pretrained Multi-class Classifiers - Wisesight Sentiment and Wongnai Reviews": [[14, "Pretrained-Multi-class-Classifiers---Wisesight-Sentiment-and-Wongnai-Reviews"]], "Token Classification": [[14, "Token-Classification"]], "Pretrained Token Classifiers - ThaiNER and LST20": [[14, "Pretrained-Token-Classifiers---ThaiNER-and-LST20"]], "Document Vectors": [[14, "Document-Vectors"]], "Feature Extraction": [[14, "Feature-Extraction"]], "Zero-shot Text Classification": [[14, "Zero-shot-Text-Classification"]], "Thai2Vec Embeddings Examples": [[15, "Thai2Vec-Embeddings-Examples"]], "Imports": [[15, "Imports"]], "Word Arithmetic": [[15, "Word-Arithmetic"]], "Doesn\u2019t Match": [[15, "Doesn't-Match"]], "Cosine Similarity": [[15, "Cosine-Similarity"]]}, "indexentries": {}}) \ No newline at end of file +Search.setIndex({"docnames": ["index", "notebooks/Han-Coref", "notebooks/Thai_Dependency_Parser", "notebooks/find_all_thai_rhyming_words", "notebooks/machine_translation", "notebooks/nlpo3ipynb", "notebooks/pythainlp_chunk", "notebooks/pythainlp_get_started", "notebooks/pythainlp_wangchanberta", "notebooks/sentiment_analysis", "notebooks/spaCy_PyThaiNLP_demo", "notebooks/text_classification", "notebooks/text_generation", "notebooks/thai_wav2vec2_onnx", "notebooks/wangchanberta_getting_started_aireseach", "notebooks/word2vec_examples"], "filenames": ["index.rst", "notebooks/Han-Coref.ipynb", "notebooks/Thai_Dependency_Parser.ipynb", "notebooks/find_all_thai_rhyming_words.ipynb", "notebooks/machine_translation.ipynb", "notebooks/nlpo3ipynb.ipynb", "notebooks/pythainlp_chunk.ipynb", "notebooks/pythainlp_get_started.ipynb", "notebooks/pythainlp_wangchanberta.ipynb", "notebooks/sentiment_analysis.ipynb", "notebooks/spaCy_PyThaiNLP_demo.ipynb", "notebooks/text_classification.ipynb", "notebooks/text_generation.ipynb", "notebooks/thai_wav2vec2_onnx.ipynb", "notebooks/wangchanberta_getting_started_aireseach.ipynb", "notebooks/word2vec_examples.ipynb"], "titles": ["Welcome to PyThaiNLP Tutorials", "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP", "Thai Dependency Parser", "Find all Thai rhyming words from Thai word", "PyThaiNLP Translate", "nlpO3", "Thai Chunk Parser", "PyThaiNLP Get Started", "Wangchanberta", "Wisesight Sentiment Analysis", "spaCy-PyThaiNLP", "Wongnai Review Classification", "Thai Wiki Language Model for Text Generation", "Thai Wav2vec2 model to ONNX model", "WangchanBERTa: Getting Started Notebook", "Thai2Vec Embeddings Examples"], "terms": {"i": [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "python": [0, 2, 3, 4, 5, 6, 8, 10, 13, 14], "librari": [0, 5, 6], "thai": [0, 5, 8, 9, 10, 11, 14, 15], "natur": [0, 5], "languag": [0, 5, 7, 8, 14], "process": [0, 5, 7, 11, 13, 15], "han": 0, "coref": 0, "corefer": 0, "resolut": [0, 12], "depend": [0, 9, 10, 14], "parser": 0, "find": [0, 7, 9], "all": [0, 4, 7, 11, 12, 14, 15], "rhyme": 0, "word": [0, 5, 6, 9, 10, 11, 12, 14], "from": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "translat": 0, "nlpo3": 0, "chunk": 0, "get": [0, 2, 9, 11, 12, 15], "start": [0, 1, 2, 12], "wangchanberta": 0, "wisesight": [0, 11], "sentiment": [0, 11], "analysi": [0, 14], "spaci": [0, 1, 2], "wongnai": 0, "review": 0, "classif": [0, 7, 9, 15], "wiki": [0, 11, 14], "model": [0, 4, 6, 7, 8, 10, 15], "text": [0, 1, 4, 5, 6, 7, 11, 13, 15], "gener": [0, 6, 7, 14, 15], "wav2vec2": 0, "onnx": 0, "notebook": [0, 8, 9, 12, 13], "instal": [0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 15], "choos": 0, "pretrain": [0, 8, 9, 11, 12], "mask": 0, "token": [0, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 15], "predict": [0, 9, 10, 11, 12, 13], "sequenc": [0, 7, 15], "document": [0, 15], "vector": [0, 15], "thai2vec": 0, "embed": [0, 9, 11, 12], "exampl": [0, 6, 7, 9, 14], "apach": 0, "softwar": 0, "licens": 0, "2": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "0": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "maintain": 0, "team": 0, "see": [0, 7, 11, 12, 14, 15], "sourc": [0, 2, 10], "code": [0, 7, 11, 13, 15], "http": [0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "github": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "com": [0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "main": [0, 7, 12, 13, 14], "develop": [0, 12, 14], "websit": 0, "io": [0, 10, 13], "interact": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "onlin": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "version": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "pip": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "q": [1, 9, 12], "fastcoref": 1, "transform": [1, 8, 9, 10, 12, 13, 14], "sentencepiec": [1, 4, 8, 14], "prepar": [1, 5, 10, 14], "metadata": [1, 2, 8, 10, 13, 14], "setup": [1, 2, 4, 7, 8, 10, 14], "py": [1, 2, 4, 7, 8, 9, 10, 13, 14, 15], "done": [1, 2, 4, 7, 8, 10, 12, 14], "13": [1, 2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "4": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "mb": [1, 3, 5, 6, 10, 13, 14, 15], "114": [1, 14, 15], "": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15], "eta": [1, 3, 6, 10, 11, 13, 14], "00": [1, 3, 4, 6, 7, 9, 10, 13, 14], "7": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "119": 1, "474": 1, "6": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "kb": [1, 3, 5, 6, 10, 13, 14, 15], "53": [1, 7, 11, 13], "110": [1, 5, 14, 15], "5": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15], "14": [1, 4, 5, 7, 8, 9, 10, 11, 13, 14, 15], "9": [1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "212": 1, "25": [1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "134": 1, "3": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "17": [1, 2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "90": 1, "224": 1, "29": [1, 4, 7, 8, 9, 11, 14, 15], "8": [1, 2, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "95": [1, 4, 8], "268": 1, "32": [1, 7, 9, 10, 11, 14], "149": 1, "19": [1, 2, 3, 4, 7, 8, 9, 10, 13, 15], "build": [1, 2, 4, 8, 10, 14], "wheel": [1, 2, 4, 8, 10, 14], "import": [1, 2, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14], "spacy_compon": 1, "nlp": [1, 2, 10], "blank": [1, 10], "th": [1, 4, 9, 10, 11, 12, 13, 15], "add_pip": [1, 10], "config": [1, 9, 10, 11, 12, 13], "model_architectur": 1, "fcoref": 1, "model_path": [1, 9, 11, 15], "v1": 1, "lt": [1, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 14], "fastcorefresolv": 1, "0x7fbd9c2b6560": 1, "gt": [1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15], "random": [1, 9, 14], "def": [1, 6, 7, 13, 14, 15], "get2tag": 1, "tag": [1, 6, 8, 10, 12], "titl": [1, 14], "none": [1, 2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "dic_ent": 1, "ent": [1, 10], "_tag": 1, "str": [1, 7], "list": [1, 3, 5, 6, 7, 10, 13, 15], "rang": [1, 11, 14, 15], "len": [1, 3, 7, 9, 11, 12, 13, 15], "enumer": [1, 6, 15], "e": [1, 7, 10, 14], "append": [1, 3, 11, 13, 15], "end": [1, 7, 12, 14], "label": [1, 9, 11, 14, 15], "color": 1, "join": [1, 9, 11, 13], "choic": [1, 12], "0123456789abcdef": 1, "j": [1, 7, 13], "thank": 1, "stackoverflow": [1, 13], "50218895": 1, "return": [1, 6, 7, 13, 14, 15], "displaci": [1, 10], "\u0e2a\u0e32\u0e18": 1, "\u0e15": [1, 3, 5, 7, 8, 9, 11, 14, 15], "\u0e41\u0e08\u0e07\u0e27": 1, "\u0e19": [1, 7, 9, 10, 11, 12, 14, 15], "\u0e20\u0e32\u0e1e\u0e41\u0e04\u0e1b\u0e01\u0e25": 1, "\u0e21\u0e44\u0e25\u0e19": 1, "\u0e17": [1, 3, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e1b\u0e23": [1, 7, 11, 12], "\u0e01\u0e29\u0e32\u0e2f": 1, "\u0e01\u0e25": [1, 3, 9, 11], "\u0e32\u0e27\u0e23": 1, "\u0e32\u0e22": [1, 7, 9, 11], "\u0e1e": [1, 3, 7, 9, 11, 12, 14, 15], "\u0e18\u0e32": 1, "\u0e22": [1, 7, 8, 9, 11, 12, 14, 15], "\u0e44\u0e21": [1, 7, 8, 9, 11, 14, 15], "\u0e43\u0e0a": [1, 7, 9], "\u0e27\u0e40\u0e2d\u0e07": [1, 9], "\u0e41\u0e15": [1, 7, 8, 9, 11, 14], "\u0e40\u0e2b": [1, 9, 11], "\u0e19\u0e14": [1, 7, 9, 15], "\u0e27\u0e22\u0e27": 1, "\u0e32\u0e2d\u0e20": 1, "\u0e1b\u0e23\u0e32\u0e22\u0e14": 1, "\u0e2d\u0e22\u0e04": 1, "\u0e32\u0e1a\u0e33\u0e19\u0e32\u0e0d": 1, "\u0e02\u0e23\u0e01": 1, "doc": [1, 2, 10, 11, 13], "_": [1, 7, 9, 11, 14], "coref_clust": 1, "render": [1, 7, 10], "manual": 1, "true": [1, 7, 8, 9, 10, 11, 12, 13, 14, 15], "style": [1, 10], "option": [1, 10, 12], "jupyt": [1, 10], "\u0e41\u0e21": [1, 9, 11, 14], "\u0e2a": [1, 7, 9, 11, 12, 14, 15], "\u0e07\u0e43\u0e2b": 1, "\u0e25": [1, 3, 7, 11, 14, 15], "\u0e01\u0e0a\u0e32\u0e22\u0e44\u0e1b\u0e0b": 1, "\u0e2d\u0e02\u0e2d\u0e07": [1, 12], "\u0e40\u0e18\u0e2d\u0e01\u0e25": 1, "\u0e1a\u0e25": 1, "\u0e21\u0e40\u0e2d\u0e32\u0e15": 1, "\u0e01": [1, 3, 6, 7, 9, 11, 12, 14, 15], "\u0e01\u0e0a\u0e32\u0e22": 1, "\u0e44\u0e1b\u0e0b": 1, "\u0e40\u0e18\u0e2d": [1, 9], "\u0e2b\u0e21\u0e2d\u0e41\u0e0a\u0e21\u0e1b": 1, "\u0e40\u0e1b": [1, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e14\u0e43\u0e08\u0e17": 1, "\u0e07\u0e19": [1, 7, 11], "\u0e33\u0e15\u0e32": 1, "\u0e40\u0e2a": [1, 7, 11], "\u0e22\u0e43\u0e08\u0e17": 1, "\u0e01\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "\u0e23": [1, 3, 7, 9, 10, 11, 12, 14, 15], "\u0e01\u0e20": 1, "\u0e21": [1, 7, 8, 9, 11, 12, 14, 15], "\u0e43\u0e08\u0e17": 1, "\u0e01\u0e40\u0e2a": 1, "\u0e22\u0e2a\u0e25\u0e30": 1, "\u0e43\u0e2b": [1, 7, 9, 11], "\u0e2d\u0e0a": 1, "\u0e0a": [1, 3, 7, 8, 9, 11, 12, 15], "\u0e1e\u0e0a": 1, "\u0e27\u0e22\u0e40\u0e1e": 1, "\u0e2d\u0e19\u0e17\u0e2b\u0e32\u0e23\u0e23\u0e2d\u0e14": 1, "\u0e27\u0e40\u0e2d\u0e07\u0e40\u0e2a": 1, "\u0e22\u0e0a": [1, 7], "\u0e27": [1, 7, 9, 11, 12, 14, 15], "\u0e08\u0e32\u0e01\u0e44\u0e1b": 1, "pythainlp": [2, 3, 6, 8, 9, 11, 12, 13, 14, 15], "doe": [2, 12], "come": [2, 5, 12], "instead": [2, 10, 13, 14], "you": [2, 5, 7, 9, 10, 11, 12, 13, 14, 15], "can": [2, 7, 10, 12, 14, 15], "us": [2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "which": [2, 5, 6, 7, 9, 12, 13, 15], "wa": [2, 12, 14, 15], "train": [2, 6, 10, 11, 12, 13, 14, 15], "univers": 2, "thi": [2, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15], "tutori": [2, 5, 6, 13, 15], "show": [2, 9, 12, 13, 14], "how": [2, 5, 6, 12, 13, 14, 15], "spacy_thai": [2, 10], "collect": [2, 3, 4, 5, 6, 8, 10, 11, 12, 13, 14, 15], "download": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15], "file": [2, 4, 5, 8, 13, 15], "pythonhost": [2, 4, 8], "org": [2, 4, 8, 10, 13, 14], "packag": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "ca": [2, 10], "2d": [2, 15], "c2e71a4143d6d9cd9db6744e328dfb9f65b98ad7607644d0ad4369bce303": 2, "py3": [2, 4, 8, 10, 13, 14], "ani": [2, 4, 7, 8, 10, 12, 13, 14], "whl": [2, 3, 4, 6, 8, 10, 13, 14], "1mb": [2, 8], "11": [2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "2mb": [2, 4, 8], "ufal": [2, 10], "udpip": [2, 10], "e5": 2, "72": [2, 9, 14], "2b8b9dc7c80017c790bb3308bbad34b57accfed2ac2f1f4ab252ff4e9cb2": 2, "tar": [2, 4, 8, 10, 14], "gz": [2, 4, 8, 10, 14], "304kb": 2, "307kb": 2, "45": [2, 7, 10, 11], "8mb": [2, 8], "requir": [2, 3, 4, 6, 7, 8, 10, 13, 14], "alreadi": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14], "satisfi": [2, 3, 4, 6, 8, 10, 13, 14], "usr": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "local": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "lib": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "python3": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "dist": [2, 3, 4, 6, 8, 9, 10, 13, 14, 15], "deplaci": [2, 10], "58": [2, 5, 7], "87b6286c9578fc456de1363f877228ee0d117b8de238e3e2cd49dbc06eaa": 2, "c1": 2, "09": 2, "1215cb6f6ef0cfc9dbb427a961fda8a47c111955f782f659ca2d38c79adc": 2, "10": [2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], "6mb": [2, 8], "28": [2, 10, 14, 15], "7mb": [2, 4], "srsly": [2, 10], "request": [2, 3, 4, 5, 6, 8, 10, 13, 14, 15], "23": [2, 4, 7, 8, 9, 10, 13, 14, 15], "thinc": [2, 10], "presh": [2, 10], "wasabi": [2, 10], "plac": 2, "cymem": [2, 10], "bli": [2, 10], "tqdm": [2, 4, 6, 8, 9, 10, 11, 12, 13, 14], "38": [2, 7, 8, 10], "41": [2, 4, 7, 8, 9], "murmurhash": [2, 10], "numpi": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "15": [2, 3, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "catalogu": [2, 10], "setuptool": [2, 10], "54": [2, 7, 8, 13], "tinydb": [2, 4, 8, 13], "af": [2, 8], "cd": [2, 8, 9], "1ce3d93818cdeda0446b8033d21e5f32daeb3a866bbafd878a9a62058a9c": [2, 8], "crfsuit": [2, 3, 4, 6, 8, 10, 13], "79": [2, 4, 7, 8, 9, 10], "47": [2, 7, 8, 9, 14], "58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429": [2, 8], "python_crfsuit": [2, 3, 6, 8, 10, 13], "cp37": [2, 4, 8, 13], "cp37m": [2, 4, 8, 13], "manylinux1_x86_64": [2, 4, 8, 13], "743kb": [2, 8], "747kb": [2, 8], "68": [2, 7, 13], "5mb": [2, 4], "chardet": [2, 4, 8, 10, 13], "urllib3": [2, 3, 4, 6, 8, 10, 13, 14], "26": [2, 4, 8, 10, 11, 13, 14, 15], "21": [2, 3, 4, 6, 7, 8, 9, 10, 13, 14, 15], "24": [2, 4, 7, 8, 10, 13, 14, 15], "certifi": [2, 3, 4, 6, 8, 10, 13, 14], "2017": [2, 3, 4, 6, 8, 10, 13, 14], "2020": [2, 4, 8], "12": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "idna": [2, 3, 4, 6, 8, 10, 13, 14], "importlib": [2, 4, 8, 13], "20": [2, 4, 7, 8, 9, 10, 11, 13, 14, 15], "python_vers": [2, 4, 8], "34": [2, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15], "type": [2, 4, 7, 8, 10, 11, 12, 13, 14, 15], "extens": [2, 4, 8, 9, 10, 13, 14], "zipp": [2, 4, 8, 13], "creat": [2, 4, 7, 8, 10, 14, 15], "filenam": [2, 4, 8, 10, 13, 14, 15], "linux_x86_64": [2, 10, 13], "size": [2, 4, 8, 10, 12, 14, 15], "5626703": 2, "sha256": [2, 4, 8, 10, 14], "a58565fc21a1f9d3a7c51a3aea138cf612babbefb36ae05cbaccec852b55d967": 2, "store": [2, 4, 8, 10, 13, 14], "directori": [2, 4, 8, 10, 14], "root": [2, 4, 8, 10, 12, 14], "cach": [2, 4, 8, 10, 14], "0c": 2, "9d": 2, "db": 2, "6d3404c33da5b7adb6c6972853efb6a27649d3ba15f7e9bebb": 2, "successfulli": [2, 3, 4, 5, 6, 8, 10, 13, 14], "built": [2, 4, 8, 10, 14], "load": [2, 5, 10, 11, 12, 13, 14, 15], "do": [2, 7, 9, 11, 12, 14, 15], "pars": [2, 6, 10], "call": [2, 5, 6, 7, 13, 14], "sentenc": [2, 5, 6, 10, 14, 15], "\u0e1e\u0e27\u0e01\u0e40\u0e23\u0e32\u0e43\u0e0a": 2, "\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": [2, 5], "visual": [2, 6, 9, 12, 15], "tree": [2, 6], "graphviz": 2, "dot": 2, "pre": [3, 5, 7, 15], "0b4": 3, "22": [3, 4, 5, 6, 7, 8, 10, 11, 13, 14, 15], "31": [3, 6, 7, 9, 13, 14, 15], "charset": [3, 6, 14], "normal": [3, 6, 13, 14], "2023": [3, 6, 14], "cp310": [3, 6, 14], "manylinux_2_17_x86_64": [3, 6, 10, 13, 14], "manylinux2014_x86_64": [3, 4, 6, 8, 10, 13, 14], "993": [3, 6, 14], "16": [3, 7, 8, 9, 11, 13, 15], "corpu": [3, 4, 5, 6, 7, 9, 10, 11, 15], "thai_word": [3, 7], "syllable_token": [3, 7], "all_thai_words_dict": 3, "18": [3, 4, 7, 9, 11, 13, 15], "khave": 3, "khaveeverifi": 3, "kv": 3, "39": [3, 5, 7, 8, 9, 10, 11, 12, 14, 15], "\u0e40\u0e17\u0e2d\u0e0d": 3, "\u0e08": [3, 7, 9, 11, 12, 14, 15], "\u0e1a": [3, 7, 9, 10, 11, 12, 14, 15], "list_sumpu": 3, "try": [3, 5, 7, 10, 12, 14], "is_sumpu": 3, "except": [3, 12], "pass": [3, 7, 13, 15], "print": [3, 4, 7, 9, 11, 12, 15], "\u0e2d": [3, 7, 9, 11, 12, 14, 15], "\u0e1f": [3, 7], "\u0e16": [3, 7, 9, 11], "\u0e2b\u0e25": [3, 6, 12, 14], "\u0e17\u0e27": 3, "\u0e1b": [3, 7, 9, 11, 14, 15], "\u0e07": [3, 7, 9, 11, 12, 14, 15], "\u0e2b": [3, 11], "\u0e04": [3, 7, 8, 9, 11, 12, 14], "\u0e2b\u0e19": [3, 7, 9, 14], "\u0e04\u0e23": [3, 5, 7, 9, 11, 12], "we": [4, 5, 6, 7, 9, 11, 12, 13, 14, 15], "machin": 4, "The": [4, 6, 7, 9, 10, 11, 14, 15], "vistec": [4, 14], "depa": 4, "thailand": 4, "artifici": 4, "intellig": [4, 12], "research": [4, 10, 14], "institut": 4, "fairseq": 4, "ab": 4, "92c6efb05ffdfe16fbdc9e463229d9af8c3b74dc943ed4b4857a87b223c2": 4, "dataclass": 4, "2f": 4, "1095cdc2868052dd1e64520f7c0d5c8c550ad297e944e641dbf1ffbb9a5d": 4, "cython": 4, "hydra": 4, "core": [4, 10], "52": [4, 7], "e3": [4, 10], "fbd70dd0d3ce4d1d75c22d56c0c9f895cfa7ed6587a9ffb821d6812d6a60": 4, "hydra_cor": 4, "123kb": 4, "133kb": 4, "cffi": [4, 13], "sacrebleu": 4, "7e": 4, "57": [4, 7, 9, 10, 11], "0c7ca4e31a126189dab99c19951910bd081dea5bbd25f24b77107750eae7": 4, "54kb": 4, "61kb": 4, "3mb": [4, 8], "torch": [4, 9, 10, 12, 13, 14], "cu101": 4, "regex": [4, 6, 8, 10, 13, 14], "2019": [4, 7, 8, 10, 13, 14], "omegaconf": 4, "d0": 4, "eb": [4, 10], "9d63ce09dd8aa85767c65668d5414958ea29648a0eec80a4a7d311ec2684": 4, "antlr4": 4, "runtim": 4, "56": [4, 7, 9], "02": [4, 9, 11, 12], "789a0bddf9c9b31b14c3e79ec22b9656185a803dc31c15f006f9855ece0d": 4, "112kb": 4, "4mb": [4, 8], "resourc": [4, 13], "pycpars": [4, 13], "portalock": 4, "89": [4, 10], "a6": 4, "3814b7107e0788040870e8825eebf214d72166adf656ba7d4bf14759a06a": 4, "py2": [4, 10], "pyyaml": [4, 10, 13, 14], "7a": 4, "a5": 4, "393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a": 4, "636kb": 4, "645kb": 4, "0mb": [4, 8], "antlr4_python3_runtim": 4, "141231": 4, "7443fbcc47b93d3b320b897cf91d8b947b6fdc6a0795dcce01ed16fd31c8ab6d": 4, "e2": [4, 15], "fa": 4, "b78480b448b8579ddf393bebd3f47ee23aa84c89b6a78285c8": 4, "found": [4, 5, 10, 13, 15], "exist": [4, 10, 13], "uninstal": [4, 10, 13], "sacremos": [4, 8, 13], "43": [4, 7, 8, 9, 10, 11, 14], "f5": [4, 8], "99": [4, 8, 9, 11, 12], "e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577": [4, 8], "click": [4, 6, 8, 10, 13], "joblib": [4, 6, 8, 10, 13], "six": [4, 8, 9, 10, 13], "archiv": [4, 8, 9, 11, 15], "dev": [4, 5, 7, 9, 10, 11, 14, 15], "zip": [4, 8, 9, 11, 15], "upgrad": 4, "dev0": [4, 8], "11003566": 4, "b64ebc4010c51f2644c15473edd0c49540644725a367c28baa0d3f3e19edcccb": 4, "tmp": 4, "ephem": 4, "zkojv2_o": 4, "4e": 4, "1e": [4, 9, 11, 13], "26f3198c6712ecfbee92928ed1dde923a078da3d222401cc78": 4, "download_model_al": 4, "scb_1m_en": 4, "th_mose": 4, "100": [4, 5, 7, 9, 11, 12, 14, 15], "1174648148": 4, "81506882": 4, "14it": 4, "scb_1m_th": 4, "en_spm": 4, "703780432": 4, "08": [4, 7, 10, 11, 13], "78234386": 4, "81it": 4, "enthtransl": 4, "thentransl": 4, "en": [4, 13], "have": [4, 12, 14, 15], "bpe": 4, "want": [4, 10, 12, 14], "fri": 4, "chicken": 4, "\u0e44\u0e01": [4, 7, 9], "\u0e17\u0e2d\u0e14\u0e04": 4, "\u0e30": [4, 9, 11, 15], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e01": 4, "\u0e19\u0e44\u0e01": 4, "\u0e17\u0e2d\u0e14": [4, 9], "\u0e1c\u0e21\u0e2d\u0e22\u0e32\u0e01\u0e40\u0e02": 4, "\u0e22\u0e19\u0e42\u0e1b\u0e23\u0e41\u0e01\u0e23\u0e21\u0e04\u0e2d\u0e21\u0e1e": 4, "\u0e27\u0e40\u0e15\u0e2d\u0e23": 4, "write": [4, 11, 12], "comput": 4, "program": 4, "rust": 5, "node": 5, "bind": 5, "similarli": 5, "newmm": [5, 7, 10, 14], "maxim": 5, "match": [5, 7], "base": [5, 6, 7, 8, 9, 10, 11, 14, 15], "honor": [5, 12], "charact": [5, 12], "cluster": [5, 14], "boundari": 5, "howev": [5, 12], "compar": 5, "pure": 5, "implement": 5, "much": [5, 12], "faster": 5, "For": [5, 7, 9, 10, 13, 14, 15], "comparison": 5, "refer": 5, "benchmark": [5, 11], "segment": [5, 6, 10], "lern": 5, "more": [5, 6, 7, 9, 10, 12, 14, 15], "about": [5, 7, 9, 12], "here": [5, 7, 12, 14], "In": [5, 11, 14], "learn": [5, 9, 11, 12], "serv": 5, "first": [5, 6, 11, 14], "without": [5, 7, 12], "specifi": [5, 7, 14], "paramet": [5, 7, 13], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e15": [5, 8], "\u0e14\u0e04\u0e33\u0e20\u0e32\u0e29\u0e32\u0e44\u0e17\u0e22": 5, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a": [5, 7, 8], "\u0e14": [5, 7, 8, 9, 11, 14, 15], "\u0e04\u0e33": 5, "now": [5, 12], "enhanc": 5, "countri": [5, 12, 14], "wget": [5, 9, 11, 13, 15], "command": 5, "It": [5, 7, 8, 9, 10, 11, 14, 15], "plain": 5, "contain": [5, 9, 11], "one": [5, 7, 12, 15], "per": [5, 8], "line": [5, 9, 12], "raw": [5, 9, 11, 13, 14, 15], "countries_th": 5, "txt": [5, 6, 9, 11], "2021": [5, 6, 8, 13, 14], "06": [5, 7, 11], "05": [5, 9, 13], "resolv": [5, 15], "140": [5, 9, 15], "82": [5, 11, 15], "112": [5, 7], "connect": [5, 15], "443": [5, 15], "sent": [5, 7, 10, 15], "await": [5, 15], "respons": [5, 15], "302": [5, 15], "locat": [5, 7, 10, 15], "githubusercont": [5, 15], "follow": [5, 6, 7, 9, 15], "185": [5, 15], "199": [5, 15], "108": [5, 15], "133": [5, 15], "109": 5, "200": [5, 9, 12, 15], "ok": [5, 7, 15], "length": [5, 8, 13, 15], "7622": 5, "4k": 5, "save": [5, 9, 11, 12, 13, 15], "44k": 5, "70": [5, 7, 9, 11, 13], "load_dict": 5, "function": [5, 6, 7, 9, 12, 14], "content": [5, 11], "success": [5, 12], "name": [5, 9, 10, 11, 12, 13, 14], "ha": [5, 8, 12, 14, 15], "been": [5, 12, 14], "final": [5, 6], "method": [5, 14], "\u0e2a\u0e27": [5, 9, 11], "\u0e2a\u0e14": [5, 9, 11], "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 5, "\u0e40\u0e01\u0e32\u0e2b\u0e25": 5, "\u0e1a\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 5, "\u0e44\u0e17\u0e22": [5, 11], "demonstr": 6, "chunk_pars": 6, "phrase": 6, "orchid": 6, "read": [6, 7, 10, 11, 12, 13], "pull": [6, 15], "524": 6, "need": [6, 14], "nltk": [6, 10], "preprocess": [6, 9, 11, 14], "data": [6, 10, 11, 12, 13, 14], "svgling": 6, "svgwrite": 6, "66": [6, 7], "modul": [6, 7, 9, 10, 12], "word_token": [6, 7, 9, 11, 15], "take": [6, 9, 12, 15], "pos_tag": [6, 7, 8], "mark": [6, 7], "them": [6, 7, 11, 12], "part": [6, 10, 14], "speech": [6, 10, 13], "po": [6, 9, 10], "insid": [6, 7, 13], "outsid": [6, 7, 12], "begin": [6, 7, 12], "iob": 6, "conlltags2tre": 6, "convert": [6, 7, 13], "format": [6, 15], "svg": 6, "defin": 6, "new": [6, 7, 9, 12, 15], "test": [6, 7, 9, 11, 12], "input": [6, 11, 13, 14], "perform": [6, 7, 9, 11, 12, 14], "combin": 6, "tripl": 6, "p": [6, 7, 9, 13, 15], "m": [6, 7, 9], "w": [6, 7, 11, 15], "t": [6, 7, 8, 9, 10, 12, 14], "engin": [6, 7, 8, 10], "perceptron": [6, 10], "sever": [6, 12], "draw_tre": 6, "syntact": 6, "were": [6, 10, 12, 14], "\u0e41\u0e21\u0e27\u0e01": 6, "\u0e19\u0e1b\u0e25\u0e32": 6, "\u0e04\u0e19\u0e2b\u0e19\u0e2d\u0e07\u0e04\u0e32\u0e22\u0e40\u0e1b": 6, "\u0e19\u0e04\u0e19\u0e19": 6, "\u0e32\u0e23": [6, 7], "\u0e1b\u0e25\u0e32\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e43\u0e19\u0e19": 6, "\u0e33": [6, 9, 15], "\u0e33\u0e21": 6, "\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e17\u0e33\u0e44\u0e21\u0e40\u0e02\u0e32\u0e23": 6, "\u0e01\u0e04": 6, "\u0e13": [6, 7, 9, 11], "\u0e04\u0e19\u0e2d\u0e30\u0e44\u0e23\u0e2d\u0e22": 6, "\u0e07\u0e15": [6, 7, 12], "\u0e19\u0e44\u0e21": [6, 7, 15], "basic": 7, "uncom": [7, 9, 11, 12, 15], "run": [7, 9, 10, 11, 12, 13, 15], "colab": [7, 9, 10, 11, 12, 13, 14, 15], "extra": 7, "blob": [7, 13], "epitran": 7, "__version__": 7, "provid": [7, 8, 11, 12, 14], "some": [7, 10, 12, 14], "readi": 7, "set": [7, 9, 10, 11, 12, 13, 14, 15], "g": [7, 10, 14], "conson": 7, "vowel": 7, "tonemark": 7, "symbol": 7, "conveni": 7, "There": 7, "ar": [7, 9, 10, 11, 12, 13, 14, 15], "also": [7, 9, 12, 15], "few": [7, 12], "util": [7, 13], "thai_charact": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e24\u0e26\u0e30": 7, "\u0e32\u0e33": [7, 15], "\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45": 7, "\u0e2f": 7, "\u0e46": [7, 9, 11], "\u0e50\u0e51\u0e52\u0e53\u0e54\u0e55\u0e56\u0e57\u0e58\u0e59": 7, "88": [7, 10], "thai_conson": 7, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e": 7, "44": 7, "\u0e54": 7, "thai_digit": 7, "isthai": 7, "fals": [7, 8, 9, 10, 11, 12, 13], "ignore_char": 7, "counthai": 7, "proport": 7, "ignor": [7, 11], "non": [7, 15], "alphabet": 7, "default": [7, 8, 9, 10, 11, 14], "countthai": 7, "\u0e19\u0e2d\u0e32\u0e17": [7, 11, 14], "\u0e15\u0e22": [7, 11, 14], "\u0e19\u0e32\u0e04\u0e21": 7, "2562": [7, 14], "67": 7, "85714285714286": 7, "sort": [7, 12], "accord": 7, "\u0e2d\u0e19": [7, 9, 11, 14], "\u0e01\u0e23\u0e30\u0e14\u0e32\u0e29": 7, "\u0e01\u0e23\u0e23\u0e44\u0e01\u0e23": 7, "\u0e44\u0e02": [7, 11], "\u0e1c": [7, 9, 11, 12, 15], "\u0e32\u0e44\u0e2b\u0e21": 7, "revers": 7, "dai": [7, 11, 12, 15], "month": 7, "buddhist": 7, "era": 7, "b": [7, 8, 9, 11, 12], "direct": [7, 12], "similar": [7, 12, 14], "datetim": 7, "strftime": 7, "thai_strftim": 7, "fmt": [7, 9, 14], "a\u0e17": 7, "d": [7, 9, 13], "\u0e28": [7, 9, 11, 12, 14], "y": [7, 9, 11, 12, 15], "\u0e40\u0e27\u0e25\u0e32": 7, "h": 7, "1976": 7, "40": [7, 9, 10, 11, 13], "\u0e19\u0e1e": [7, 12, 14], "\u0e18\u0e17": 7, "\u0e25\u0e32\u0e04\u0e21": 7, "2519": 7, "01": [7, 9, 11, 12, 13], "modifi": 7, "appli": [7, 9, 11], "right": [7, 11, 14, 15], "befor": [7, 9, 11, 12, 13], "minu": 7, "pad": [7, 13, 14], "numer": [7, 11], "result": [7, 10, 12, 13, 14], "avail": [7, 14], "underscor": 7, "space": 7, "zero": [7, 13], "upper": 7, "case": [7, 10, 11, 12, 14], "swap": 7, "o": [7, 8, 13, 14], "letter": [7, 15], "altern": 7, "note": [7, 10, 14, 15], "thai_tim": 7, "renam": 7, "time_to_thaiword": 7, "\u0e19\u0e22": 7, "\u0e19\u0e32\u0e2c": 7, "\u0e01\u0e32\u0e2a": 7, "\u0e1a\u0e2a": 7, "\u0e19\u0e32\u0e17": [7, 9], "\u0e1a\u0e40\u0e01": 7, "\u0e32\u0e27": [7, 9], "wai": [7, 14], "chosen": 7, "24h": 7, "6h": 7, "m6h": 7, "yourself": [7, 12], "\u0e40\u0e17": 7, "\u0e22\u0e07\u0e04": 7, "\u0e19\u0e2a": [7, 11], "precis": 7, "well": [7, 14], "minut": [7, 9, 12], "second": [7, 15], "onli": [7, 10, 12], "valu": [7, 12], "30": [7, 9, 14, 15], "\u0e2a\u0e2d\u0e07\u0e42\u0e21\u0e07\u0e40\u0e0a": 7, "\u0e32\u0e2a": 7, "\u0e1a\u0e40\u0e08": 7, "\u0e14\u0e19\u0e32\u0e17": 7, "\u0e41\u0e1b\u0e14\u0e42\u0e21\u0e07\u0e2a": 7, "\u0e2b\u0e01\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e04\u0e23": 7, "object": [7, 13], "\u0e1a\u0e2a\u0e32\u0e21\u0e19\u0e32\u0e2c": 7, "\u0e1a\u0e2b": 7, "\u0e32\u0e22\u0e42\u0e21\u0e07\u0e2a": 7, "At": 7, "sub": 7, "crfcut": [7, 10], "uss": 7, "sent_token": 7, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e1a": 7, "\u0e0d\u0e0d": 7, "\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e01\u0e32\u0e23\u0e1b\u0e01\u0e04\u0e23\u0e2d\u0e07\u0e41\u0e1c": 7, "\u0e19\u0e2a\u0e22\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e04\u0e23\u0e32\u0e27": 7, "\u0e17\u0e18\u0e28": 7, "\u0e01\u0e23\u0e32\u0e0a": 7, "\u0e52\u0e54\u0e57\u0e55": 7, "\u0e19\u0e23": [7, 11], "\u0e10\u0e18\u0e23\u0e23\u0e21\u0e19": 7, "\u0e0d\u0e09\u0e1a": 7, "\u0e1a\u0e0a": 7, "\u0e0b": [7, 9, 11, 12, 14], "\u0e07\u0e16": 7, "\u0e2d\u0e27": [7, 8, 9], "\u0e32\u0e40\u0e1b": [7, 11], "\u0e1a\u0e41\u0e23\u0e01\u0e41\u0e2b": 7, "\u0e07\u0e23\u0e32\u0e0a\u0e2d\u0e32\u0e13\u0e32\u0e08": 7, "\u0e01\u0e23\u0e2a\u0e22\u0e32\u0e21": 7, "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e43\u0e0a": 7, "\u0e40\u0e21": [7, 9, 11], "\u0e19\u0e17": [7, 9, 10, 11, 12, 14], "27": [7, 8, 10, 11, 13, 14, 15], "\u0e19\u0e32\u0e22\u0e19": 7, "2475": 7, "\u0e42\u0e14\u0e22\u0e40\u0e1b": 7, "\u0e19\u0e1c\u0e25\u0e1e\u0e27\u0e07\u0e2b\u0e25": 7, "\u0e07\u0e01\u0e32\u0e23\u0e1b\u0e0f": 7, "\u0e42\u0e14\u0e22\u0e04\u0e13\u0e30\u0e23\u0e32\u0e29\u0e0e\u0e23": 7, "nwhitespac": 7, "newlin": 7, "whitespac": 7, "maximum": [7, 8], "algorithm": 7, "\u0e08\u0e30\u0e23": 7, "\u0e04\u0e27\u0e32\u0e21\u0e0a": 7, "\u0e27\u0e23": 7, "\u0e32\u0e22\u0e17": 7, "\u0e17\u0e33\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30\u0e04\u0e07\u0e08\u0e30\u0e44\u0e21": 7, "\u0e22\u0e2d\u0e21\u0e43\u0e2b": 7, "\u0e17\u0e33\u0e19\u0e32\u0e1a\u0e19\u0e2b\u0e25": 7, "\u0e07\u0e04\u0e19": 7, "nnewmm": 7, "keep_whitespac": 7, "\u0e08\u0e30": [7, 9, 11], "\u0e04\u0e27\u0e32\u0e21": [7, 9], "\u0e17\u0e33": [7, 9], "\u0e44\u0e27": 7, "\u0e41\u0e25\u0e30": [7, 9, 11, 12, 15], "\u0e04\u0e07\u0e08\u0e30": 7, "other": [7, 12, 14, 15], "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e09\u0e1a": 7, "\u0e1a\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2b\u0e21": 7, "\u0e41\u0e25": [7, 9, 11, 14], "longest": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "custom_token": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19": 7, "\u0e09\u0e1a": 7, "\u0e43\u0e2b\u0e21": [7, 9, 11], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28": 7, "\u0e01\u0e0e\u0e2b\u0e21\u0e32\u0e22": 7, "common": [7, 15], "add": [7, 8, 14], "remov": [7, 13, 14], "\u0e22\u0e32\u0e22\u0e27": 7, "\u0e17\u0e22\u0e32\u0e28\u0e32\u0e2a\u0e15\u0e23": [7, 12], "\u0e02\u0e2d\u0e07\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "\u0e2d\u0e2a": 7, "\u0e21\u0e2d\u0e1f": 7, "frozenset": 7, "\u0e44\u0e2d\u0e41\u0e0b\u0e04": 7, "isaac": 7, "asimov": 7, "\u0e22\u0e32\u0e22": 7, "\u0e02\u0e2d\u0e07": [7, 9, 11, 15], "\u0e21\u0e2d": 7, "trie": 7, "ilo87": 7, "\u0e32\u0e14": 7, "\u0e27\u0e22\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e04": 7, "\u0e21\u0e04\u0e23\u0e2d\u0e07\u0e2a": 7, "\u0e17\u0e18": [7, 11, 14], "\u0e43\u0e19\u0e01\u0e32\u0e23\u0e23\u0e27\u0e21\u0e15": 7, "ilo98": 7, "\u0e27\u0e22\u0e2a": 7, "\u0e27\u0e41\u0e25\u0e30\u0e01\u0e32\u0e23\u0e23": 7, "\u0e27\u0e21\u0e40\u0e08\u0e23\u0e08\u0e32\u0e15": 7, "\u0e2d\u0e23\u0e2d\u0e07": 7, "new_word": 7, "\u0e01\u0e32\u0e23\u0e23": 7, "\u0e40\u0e2a\u0e23": 7, "\u0e20\u0e32\u0e1e\u0e43\u0e19\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e41\u0e23\u0e07\u0e07\u0e32\u0e19\u0e2a": 7, "\u0e21\u0e1e": 7, "\u0e19\u0e18": [7, 15], "union": 7, "custom_dictionary_tri": 7, "custom_dict": 7, "ilo": 7, "87": 7, "\u0e27\u0e22": [7, 9, 11], "\u0e20\u0e32\u0e1e": 7, "\u0e43\u0e19": [7, 9, 11], "\u0e01\u0e32\u0e23\u0e2a\u0e21\u0e32\u0e04\u0e21": 7, "\u0e01\u0e32\u0e23": [7, 9, 11, 15], "\u0e21\u0e04\u0e23\u0e2d\u0e07": 7, "\u0e23\u0e27\u0e21\u0e15": 7, "98": [7, 11], "\u0e27\u0e21": [7, 9], "\u0e40\u0e08\u0e23\u0e08\u0e32": 7, "differ": [7, 15], "speedtest_text": 7, "\u0e04\u0e23\u0e1a\u0e23\u0e2d\u0e1a": 7, "\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e40\u0e0a": [7, 11, 14], "\u0e19\u0e19": [7, 9, 11], "2547": 7, "\u0e21\u0e19": [7, 15], "\u0e21\u0e0a\u0e32\u0e22\u0e01\u0e27": 7, "\u0e32": [7, 8, 9, 11, 12, 14, 15], "370": 7, "\u0e04\u0e19": [7, 9, 11, 14], "\u0e01\u0e42\u0e22\u0e19\u0e02": 7, "\u0e19\u0e23\u0e16\u0e22": 7, "\u0e40\u0e2d": [7, 9], "\u0e21\u0e0b": 7, "\u0e2b\u0e23": [7, 9, 11, 14], "\u0e19\u0e2d\u0e19\u0e0b": 7, "\u0e2d\u0e19\u0e01": [7, 9], "\u0e19\u0e04": 7, "\u0e19\u0e25\u0e30": 7, "\u0e40\u0e14": [7, 11, 12], "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e16\u0e32\u0e19": 7, "\u0e15\u0e33\u0e23\u0e27\u0e08\u0e15\u0e32\u0e01\u0e43\u0e1a": 7, "\u0e44\u0e1b\u0e44\u0e01\u0e25": 7, "150": [7, 9], "\u0e42\u0e25\u0e40\u0e21\u0e15\u0e23": [7, 14], "\u0e44\u0e1b\u0e16": 7, "\u0e07\u0e04": 7, "\u0e32\u0e22\u0e2d": 7, "\u0e07\u0e04\u0e22": 7, "\u0e17\u0e18\u0e1a\u0e23": 7, "\u0e2b\u0e32\u0e23": 7, "\u0e40\u0e27\u0e25\u0e32\u0e01\u0e27": 7, "\u0e27\u0e42\u0e21\u0e07": 7, "\u0e43\u0e19\u0e2d": [7, 14], "\u0e01\u0e04\u0e14": 7, "\u0e0d\u0e32\u0e15": 7, "\u0e2d\u0e07\u0e23": [7, 9], "\u0e10": 7, "\u0e04\u0e14": 7, "\u0e08\u0e1a\u0e25\u0e07\u0e17": 7, "\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e19": 7, "\u0e1b\u0e23\u0e30\u0e19\u0e2d\u0e21\u0e22\u0e2d\u0e21\u0e04\u0e27\u0e32\u0e21": 7, "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21\u0e08": 7, "\u0e32\u0e22\u0e04": 7, "\u0e19\u0e44\u0e2b\u0e21\u0e17\u0e14\u0e41\u0e17\u0e19\u0e23\u0e27\u0e21": 7, "42": [7, 8, 9, 10, 13], "\u0e32\u0e19\u0e1a\u0e32\u0e17\u0e43\u0e2b": 7, "\u0e1a\u0e0d\u0e32\u0e15": 7, "\u0e22\u0e2b\u0e32\u0e22": 7, "\u0e23\u0e32\u0e22": 7, "\u0e14\u0e2b": 7, "\u0e1a\u0e41\u0e25\u0e30\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e2a\u0e23": 7, "\u0e08\u0e41\u0e25": 7, "\u0e27\u0e22\u0e40\u0e25": 7, "\u0e2d\u0e01\u0e15": 7, "\u0e07\u0e17": [7, 9], "\u0e40\u0e02\u0e15": 7, "\u0e41\u0e02\u0e27\u0e07\u0e2b": 7, "\u0e27\u0e2b\u0e21\u0e32\u0e01": 7, "\u0e40\u0e02\u0e15\u0e1a\u0e32\u0e07\u0e01\u0e30\u0e1b": 7, "\u0e01\u0e23": [7, 11], "\u0e07\u0e40\u0e17\u0e1e\u0e21\u0e2b\u0e32\u0e19\u0e04\u0e23": [7, 11], "\u0e2a\u0e21": [7, 12], "\u0e41\u0e25\u0e30\u0e15": 7, "\u0e27\u0e41\u0e17\u0e19\u0e1e\u0e23\u0e23\u0e04\u0e01\u0e32\u0e23\u0e40\u0e21": 7, "\u0e2d\u0e07\u0e08\u0e32\u0e01\u0e2b\u0e25\u0e32\u0e22\u0e1e\u0e23\u0e23\u0e04\u0e15": 7, "\u0e32\u0e07\u0e21\u0e32\u0e40\u0e1d": 7, "\u0e07\u0e40\u0e01\u0e15\u0e01\u0e32\u0e23\u0e19": 7, "\u0e1a\u0e04\u0e30\u0e41\u0e19\u0e19\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e43\u0e01\u0e25": 7, "\u0e42\u0e14\u0e22": [7, 9, 11], "\u0e20": [7, 8], "\u0e2a\u0e23": [7, 9, 14], "\u0e42\u0e0a\u0e15": [7, 9], "\u0e40\u0e14\u0e0a\u0e32\u0e0a": 7, "\u0e22\u0e19": [7, 8, 9, 10, 11, 14], "\u0e19\u0e15": [7, 9, 14, 15], "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1e\u0e25": 7, "\u0e07\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e23": 7, "\u0e41\u0e25\u0e30\u0e1e\u0e23": 7, "\u0e29\u0e10": 7, "\u0e0a\u0e23\u0e2a": 7, "\u0e08\u0e32\u0e01\u0e1e\u0e23\u0e23\u0e04\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e18": 7, "\u0e44\u0e14": [7, 9, 11, 12, 14, 15], "\u0e04\u0e30\u0e41\u0e19\u0e19": 7, "96": 7, "\u0e04\u0e30\u0e41\u0e19\u0e19\u0e40\u0e17": 7, "\u0e32\u0e01": [7, 14], "\u0e40\u0e21\u0e29\u0e32\u0e22\u0e19": [7, 11], "\u0e07\u0e40\u0e1b": 7, "\u0e19\u0e27": 7, "\u0e19\u0e2d": [7, 11], "\u0e2a\u0e40\u0e15\u0e2d\u0e23": 7, "\u0e19\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e02\u0e2d\u0e07\u0e0a\u0e32\u0e27\u0e04\u0e23": 7, "\u0e2a\u0e15": 7, "\u0e40\u0e01": [7, 9, 11, 14], "\u0e14\u0e40\u0e2b\u0e15": 7, "\u0e23\u0e30\u0e40\u0e1a": 7, "\u0e14\u0e15": 7, "\u0e2d\u0e40\u0e19": 7, "\u0e2d\u0e07\u0e43\u0e19\u0e42\u0e1a\u0e2a\u0e16": 7, "\u0e41\u0e25\u0e30\u0e42\u0e23\u0e07\u0e41\u0e23\u0e21\u0e2d\u0e22": 7, "\u0e32\u0e07\u0e19": 7, "\u0e2d\u0e22": [7, 8, 9, 11, 14], "\u0e41\u0e2b": [7, 15], "\u0e07\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e28\u0e23": 7, "\u0e07\u0e01\u0e32": 7, "\u0e15\u0e41\u0e25": 7, "\u0e27\u0e2d\u0e22": 7, "156": 7, "\u0e41\u0e25\u0e30\u0e1a\u0e32\u0e14\u0e40\u0e08": 7, "\u0e1a\u0e2b\u0e25\u0e32\u0e22\u0e23": 7, "\u0e2d\u0e22\u0e04\u0e19": 7, "\u0e07\u0e44\u0e21": 7, "\u0e02": [7, 9, 11, 14], "\u0e2d\u0e21": [7, 11, 14], "\u0e25\u0e27": 7, "\u0e32\u0e1c": 7, "\u0e2d\u0e40\u0e2b\u0e15": 7, "\u0e21\u0e32\u0e08\u0e32\u0e01\u0e1d": 7, "\u0e32\u0e22\u0e43\u0e14": 7, "\u0e19\u0e01\u0e33\u0e2b\u0e19\u0e14\u0e08": 7, "\u0e14\u0e01\u0e32\u0e23\u0e1b\u0e23\u0e30\u0e0a": 7, "\u0e21\u0e02": [7, 11, 14], "\u0e2d\u0e23": [7, 9], "\u0e40\u0e23": [7, 10, 11], "\u0e21\u0e2a\u0e32\u0e22\u0e41\u0e16\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e2a": 7, "\u0e19\u0e17\u0e32\u0e07\u0e43\u0e19\u0e0a": 7, "\u0e27\u0e07\u0e1b\u0e25\u0e32\u0e22\u0e2a": 7, "\u0e1b\u0e14\u0e32\u0e2b": [7, 11], "\u0e01\u0e01": [7, 9, 15], "\u0e07\u0e22": 7, "\u0e2d\u0e20": 7, "\u0e21\u0e2b\u0e32\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e40\u0e0a": 7, "\u0e2d\u0e21\u0e42\u0e25\u0e01\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e40\u0e04\u0e23": [7, 9, 11], "\u0e2d\u0e07\u0e21": 7, "\u0e2d\u0e41\u0e1c": 7, "\u0e1e\u0e25": 7, "\u0e1a\u0e1f": 7, "\u0e07\u0e02": [7, 9], "\u0e08\u0e32\u0e23\u0e13": 7, "\u0e1b\u0e23\u0e30\u0e40\u0e14": [7, 11], "\u0e19\u0e01": [7, 14], "\u0e1a\u0e14": [7, 15], "\u0e01\u0e2b\u0e19": 7, "\u0e41\u0e25\u0e30\u0e04\u0e27\u0e32\u0e21\u0e44\u0e21": 7, "\u0e42\u0e1b\u0e23": 7, "\u0e07\u0e43\u0e2a": 7, "\u0e10\u0e1a\u0e32\u0e25\u0e1b": 7, "\u0e07\u0e1a\u0e2d\u0e01\u0e27": 7, "\u0e40\u0e27\u0e17": 7, "\u0e1b\u0e23\u0e30\u0e0a": 7, "belt": 7, "road": 7, "forum": 7, "\u0e43\u0e19\u0e0a": [7, 12], "\u0e27\u0e07\u0e27": 7, "\u0e2d\u0e40\u0e1b": [7, 12], "\u0e19\u0e07\u0e32\u0e19\u0e01\u0e32\u0e23\u0e17": 7, "\u0e15\u0e17": 7, "\u0e2a\u0e33\u0e04": 7, "\u0e0d\u0e17": 7, "\u0e14\u0e02\u0e2d\u0e07\u0e08": 7, "\u0e19\u0e43\u0e19\u0e1b": 7, "speed": 7, "through": [7, 12], "wrapper": 7, "cpu": [7, 9], "user": [7, 9, 10, 11], "253": 7, "sy": [7, 9], "total": [7, 9, 11, 12], "256": 7, "wall": [7, 9], "255": 7, "60": [7, 9], "\u00b5": 7, "46": [7, 10, 13, 15], "safe": 7, "33": [7, 9, 14], "attacut": [7, 10], "833": 7, "174": [7, 11], "576": 7, "possibl": [7, 15], "multi_cut": 7, "find_all_seg": 7, "mmcut": 7, "\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": [7, 9], "\u0e19\u0e44\u0e1b\u0e44\u0e14": 7, "\u0e32\u0e07\u0e44\u0e23\u0e1a": 7, "\u0e32\u0e07": [7, 9, 11], "\u0e44\u0e1b": [7, 9, 10, 11], "\u0e44\u0e23": [7, 14], "\u0e19\u0e44\u0e1b": [7, 9], "\u0e32\u0e07\u0e44\u0e23": 7, "either": 7, "ssg": [7, 10, 14], "ponrawe": 7, "__": [7, 11], "crf": 7, "prasertsom": 7, "smaller": [7, 14], "than": [7, 12, 15], "inform": [7, 9], "retriev": 7, "theeramunkong": 7, "et": 7, "al": 7, "2004": 7, "unit": 7, "35": [7, 9, 10, 14, 15], "subword_token": [7, 8], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": [7, 9], "\u0e23\u0e30": [7, 9], "\u0e44\u0e17": 7, "dict": [7, 9, 11, 12, 13], "known": [7, 15], "36": [7, 9, 10, 11, 13, 14, 15], "\u0e25\u0e40\u0e25\u0e32\u0e30": 7, "\u0e0b\u0e2d\u0e21": 7, "\u0e0b\u0e2d": 7, "\u0e2a\u0e21\u0e2d\u0e07\u0e1a\u0e27\u0e21\u0e23": 7, "\u0e19\u0e41\u0e23\u0e07": 7, "\u0e40\u0e25\u0e32\u0e30": 7, "\u0e2a\u0e21\u0e2d\u0e07": 7, "\u0e1a\u0e27\u0e21": 7, "\u0e41\u0e23\u0e07": 7, "extern": 7, "ommit": 7, "output": [7, 11, 13, 14], "37": [7, 11, 13], "These": 7, "task": [7, 10, 14], "like": [7, 9, 11, 12, 14], "cut": 7, "certain": [7, 12], "point": [7, 12, 15], "typo": 7, "tcc_po": 7, "posit": [7, 15], "ch": 7, "two": [7, 11, 12, 15], "roman": 7, "latin": 7, "royal": 7, "system": [7, 10, 12], "transcript": 7, "rtg": 7, "support": [7, 8, 15], "simpl": [7, 10, 14, 15], "royin": 7, "accur": 7, "thai2rom": 7, "context": 7, "mean": [7, 9, 12, 13], "sound": [7, 13], "represent": 7, "ipa": 7, "intern": 7, "phonet": 7, "icu": 7, "compon": 7, "unicod": 7, "pyicu": 7, "\u0e41\u0e21\u0e27": [7, 10, 15], "maeo": 7, "\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": [7, 11], "phapn": 7, "obvious": 7, "wrong": [7, 12], "m\u025b\u02d0w": 7, "updat": [7, 9, 14], "g2p": 7, "up": [7, 12], "\u025b\u02d0": 7, "p\u02b0a\u02d0pjanot": 7, "p\u02b0": 7, "a\u02d0": 7, "n": [7, 8, 11], "width": 7, "zwsp": 7, "zwnj": 7, "duplic": 7, "repeat": [7, 9], "dangl": 7, "reorder": 7, "tone": 7, "dure": 7, "\u0e40\u0e40\u0e1b\u0e25\u0e01": 7, "\u0e41\u0e1b\u0e25\u0e01": 7, "\u0e40": 7, "v": [7, 13], "\u0e41": 7, "below": 7, "standard": 7, "order": [7, 9, 11, 15], "sara": 7, "aa": 7, "mai": [7, 12, 14], "ek": 7, "\u0e40\u0e01\u0e32": 7, "includ": [7, 9, 15], "\u0e1a\u0e27": 7, "\u0e1e\u0e23": 7, "immedi": 7, "nnormal": 7, "multipl": [7, 13], "A": 7, "row": [7, 11, 15], "keep": 7, "reduc": 7, "variat": 7, "48": [7, 9, 10, 13], "\u0e40\u0e01\u0e30\u0e30\u0e30": 7, "\u0e40\u0e01\u0e30": 7, "just": [7, 12], "seri": [7, 12], "remove_zw": 7, "remove_dup_spac": 7, "remove_repeat_vowel": 7, "remove_dangl": 7, "If": [7, 10, 14], "don": [7, 12], "behavior": 7, "those": [7, 12], "shown": 7, "abov": 7, "remove_tonemark": 7, "reorder_vowel": 7, "individu": 7, "your": [7, 12, 13], "own": [7, 12], "sometim": 7, "search": [7, 14], "pythainp": 7, "deal": [7, 12], "49": 7, "arabic_digit_to_thai_digit": 7, "thai_digit_to_arabic_digit": 7, "digit_to_text": 7, "\u0e09": [7, 14], "\u0e01\u0e40\u0e09": 7, "\u0e42\u0e23\u0e1b\u0e40\u0e23": 7, "\u0e22\u0e01": 7, "\u0e51\u0e51\u0e52": 7, "50": [7, 9, 11], "51": [7, 11, 15], "\u0e07\u0e2b\u0e19": [7, 9, 12], "\u0e07\u0e2a\u0e2d\u0e07": 7, "index": [7, 9, 10, 11, 14, 15], "wikipedia": [7, 11, 12, 14], "three": 7, "kind": [7, 12], "lk82": 7, "metasound": 7, "udom83": 7, "equival": 7, "\u0e23\u0e16": [7, 9, 11], "\u0e23\u0e14": 7, "\u0e27\u0e23\u0e23": 7, "\u0e19\u0e20": 7, "\u0e23\u0e13\u0e30": 7, "\u0e23\u0e13\u0e01\u0e32\u0e23": 7, "\u0e21\u0e23\u0e23\u0e04": 7, "\u0e01\u0e29": [7, 15], "\u0e1ae400": 7, "\u0e1a930000": 7, "\u0e1a550": 7, "\u0e1ae419": 7, "\u0e1a931900": 7, "\u0e1a551": 7, "\u0e211000": 7, "\u0e21100000": 7, "\u0e21100": 7, "\u0e21310000": 7, "\u0e21551": 7, "\u0e231000": 7, "\u0e23100000": 7, "\u0e25100": 7, "\u0e23100": 7, "peter": 7, "norvig": 7, "togeth": 7, "nation": 7, "tnc": 7, "\u0e40\u0e2b\u0e25": [7, 9], "\u0e22\u0e21": 7, "correct": [7, 15], "most": [7, 12, 15], "55": [7, 11], "when": [7, 9, 10, 12, 14], "norvigspellcheck": 7, "kei": [7, 15], "int": [7, 11], "tupl": [7, 13, 15], "assign": 7, "everi": [7, 9, 12], "user_dict": 7, "1000": [7, 9, 11, 15], "\u0e22\u0e27": [7, 9, 11, 14, 15], "1000000": 7, "checker": [7, 15], "As": 7, "our": [7, 14], "give": [7, 9, 12], "edit": [7, 12, 15], "distanc": 7, "prioriti": 7, "over": 7, "textbook": 7, "By": 7, "ttc": 7, "word_freq": 7, "To": [7, 9], "current": [7, 14], "59": [7, 9, 13], "\u0e18": [7, 9, 14], "\u0e44\u0e2a": 7, "\u0e01\u0e23\u0e2d\u0e01": 7, "\u0e1b\u0e25": [7, 11], "\u0e40\u0e15": [7, 9, 11], "\u0e02\u0e2d\u0e1a\u0e04": [7, 14], "356": 7, "\u0e1b\u0e23\u0e30\u0e2a\u0e32\u0e19": 7, "84": [7, 14], "\u0e23\u0e33\u0e44\u0e23": 7, "\u0e27\u0e21\u0e17": 7, "\u0e2d\u0e07": [7, 9, 11, 14], "\u0e1d": 7, "\u0e01\u0e21\u0e30\u0e02\u0e32\u0e21": 7, "condit": 7, "filter": 7, "39963": 7, "61": [7, 11], "min_freq": [7, 9, 11, 12], "min_len": 7, "max_len": [7, 9], "30376": 7, "62": [7, 13], "checker_no_filt": 7, "dict_filt": 7, "66209": 7, "63": [7, 10], "remove_yamok": 7, "els": [7, 12, 14], "checker_custom_filt": 7, "66204": 7, "64": [7, 10, 11, 12, 13], "pos_tag_s": 7, "\u0e19\u0e17\u0e32\u0e07": 7, "fixn": 7, "vact": 7, "65": [7, 14], "\u0e1b\u0e23\u0e30\u0e01\u0e32\u0e28\u0e2a\u0e33\u0e19": 7, "\u0e01\u0e19\u0e32\u0e22\u0e01\u0e2f": 7, "\u0e2a\u0e23\u0e23\u0e40\u0e2a\u0e23": 7, "\u0e0d": [7, 11, 15], "\u0e41\u0e01": 7, "\u0e27\u0e01\u0e33\u0e40\u0e19": 7, "\u0e19\u0e08\u0e32\u0e01\u0e15\u0e33\u0e41\u0e2b\u0e19": 7, "\u0e17\u0e23\u0e07\u0e04": 7, "\u0e13\u0e27": 7, "\u0e12": 7, "\u0e40\u0e28\u0e29": [7, 9], "\u0e01\u0e2d\u0e07\u0e17": 7, "\u0e1e\u0e1a\u0e01": [7, 12], "\u0e01\u0e23\u0e30\u0e17\u0e23\u0e27\u0e07\u0e01\u0e25\u0e32\u0e42\u0e2b\u0e21": 7, "\u0e2d\u0e18": 7, "\u0e01\u0e23\u0e21\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e2a": 7, "ncmn": 7, "punc": 7, "jsbr": 7, "jcrg": 7, "vsta": 7, "tagger": [7, 14], "bio": 7, "scheme": 7, "pip3": 7, "ner": [7, 8, 10, 14], "named_ent": 7, "thainametagg": [7, 8], "get_ner": [7, 8], "2563": 7, "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e23\u0e30\u0e1a\u0e1a\u0e40\u0e27\u0e25\u0e32": 7, "\u0e19\u0e17\u0e32\u0e07\u0e08\u0e32\u0e01\u0e02\u0e19\u0e2a": 7, "\u0e07\u0e01\u0e23": 7, "\u0e07\u0e40\u0e17\u0e1e\u0e43\u0e01\u0e25": 7, "\u0e16\u0e19\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e44\u0e1b\u0e08": 7, "\u0e07\u0e2b\u0e27": [7, 14], "\u0e14\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "\u0e27\u0e23\u0e32\u0e04\u0e32": 7, "297": [7, 15], "\u0e1a\u0e32\u0e17": [7, 9], "num": [7, 13], "punct": 7, "noun": [7, 10], "verb": [7, 10, 15], "\u0e23\u0e30\u0e1a\u0e1a": [7, 9], "\u0e08\u0e32\u0e01": [7, 9, 11, 12], "adp": 7, "\u0e02\u0e19\u0e2a": 7, "organ": [7, 8, 14], "\u0e07\u0e40\u0e17\u0e1e": 7, "\u0e43\u0e01\u0e25": 7, "adj": 7, "\u0e16\u0e19\u0e19": 7, "\u0e01\u0e33\u0e41\u0e1e\u0e07\u0e40\u0e1e\u0e0a\u0e23": 7, "aux": [7, 10, 13], "\u0e23\u0e32\u0e04\u0e32": 7, "monei": [7, 12], "word_vector": [7, 15], "\u0e29\u0e22": [7, 15], "2504981": 7, "doesnt_match": [7, 15], "\u0e04\u0e04\u0e25": 7, "\u0e40\u0e08": [7, 9, 11, 12, 15], "\u0e32\u0e2b\u0e19": 7, "\u0e32\u0e17": 7, "site": 7, "gensim": [7, 15], "keyedvector": [7, 15], "877": 7, "futurewarn": [7, 15], "arrai": [7, 9, 11, 13, 14, 15], "stack": [7, 15], "must": [7, 12, 13, 15], "iter": [7, 11, 15], "deprec": [7, 10, 13, 14, 15], "rais": [7, 15], "an": [7, 12, 14, 15], "error": [7, 15], "futur": [7, 15], "vstack": [7, 15], "self": [7, 15], "word_vec": [7, 15], "use_norm": [7, 15], "used_word": [7, 15], "astyp": [7, 9, 13, 15], "real": [7, 12, 15], "69": [7, 13], "bahttext": 7, "1234567890123": 7, "\u0e07\u0e25": 7, "\u0e32\u0e19\u0e2a\u0e2d\u0e07\u0e41\u0e2a\u0e19\u0e2a\u0e32\u0e21\u0e2b\u0e21": 7, "\u0e19\u0e2b": 7, "\u0e2d\u0e22\u0e2b\u0e01\u0e2a": 7, "\u0e14\u0e25": 7, "\u0e32\u0e19\u0e41\u0e1b\u0e14\u0e41\u0e2a\u0e19\u0e40\u0e01": 7, "\u0e32\u0e2b\u0e21": 7, "\u0e19\u0e2b\u0e19": 7, "\u0e07\u0e23": [7, 11], "\u0e2d\u0e22\u0e22": 7, "\u0e1a\u0e2a\u0e32\u0e21\u0e1a\u0e32\u0e17\u0e2a": 7, "\u0e32\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "round": [7, 13], "satang": 7, "909": 7, "\u0e07\u0e1a\u0e32\u0e17\u0e40\u0e01": 7, "\u0e1a\u0e40\u0e2d": 7, "\u0e14\u0e2a\u0e15\u0e32\u0e07\u0e04": 7, "lowphansirikul": 8, "l": [8, 9, 11, 15], "polpanuma": 8, "c": [8, 9, 11, 15], "jantrakulchai": 8, "nutanong": 8, "arxiv": 8, "preprint": 8, "2101": 8, "09635": 8, "jan": 8, "full": [8, 12], "thai2transform": [8, 14], "11006400": 8, "f89b594cbbebbc1940c16b0957a74182f2ea8169de8270e33f0c6bac5d1d4fcd": 8, "9a": 8, "9e": 8, "b2ab1db5c70b14b8d5d8a402e36ed915c2ec906df5c4f4b089": 8, "f9": 8, "5ca07ec9569d2f232f3166de5457b63943882f7950ddfcc887732fc7fb23": 8, "9mb": 8, "71": 8, "2ddc317b2121117bf34dd00f5b0de194158f2a44ee2bf5e47c7166878a97": 8, "manylinux2010_x86_64": [8, 13], "filelock": [8, 10, 13, 14], "7d": 8, "09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10": 8, "883kb": 8, "890kb": 8, "pypars": [8, 10, 13], "893262": 8, "26dd1871c98e4cd5fe1938dbeba7086606c31e80a945ec9f752859e252fe7068": 8, "3c": 8, "fd": 8, "7ce5c3f0666dab31a50123635e6fb5e19ceb42ce38d4e58f45": 8, "dataset": [8, 10, 14], "thainer": [8, 10], "lst20": [8, 10], "dataset_nam": [8, 14], "\u0e17\u0e14\u0e2a\u0e2d\u0e1a\u0e1c\u0e21\u0e21": 8, "\u0e19\u0e32\u0e22\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e17\u0e17": 8, "\u0e22\u0e44\u0e1e\u0e1a": 8, "\u0e25\u0e22": 8, "ask": 8, "truncat": 8, "max_length": 8, "predefin": 8, "person": [8, 12], "\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 10, 14], "\u0e22\u0e19\u0e2a\u0e27\u0e19\u0e01": [8, 14], "\u0e2b\u0e25\u0e32\u0e1a\u0e40\u0e1b": [8, 14], "\u0e19\u0e42\u0e23\u0e07\u0e40\u0e23": [8, 14], "\u0e22\u0e19\u0e17": [8, 14], "\u0e2a\u0e27\u0e19\u0e01": [8, 14], "\u0e2b\u0e25\u0e32\u0e1a": [8, 14], "t2": [8, 14], "grouped_ent": [8, 14], "ttl": 8, "\u0e19\u0e32\u0e22": [8, 12], "\u0e27\u0e23\u0e23\u0e13\u0e1e\u0e07\u0e29": 8, "\u0e1c\u0e21\u0e21": 8, "\u0e1c\u0e21": [8, 10, 11], "pr": 8, "nn": [8, 10], "\u0e27\u0e23\u0e23\u0e13": 8, "\u0e1e\u0e07\u0e29": 8, "\u0e44\u0e1e\u0e1a": 8, "grouped_word": 8, "\u0e14\u0e04\u0e33\u0e22": 8, "detail": [9, 12], "step": [9, 11], "taken": 9, "analyz": [9, 11], "evalu": 9, "metric": [9, 11, 12, 14], "overal": 9, "accuraci": [9, 11, 12], "across": [9, 12], "neg": [9, 14, 15], "ativ": 9, "itiv": 9, "neu": 9, "tral": 9, "uestion": 9, "class": [9, 11, 12], "fasttext": [9, 15], "semi": 9, "supervis": [9, 11], "public": [9, 10, 14], "privat": 9, "72781": 9, "7499": 9, "63144": 9, "6131": 9, "71259": 9, "74194": 9, "73119": 9, "75859": 9, "One": 9, "time": [9, 11, 12], "73372": 9, "75968": 9, "kaggl": [9, 11, 15], "competit": 9, "upon": 9, "1st": 9, "place": 9, "solut": 9, "googl": [9, 11, 12, 13, 14, 15], "sklearn_crfsuit": [9, 11, 15], "emoji": [9, 10, 12, 14, 15], "fastai": [9, 11, 12, 15], "master": [9, 11, 15], "unzip": [9, 11], "mkdir": [9, 11], "wisesight_data": 9, "snippet": 9, "font": [9, 15], "matplotlib": [9, 11, 12, 15], "gist": 9, "korakot": 9, "9d7f5db632351dc92607fdec72a4953f": 9, "phonbopit": 9, "sarabun": [9, 15], "webfont": 9, "thsarabunnew": 9, "ttf": [9, 15], "cp": 9, "mpl": 9, "share": [9, 12], "truetyp": 9, "font_manag": [9, 15], "_rebuild": 9, "rc": 9, "famili": [9, 12], "load_ext": 9, "autoreload": [9, 15], "np": [9, 11, 12, 13, 14, 15], "panda": [9, 11, 12, 15], "pd": [9, 11, 12, 15], "tqdm_notebook": [9, 11, 12], "process_thai": [9, 11], "viz": [9, 11], "pyplot": [9, 11, 12, 15], "plt": [9, 11, 12, 15], "seaborn": [9, 11, 12, 14], "sn": [9, 11, 12, 14], "reload": 9, "reload_ext": [9, 15], "clean": [9, 11, 12], "rule": [9, 11], "aim": [9, 11], "spars": [9, 11], "bag": [9, 11], "pre_rul": [9, 11, 12], "post_rul": [9, 11, 12], "after": [9, 11], "\u0e32\u0e19\u0e19\u0e19\u0e19\u0e19": 9, "\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19\u0e19\u0e32\u0e19": 9, "amp": [9, 12], "www": [9, 10, 13], "\u0e32\u0e19": [9, 11], "xxrep": [9, 11], "xxwrep": 9, "\u0e19\u0e32\u0e19": 9, "xxurl": 9, "open": [9, 10, 11, 12, 13, 15], "f": [9, 11, 12, 13, 14, 15], "strip": [9, 11, 12], "readlin": 9, "train_label": 9, "categori": 9, "all_df": [9, 11], "datafram": [9, 11, 15], "to_csv": [9, 11], "shape": [9, 11, 13, 15], "24063": 9, "test_df": [9, 11], "2674": 9, "map": 9, "lambda": 9, "x": [9, 12, 13, 15], "wc": 9, "uwc": 9, "preval": 9, "value_count": [9, 11], "544612": 9, "255164": 9, "178698": 9, "021527": 9, "dtype": [9, 11, 13], "float64": [9, 11], "85": 9, "under": [9, 14], "oversampl": 9, "balanc": [9, 11], "out": [9, 12], "littl": 9, "hyperparamet": 9, "sklearn": [9, 11, 14, 15], "model_select": 9, "train_test_split": 9, "train_df": [9, 11], "valid_df": 9, "test_siz": 9, "random_st": [9, 11], "1412": [9, 11], "reset_index": [9, 11], "drop": [9, 11], "actual": 9, "copi": [9, 11], "read_csv": [9, 11, 12], "head": [9, 11, 12, 15], "\u0e19\u0e04\u0e19\u0e25\u0e1a\u0e41\u0e2d\u0e1e": 9, "viu": 9, "\u0e19\u0e43\u0e08\u0e41\u0e25\u0e30\u0e40\u0e02": 9, "\u0e32\u0e43\u0e08\u0e40\u0e02\u0e32\u0e19\u0e30\u0e04\u0e30": 9, "\u0e41\u0e1c\u0e25\u0e21": 9, "\u0e25\u0e1a": 9, "\u0e41\u0e2d": 9, "\u0e19\u0e43\u0e08": 9, "\u0e40\u0e02": [9, 11], "\u0e32\u0e43\u0e08": 9, "\u0e40\u0e02\u0e32": 9, "\u0e44\u0e1b\u0e0a\u0e21\u0e44\u0e21": 9, "\u0e27\u0e02\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e41\u0e25\u0e30\u0e23\u0e2d\u0e07\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e19\u0e08": [9, 14], "\u0e0a\u0e21": 9, "\u0e41\u0e0a\u0e21\u0e1b": 9, "\u0e23\u0e2d\u0e07": 9, "\u0e21\u0e23\u0e16\u0e0b": 9, "\u0e04\u0e40\u0e1b": 9, "\u0e19\u0e01\u0e25": 9, "\u0e21\u0e17": [9, 12], "\u0e32\u0e23\u0e33\u0e04\u0e32\u0e19\u0e21\u0e32\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01\u0e01": 9, "\u0e23\u0e33": 9, "\u0e04\u0e32\u0e19": 9, "\u0e21\u0e32\u0e01": [9, 11], "\u0e2d\u0e22\u0e32\u0e01\u0e2a\u0e27\u0e22\u0e40\u0e2b\u0e21": 9, "\u0e2d\u0e19\u0e40\u0e08": 9, "\u0e32\u0e02\u0e2d\u0e07\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e04\u0e30": 9, "\u0e40\u0e19\u0e22": 9, "\u0e01\u0e32": [9, 11], "\u0e43\u0e1a\u0e2b\u0e19": 9, "\u0e2d\u0e22\u0e32\u0e01": 9, "\u0e2a\u0e27\u0e22": 9, "\u0e40\u0e2b\u0e21": 9, "\u0e32\u0e02\u0e2d\u0e07": 9, "\u0e41\u0e1a\u0e23\u0e19\u0e14": 9, "\u0e32\u0e27\u0e42\u0e16\u0e25\u0e30\u0e23": 9, "\u0e41\u0e1e\u0e07": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30\u0e15": 9, "\u0e01\u0e40\u0e1b": 9, "\u0e19\u0e08\u0e32\u0e19\u0e46\u0e25\u0e3015": 9, "\u0e42\u0e16": 9, "\u0e25\u0e30": 9, "\u0e40\u0e1e\u0e23\u0e32\u0e30": 9, "\u0e08\u0e32\u0e19": 9, "381": 9, "218": 9, "544957": 9, "253557": 9, "180071": 9, "021415": 9, "542659": 9, "264266": 9, "170914": 9, "022161": 9, "variabl": [9, 13], "y_train": [9, 11], "y_valid": 9, "faetur": 9, "feature_extract": [9, 11], "tfidfvector": 9, "linear_model": 9, "logisticregress": 9, "tfidf": [9, 11], "ngram_rang": [9, 11], "min_df": [9, 11], "sublinear_tf": 9, "tfidf_fit": 9, "text_train": 9, "text_valid": 9, "text_test": 9, "20453": 9, "4614": 9, "3610": 9, "top_feats_al": 9, "plot_top_feat": 9, "get_feature_nam": 9, "toarrai": 9, "448": 9, "492": 9, "940": 9, "938": 9, "rank": [9, 15], "score": [9, 11, 14], "ngram": 9, "029990": 9, "022852": 9, "020252": 9, "\u0e40\u0e25\u0e22": [9, 11], "019493": 9, "018153": 9, "852": 9, "862": 9, "73": [9, 14], "count": 9, "uniqu": [9, 12], "might": [9, 12, 13], "so": [9, 12], "standardscal": 9, "scaler": 9, "scaler_fit": 9, "float": [9, 13], "mean_": 9, "var_": 9, "num_train": 9, "num_valid": 9, "num_test": 9, "96529942": 9, "22744462": 9, "1151": 9, "47512883": 9, "513": 9, "46009207": 9, "74": 9, "concaten": [9, 13, 14], "x_train": [9, 11], "axi": [9, 13, 15], "x_valid": 9, "x_test": [9, 11], "4616": 9, "75": 9, "penalti": [9, 11], "l2": [9, 11], "solver": 9, "liblinear": 9, "dual": 9, "multi_class": [9, 11], "ovr": [9, 11], "7324099722991689": 9, "76": 9, "prob": [9, 11], "predict_proba": 9, "probs_df": 9, "column": [9, 11, 15], "classes_": 9, "pred": [9, 11], "hit": 9, "probs_df_linear": 9, "77": 9, "confusion_matrix": 9, "conf_mat": 9, "heatmap": [9, 14], "annot": [9, 14, 15], "xticklabel": [9, 14], "yticklabel": [9, 14], "ylabel": 9, "xlabel": 9, "callback": [9, 11, 12], "csvlogger": [9, 11, 12], "savemodelcallback": 9, "tt": [9, 11, 12], "tok_func": [9, 11, 12], "thaitoken": [9, 11, 12], "lang": [9, 11, 12], "pre_rules_th": [9, 11, 12], "post_rules_th": [9, 11, 12], "tokenizeprocessor": [9, 11, 12], "chunksiz": [9, 11, 12], "10000": [9, 11, 12], "mark_field": [9, 11, 12], "numericalizeprocessor": [9, 11, 12], "vocab": [9, 11, 12, 13, 15], "max_vocab": [9, 11, 12], "60000": [9, 11, 12], "data_lm": [9, 11, 12], "textlist": [9, 11, 12], "from_df": [9, 11, 12], "col": [9, 11, 12], "split_by_rand_pct": [9, 12], "valid_pct": [9, 11], "seed": [9, 11], "label_for_lm": [9, 11, 12], "databunch": [9, 11, 12], "sanity_check": [9, 11, 12], "wisesight_lm": 9, "pkl": [9, 11, 15], "train_d": [9, 11], "valid_d": [9, 11], "23823": 9, "240": [9, 10], "emb_sz": [9, 11, 12], "400": [9, 11, 12], "n_hid": [9, 11, 12], "1550": [9, 11, 12], "n_layer": [9, 11, 12], "pad_token": [9, 11, 12], "qrnn": [9, 11, 12], "tie_weight": [9, 11, 12], "out_bia": [9, 11, 12], "output_p": [9, 11, 12], "hidden_p": [9, 11, 12], "input_p": [9, 11, 12], "embed_p": [9, 11, 12], "weight_p": [9, 11, 12], "trn_arg": [9, 11, 12], "drop_mult": [9, 11, 12], "clip": [9, 11, 12], "alpha": [9, 11, 12], "beta": [9, 11, 12], "language_model_learn": [9, 11, 12], "awd_lstm": [9, 11, 12], "load_pretrain": [9, 11, 12], "_thwiki_lstm": [9, 11, 12], "languagelearn": [9, 12], "textlmdatabunch": [9, 12], "labellist": [9, 12], "item": [9, 12, 13], "lmtextlist": [9, 12], "xxbo": [9, 11, 12], "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e40\u0e23\u0e32": [9, 11], "\u0e1c\u0e25": 9, "\u0e07\u0e2d\u0e2d\u0e01": 9, "\u0e22\u0e32\u0e2a": 9, "\u0e40\u0e22\u0e2d\u0e30": [9, 11], "\u0e42\u0e25\u0e01": 9, "\u0e2d\u0e2d\u0e21": 9, "\u0e40\u0e04": [9, 15], "\u0e41\u0e19\u0e19": 9, "\u0e2d\u0e30\u0e44\u0e23": [9, 11], "\u0e19\u0e30": 9, "lmlabellist": [9, 12], "path": [9, 12, 13], "\u0e19\u0e30\u0e04\u0e30": [9, 11, 15], "\u0e41\u0e1c\u0e25": 9, "\u0e41\u0e16\u0e21": 9, "\u0e2d\u0e32\u0e23\u0e21\u0e13": 9, "\u0e42\u0e14\u0e19": 9, "xxunk": [9, 11, 12], "\u0e40\u0e19\u0e2d\u0e30": 9, "\u0e27\u0e19": [9, 11], "\u0e17\u0e32\u0e07": [9, 11], "\u0e01\u0e2d\u0e14": 9, "netflix": 9, "\u0e41\u0e19": [9, 11], "\u0e17\u0e33\u0e23": 9, "\u0e19\u0e2d\u0e19": 9, "\u0e1a\u0e15\u0e01": 9, "\u0e01\u0e32\u0e23\u0e41\u0e02": 9, "\u0e41\u0e2a\u0e07\u0e42\u0e2a\u0e21": 9, "\u0e2a\u0e19": 9, "\u0e01\u0e40\u0e01\u0e2d\u0e23": 9, "\u0e41\u0e14\u0e07": [9, 11], "\u0e42\u0e2d\u0e40\u0e1e": 9, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e1b": 9, "2560": 9, "\u0e2a\u0e19\u0e32\u0e21": 9, "\u0e04\u0e25": 9, "\u0e0b\u0e2d\u0e22": 9, "\u0e42\u0e0a\u0e04": 9, "\u0e25\u0e32\u0e14\u0e1e\u0e23": 9, "\u0e2d\u0e27\u0e14": 9, "\u0e17\u0e33\u0e44\u0e21": 9, "\u0e01\u0e04\u0e19": 9, "\u0e1e\u0e27\u0e01": 9, "\u0e1a\u0e2d": 9, "\u0e01\u0e27": [9, 11], "\u0e19\u0e21": [9, 14], "\u0e40\u0e1a\u0e25\u0e2d": 9, "\u0e43\u0e2a": 9, "\u0e02\u0e19\u0e32\u0e14": 9, "\u0e13\u0e41\u0e21": 9, "\u0e19\u0e30\u0e40\u0e19": 9, "\u0e40\u0e1b\u0e25": 9, "\u0e40\u0e2d\u0e07": 9, "\u0e27\u0e22\u0e15": 9, "\u0e21\u0e32\u0e2a": 9, "\u0e01\u0e42\u0e0a": 9, "\u0e32\u0e21\u0e04": 9, "cho": 9, "cosmet": 9, "daradaili": 9, "\u0e14\u0e32\u0e23\u0e32": 9, "\u0e40\u0e14\u0e25": 9, "\u0e04\u0e19\u0e44\u0e17\u0e22": 9, "\u0e19\u0e02": 9, "\u0e43\u0e19\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28": 9, "\u0e2b\u0e21": [9, 11, 15], "\u0e19\u0e25\u0e21": 9, "\u0e09\u0e30": 9, "\u0e42\u0e25": 9, "\u0e21\u0e30\u0e25": 9, "\u0e2d\u0e40\u0e1b\u0e25": 9, "250": 9, "\u0e02\u0e32\u0e22": [9, 11], "160": 9, "\u0e40\u0e22\u0e2d\u0e30\u0e41\u0e22\u0e30": 9, "\u0e01\u0e33\u0e44\u0e23": 9, "\u0e04\u0e27\u0e23": 9, "\u0e32\u0e27\u0e1c": 9, "\u0e43\u0e2b\u0e0d": 9, "300": [9, 11, 15], "\u0e16\u0e32\u0e14": 9, "\u0e32\u0e19\u0e1a\u0e19": 9, "80": [9, 11], "\u0e0a\u0e32\u0e40\u0e22": 9, "\u0e02\u0e27\u0e14": 9, "\u0e19\u0e41\u0e01": 9, "\u0e1e\u0e2d\u0e41\u0e25": 9, "\u0e40\u0e1a": [9, 11], "\u0e22\u0e23": 9, "120": 9, "\u0e32\u0e40\u0e01\u0e25": 9, "\u0e22\u0e14": 9, "\u0e21\u0e32": [9, 11], "360": [9, 11], "\u0e33\u0e41\u0e02": 9, "\u0e1e\u0e2d\u0e44\u0e14": 9, "\u0e2d\u0e32\u0e01\u0e32\u0e28": 9, "\u0e25\u0e30\u0e25\u0e32\u0e22": 9, "\u0e1e\u0e2d": 9, "\u0e17\u0e30\u0e40\u0e25": 9, "\u0e40\u0e1c\u0e32": 9, "\u0e25\u0e27\u0e01": 9, "\u0e32\u0e15\u0e32": 9, "\u0e01\u0e25\u0e32\u0e07\u0e46": [9, 11], "\u0e15\u0e33": 9, "\u0e41\u0e1b": 9, "\u0e21\u0e22\u0e33": 9, "\u0e2b\u0e23\u0e2d\u0e01": 9, "\u0e15\u0e23\u0e07": 9, "\u0e44\u0e1f": 9, "\u0e19\u0e43\u0e19": 9, "\u0e41\u0e17\u0e1a": 9, "\u0e41\u0e15\u0e30": 9, "\u0e19\u0e2d\u0e01": 9, "\u0e41\u0e22": 9, "\u0e40\u0e2d\u0e32\u0e40\u0e1b\u0e23": 9, "\u0e22\u0e1a": 9, "\u0e19\u0e40\u0e2d\u0e07": [9, 12], "\u0e2d\u0e32\u0e2b\u0e32\u0e23": [9, 11], "\u0e1a\u0e02": 9, "\u0e15\u0e32\u0e21": 9, "\u0e41\u0e04": [9, 11, 14], "\u0e40\u0e08\u0e2d": [9, 11], "\u0e41\u0e1a\u0e1a\u0e19": 9, "\u0e2a\u0e07\u0e2a": 9, "\u0e2d\u0e04": 9, "\u0e15\u0e32\u0e22": 9, "\u0e04\u0e32": 9, "\u0e43\u0e04\u0e23": [9, 11], "\u0e21\u0e2d\u0e07": 9, "\u0e32\u0e41\u0e23\u0e07": 9, "\u0e27\u0e19\u0e21\u0e32\u0e01": 9, "\u0e04\u0e19\u0e43\u0e19": 9, "\u0e32\u0e41\u0e23\u0e07\u0e02": 9, "\u0e02\u0e22": 9, "\u0e40\u0e25": [9, 10, 11, 15], "\u0e01\u0e19": [9, 11], "\u0e04\u0e19\u0e08\u0e19": 9, "\u0e04\u0e19\u0e23\u0e27\u0e22": 9, "\u0e01\u0e16": [9, 11], "\u0e2a\u0e21\u0e04\u0e27\u0e23": 9, "\u0e19\u0e41\u0e25\u0e30\u0e01": 9, "\u0e0a\u0e2d\u0e1a": [9, 10, 11], "\u0e19\u0e08\u0e23": 9, "\u0e0a\u0e32\u0e27\u0e15": 9, "\u0e32\u0e07\u0e0a\u0e32\u0e15": 9, "\u0e40\u0e16\u0e2d\u0e30": 9, "\u0e42\u0e2d\u0e01\u0e32\u0e2a": 9, "sequentialrnn": [9, 12], "encod": [9, 11, 12, 13, 14], "15000": 9, "padding_idx": [9, 12], "encoder_dp": [9, 12], "embeddingdropout": [9, 12], "emb": [9, 12], "rnn": [9, 10, 12], "modulelist": [9, 12, 13], "weightdropout": [9, 12], "lstm": [9, 12], "batch_first": [9, 12], "input_dp": [9, 12], "rnndropout": [9, 12], "hidden_dp": [9, 12], "lineardecod": [9, 12], "decod": [9, 12, 14], "linear": [9, 12, 13], "in_featur": [9, 12, 13], "out_featur": [9, 12, 13], "bia": [9, 10, 12, 13, 14], "output_dp": [9, 12], "opt_func": [9, 11, 12], "functool": [9, 12, 14], "partial": [9, 11, 12, 14], "optim": [9, 11, 12, 13], "adam": [9, 11, 12], "loss_func": [9, 12], "flattenedloss": [9, 12], "crossentropyloss": [9, 12], "0x7f51be568268": 9, "true_wd": [9, 12], "bn_wd": [9, 12], "wd": [9, 12], "train_bn": [9, 12], "posixpath": [9, 12], "model_dir": [9, 12], "callback_fn": [9, 12], "basic_train": [9, 12], "record": [9, 11, 12], "add_tim": [9, 12], "silent": [9, 12], "gradientclip": [9, 12], "rnntrainer": [9, 12], "layer_group": [9, 12], "sequenti": [9, 12], "cb_fns_regist": 9, "frozen": [9, 11], "freeze_to": [9, 11], "fit_one_cycl": [9, 11], "mom": [9, 11], "epoch": [9, 11], "train_loss": [9, 11], "valid_loss": [9, 11], "841187": 9, "462714": 9, "319742": 9, "unfrozen": [9, 11], "unfreez": [9, 11], "411834": 9, "205552": 9, "341766": 9, "03": 9, "178030": 9, "037095": 9, "361508": 9, "970388": 9, "930919": 9, "370139": 9, "756190": 9, "890398": 9, "376191": 9, "671704": 9, "890232": 9, "375595": 9, "save_encod": [9, 11], "wisesight_enc": 9, "lm": 9, "load_data": [9, 11], "data_cl": [9, 11], "itemlist": 9, "label_from_df": [9, 11], "ito": [9, 11, 12], "bptt": [9, 11], "500": [9, 15], "text_classifier_learn": [9, 11], "load_encod": [9, 11], "rnnlearner": 9, "textclasdatabunch": 9, "\u0e19\u0e41\u0e14\u0e14": 9, "\u0e40\u0e1e\u0e25\u0e2a": 9, "\u0e27\u0e43\u0e2b\u0e21": 9, "\u0e08\u0e23": [9, 11, 15], "\u0e42\u0e0b\u0e19": 9, "\u0e40\u0e27": 9, "\u0e2b\u0e25\u0e2d\u0e14": 9, "\u0e22\u0e32\u0e27": 9, "\u0e1d\u0e32": 9, "\u0e40\u0e2d\u0e32": [9, 11], "\u0e1e\u0e1a": 9, "\u0e25\u0e1b": 9, "soul": [9, 12], "pop": 9, "\u0e2a\u0e32\u0e21": 9, "\u0e2a\u0e44\u0e15\u0e25": 9, "\u0e07\u0e32\u0e19": [9, 11], "jamnight": 9, "\u0e19\u0e33": 9, "parkinson": 9, "xxup": 9, "toi": 9, "\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01": 9, "\u0e42\u0e0a\u0e27": 9, "\u0e41\u0e1a\u0e1a": 9, "\u0e1b\u0e41\u0e1a\u0e1a": 9, "\u0e27\u0e07": 9, "\u0e41\u0e08\u0e21": 9, "\u0e1e\u0e25\u0e32\u0e14": 9, "\u0e40\u0e08\u0e2d\u0e01": 9, "\u0e19\u0e22\u0e32\u0e22\u0e19": 9, "\u0e1b\u0e23\u0e30\u0e15": 9, "\u0e2a\u0e32\u0e21\u0e32\u0e23\u0e16": 9, "\u0e15\u0e23": [9, 14, 15], "event": 9, "go": [9, 12], "eventpop": 9, "me": [9, 12, 15], "\u0e08\u0e33\u0e01": 9, "\u0e2d\u0e32\u0e22": 9, "jamnightbyjameson": 9, "jamesonthailand": 9, "soulaftersix": 9, "theparkinson": 9, "thetoi": 9, "\u0e21\u0e30": 9, "\u0e1a\u0e2d\u0e01\u0e15": 9, "\u0e41\u0e1e": [9, 11], "\u0e40\u0e22": 9, "\u0e1e\u0e2d\u0e19": 9, "\u0e41\u0e15\u0e07\u0e42\u0e21": 9, "\u0e25\u0e14": 9, "\u0e2a\u0e07\u0e01\u0e23\u0e32\u0e19\u0e15": 9, "\u0e23\u0e2d\u0e14": 9, "555": 9, "categorylist": 9, "multibatchencod": 9, "poolinglinearclassifi": 9, "layer": [9, 13, 14], "batchnorm1d": 9, "1200": 9, "ep": [9, 13], "momentum": 9, "affin": 9, "track_running_stat": 9, "dropout": [9, 13], "27999999999999997": 9, "relu": 9, "inplac": [9, 13], "2e": [9, 11], "slice": [9, 11], "5e": [9, 11], "improv": 9, "monitor": 9, "bestmodel": 9, "script": [9, 12, 14], "train_model": 9, "812156": 9, "753478": 9, "687532": 9, "740403": 9, "699093": 9, "714394": 9, "727394": 9, "668807": 9, "723011": 9, "722163": 9, "675351": 9, "723517": 9, "675266": 9, "654477": 9, "738723": 9, "669178": 9, "641070": 9, "737962": 9, "612528": 9, "637456": 9, "744551": 9, "618259": 9, "635149": 9, "749366": 9, "572621": 9, "651169": 9, "749873": 9, "561985": 9, "661739": 9, "747593": 9, "534753": 9, "673563": 9, "738469": 9, "530844": 9, "688871": 9, "746072": 9, "522788": 9, "670024": 9, "743031": 9, "y_true": 9, "loss": [9, 11], "get_pr": [9, 11], "ds_type": 9, "datasettyp": [9, 11], "with_loss": 9, "argmax": [9, 11, 13], "to_df": 9, "8392661555312158": 9, "u": [10, 12, 13, 14], "look": [10, 12, 13, 14], "pypi": [10, 14], "pkg": [10, 14], "attempt": [10, 13], "dependency_pars": 10, "esupar": 10, "chu": 10, "liu": 10, "edmond": 10, "chu_liu_edmond": 10, "cp38": 10, "107": 10, "supar": 10, "93": 10, "2022": [10, 14, 15], "304": 10, "dill": [10, 15], "cu116": 10, "stanza": 10, "691": 10, "huggingfac": [10, 13, 14], "hub": [10, 13, 14], "huggingface_hub": [10, 13, 14], "182": 10, "jinja2": 10, "smart": 10, "pathi": 10, "langcod": 10, "pydant": 10, "logger": 10, "legaci": 10, "typer": 10, "protobuf": [10, 13], "confect": 10, "markupsaf": 10, "5626945": 10, "6613dcb188f57561a00a2e40eca1bbafe6203936b8d9c387facd79de3f06fa62": 10, "6f": 10, "3475485c7d991ca5698d39603e22a99bd6904dcac7d0a5855a": 10, "234926": 10, "e3b7a3e928e5e81053b9f869cfef5382b49f133284c6abbd718496ff11e8ee67": 10, "a1": 10, "b0bb1f7683d20b75b34ceeb56ee83a585e9b065a5fef0b2cb1": 10, "warn": [10, 13, 14], "broken": 10, "permiss": 10, "conflict": 10, "behaviour": 10, "manag": 10, "recommend": [10, 14], "virtual": 10, "environ": 10, "pypa": 10, "venv": 10, "spacy_pythainlp": 10, "dev6": 10, "nptype": 10, "473": 10, "docopt": 10, "fire": 10, "termcolor": 10, "13723": 10, "cd282751c98736c79933ed4265624e65891888bb9fdd01dc5d6fcf978d76431f": 10, "cc": 10, "f1e272f628fdb013d969acc99cfe2e031ea15b3efb74ffe842": 10, "116949": 10, "bc82a0082e9931af28c40d49e4494ce66a1f80f929b30ae4e7e1eff347b37c5c": 10, "86": 10, "88e8603bd3b1a9bff9d02d820c7431c47ad032865632657bb9": 10, "cuda": [10, 11], "__init__": 10, "497": 10, "userwarn": [10, 13, 14], "initi": [10, 11, 13, 14], "nvml": 10, "pos_engin": 10, "pos_corpu": 10, "orchid_ud": 10, "sent_engin": 10, "ner_engin": 10, "tokenize_engin": 10, "dependency_parsing_engin": 10, "dependency_parsing_model": 10, "bool": 10, "chang": [10, 12], "turn": [10, 12], "off": [10, 12], "0x7f9c02410a90": 10, "\u0e1c\u0e21\u0e40\u0e1b": 10, "\u0e19\u0e41\u0e21\u0e27": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a\u0e44\u0e1b\u0e40\u0e25": 10, "\u0e22\u0e19\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e21\u0e22": 10, "free": [10, 15], "commerci": 10, "pleas": 10, "contract": 10, "nectec": 10, "facebook": [10, 14, 15], "dancearmi": 10, "post": [10, 13], "10157641945708284": 10, "pos_lst20_perceptron": 10, "\u0e1c\u0e21\u0e0a\u0e2d\u0e1a": 10, "\u0e42\u0e23\u0e07": 10, "\u0e19\u0e32\u0e07\u0e23\u0e2d\u0e07": 10, "\u0e44\u0e1b\u0e40\u0e25": 10, "0x7f9c0146e880": 10, "weight": [10, 13, 14], "checkpoint": [10, 14], "koichiyasuoka": 10, "roberta": [10, 14], "spm": [10, 14], "upo": 10, "robertamodel": [10, 14], "classifi": [10, 11], "expect": [10, 14, 15], "anoth": [10, 14], "architectur": [10, 14], "bertforsequenceclassif": [10, 14], "bertforpretrain": [10, 14], "NOT": [10, 14], "exactli": [10, 14], "ident": [10, 14], "newli": [10, 14], "pooler": [10, 14], "dens": [10, 14], "should": [10, 12, 14], "probabl": [10, 14, 15], "down": [10, 12, 14], "stream": [10, 14, 15], "abl": [10, 14], "infer": [10, 14], "info": 10, "n_sentenc": 10, "n_batch": 10, "n_bucket": 10, "make": [10, 11, 12, 14], "apply_permut": 10, "tensor": [10, 11], "index_select": 10, "dim": [10, 11, 14], "permut": 10, "204603": 10, "elaps": 10, "dep": 10, "pron": 10, "sconj": 10, "nsubj": 10, "cop": 10, "acl": 10, "xcomp": 10, "obl": 10, "flat": 10, "star": [11, 12], "multi": 11, "both": [11, 12, 14], "number": 11, "micro": 11, "averag": 11, "f1": 11, "challeng": [11, 12], "micro_f1_publ": 11, "micro_f1_priv": 11, "59313": 11, "60322": 11, "5145": 11, "5109": 11, "5022": 11, "4976": 11, "59139": 11, "58139": 11, "bert": [11, 14], "56612": 11, "57057": 11, "review_dataset": 11, "wongnai_data": 11, "ast": [11, 12], "literal_ev": [11, 12], "counter": [11, 12], "re": [11, 12, 13, 15], "ft_data": 11, "respect": 11, "w_review_train": 11, "csv": [11, 12], "sep": [11, 12], "header": 11, "drop_dupl": 11, "rate": 11, "test_fil": 11, "concat": 11, "469282": 11, "304328": 11, "169880": 11, "046133": 11, "010377": 11, "two_df": 11, "one_df": 11, "train_bal": 11, "392365": 11, "254448": 11, "142036": 11, "115715": 11, "095436": 11, "dump": [11, 12, 14, 15], "skipgram": 11, "df_txt": 11, "df": 11, "ft_line": 11, "iterrow": 11, "ft_lab": 11, "__label__": 11, "ft_text": 11, "replace_newlin": 11, "close": [11, 14], "__label__0": 11, "df_all": 11, "home": 11, "charin": 11, "pretrainedvector": 11, "vec": 11, "1m": 11, "18176": 11, "progress": 11, "sec": 11, "thread": 11, "24858": 11, "lr": 11, "000000": 11, "309402": 11, "0h0m": 11, "wongnai_b": 11, "wordngram": 11, "731006": 11, "391282": 11, "764689": 11, "81": 11, "bin": [11, 15], "pred_lab": 11, "split": [11, 13, 15], "submit_df": 11, "reviewid": 11, "submit_fastttext_b": 11, "lukkiddd": 11, "train_split": 11, "test_split": 11, "pipelin": [11, 14], "countvector": 11, "tfidftransform": 11, "svm": 11, "text_clf": 11, "vect": 11, "clf": 11, "fit": 11, "memori": [11, 12], "binari": [11, 15], "decode_error": 11, "strict": 11, "int64": 11, "utf": [11, 13], "lowercas": 11, "max_df": 11, "max_featur": 11, "preprocessor": 11, "stop_word": 11, "ax_it": 11, "tol": 11, "0001": 11, "verbos": 11, "onehotencod": 11, "enc": 11, "handle_unknown": 11, "submit_linearsvc": 11, "59590": 11, "59731": 11, "processor": [11, 12, 13], "random_split_by_pct": 11, "wongnai_lm": 11, "45735": 11, "461": 11, "show_batch": 11, "idx": 11, "\u0e14\u0e32\u0e27": 11, "\u0e2b\u0e21\u0e14": 11, "\u0e0b\u0e30": 11, "\u0e32\u0e27\u0e2a\u0e27\u0e22": 11, "\u0e21\u0e32\u0e13": 11, "\u0e1e\u0e2d\u0e14": 11, "\u0e18\u0e22\u0e32\u0e28": 11, "\u0e1a\u0e23\u0e2d\u0e07": 11, "\u0e1a\u0e21\u0e32": 11, "\u0e2d\u0e22\u0e46": 11, "\u0e41\u0e16\u0e27": 11, "\u0e25\u0e2d\u0e07": 11, "\u0e41\u0e27\u0e30": 11, "\u0e2a\u0e33\u0e2b\u0e23": 11, "\u0e23\u0e2a": 11, "\u0e2d\u0e07\u0e14": 11, "\u0e21\u0e32\u0e01\u0e21\u0e32\u0e22": 11, "\u0e04\u0e07": 11, "\u0e42\u0e01\u0e42\u0e01": 11, "top": [11, 12], "\u0e22\u0e14\u0e32\u0e22": 11, "\u0e2b\u0e32": 11, "\u0e15\u0e2d\u0e19": 11, "\u0e27\u0e22\u0e40\u0e15": 11, "\u0e40\u0e19": 11, "\u0e17\u0e32\u0e19": 11, "\u0e2d\u0e19\u0e02": 11, "\u0e22\u0e32\u0e01": 11, "\u0e27\u0e32": 11, "\u0e2a\u0e32\u0e02\u0e32": 11, "\u0e12\u0e19\u0e32\u0e01\u0e32\u0e23": 11, "\u0e1d\u0e32\u0e01": 11, "\u0e2d\u0e01": [11, 14], "\u0e2b\u0e25\u0e32\u0e22\u0e2d\u0e22": 11, "\u0e1a\u0e23": 11, "\u0e01\u0e30": 11, "\u0e01\u0e2a\u0e32\u0e27": 11, "\u0e32\u0e02\u0e2d\u0e07\u0e23": 11, "\u0e08\u0e32": 11, "\u0e04\u0e27\u0e32\u0e21\u0e04": 11, "\u0e14\u0e40\u0e2b": 11, "\u0e27\u0e19\u0e15": 11, "\u0e2d\u0e2d\u0e01": 11, "\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07": 11, "\u0e1a\u0e27\u0e01": 11, "\u0e27\u0e19\u0e43\u0e2b\u0e0d": 11, "\u0e1a\u0e23\u0e23\u0e22\u0e32\u0e01\u0e32\u0e28": 11, "\u0e23\u0e16\u0e40\u0e02": 11, "\u0e42\u0e15": 11, "\u0e15\u0e01\u0e41\u0e15": 11, "\u0e19\u0e41\u0e19\u0e27": 11, "\u0e1a\u0e32\u0e23": 11, "\u0e42\u0e14\u0e22\u0e23\u0e2d\u0e1a": 11, "\u0e19\u0e23\u0e32": 11, "\u0e40\u0e21\u0e19": [11, 14], "next": [11, 12], "train_dl": 11, "414": 11, "3408": 11, "135": 11, "409": 11, "1325": 11, "1185": 11, "9903": 11, "368": 11, "870": 11, "254": 11, "3448": 11, "429": 11, "devic": 11, "193": 11, "10074": 11, "258": 11, "456": 11, "270": 11, "\u0e1a\u0e1e": 11, "\u0e2d\u0e07\u0e40\u0e2a": 11, "temperatur": [11, 12], "\u0e44\u0e2b\u0e21": 11, "mr": [11, 12], "\u0e04\u0e0a": 11, "\u0e09\u0e32\u0e22": 11, "2557": 11, "\u0e01\u0e33\u0e01": [11, 15], "\u0e1b\u0e1b": 11, "\u0e20\u0e32\u0e04": 11, "\u0e42\u0e23\u0e07\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 11, "2558": 11, "\u0e2d\u0e2b\u0e32": 11, "\u0e22\u0e27\u0e01": [11, 14], "lr_find": 11, "plot": [11, 12, 15], "finder": 11, "complet": 11, "learner_nam": 11, "graph": [11, 13], "min": 11, "gradient": [11, 14], "58e": 11, "04": [11, 13, 15], "22562": 11, "659182": 11, "493942": 11, "342857": 11, "375606": 11, "252919": 11, "385714": 11, "165419": 11, "013862": 11, "371429": 11, "034220": 11, "802707": 11, "357143": 11, "879111": 11, "712463": 11, "823682": 11, "624331": 11, "784611": 11, "580608": 11, "753532": 11, "553170": 11, "719396": 11, "516521": 11, "699165": 11, "513339": 11, "696516": 11, "512542": 11, "wongnai_enc": 11, "\u0e32\u0e19\u0e19": 11, "\u0e08\u0e30\u0e2d\u0e22": 11, "\u0e19\u0e01\u0e33\u0e41\u0e1e\u0e07": 11, "\u0e2d\u0e2d\u0e19": 11, "\u0e40\u0e25\u0e22\u0e41\u0e22\u0e01\u0e1a": 11, "\u0e07\u0e44\u0e1b2": [11, 14], "\u0e0a\u0e09\u0e30\u0e25\u0e32\u0e40\u0e15": [11, 14], "\u0e44\u0e2d\u0e28\u0e04\u0e23": [11, 14], "\u0e21\u0e0a\u0e32\u0e40\u0e02": [11, 14], "\u0e27\u0e27\u0e07\u0e40\u0e14": 11, "n\u0e2b": 11, "\u0e27\u0e14": [11, 14], "\u0e01\u0e46": 11, "\u0e15\u0e23\u0e30\u0e40\u0e27\u0e19\u0e2b\u0e32\u0e23": 11, "\u0e32\u0e19\u0e17\u0e32\u0e19": 11, "\u0e21\u0e32\u0e40\u0e08\u0e2d": 11, "\u0e08\u0e30\u0e27": 11, "\u0e19\u0e40\u0e08": 11, "\u0e32\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e01": 11, "\u0e04\u0e07\u0e44\u0e21": 11, "\u0e32\u0e04": 11, "\u0e14\u0e16": 11, "\u0e07\u0e2a\u0e25": 11, "\u0e14\u0e1c\u0e21\u0e04": 11, "\u0e32\u0e19\u0e41\u0e23\u0e01\u0e46\u0e40\u0e25\u0e22\u0e04\u0e23": 11, "add_test": 11, "wongnai_cl": 11, "sure": [11, 14], "got": [11, 12], "target": 11, "\u0e1e\u0e32": 11, "\u0e2d\u0e32\u0e40\u0e0b": 11, "\u0e23\u0e23": 11, "\u0e32\u0e1e\u0e23\u0e30\u0e22\u0e32\u0e1b\u0e32\u0e23": 11, "\u0e0a\u0e14\u0e32\u0e20": 11, "\u0e40\u0e29\u0e01": 11, "\u0e19\u0e01\u0e32\u0e23": 11, "\u0e0a\u0e27\u0e19": 11, "\u0e32\u0e40\u0e14": 11, "\u0e19\u0e40\u0e04\u0e22": 11, "\u0e07\u0e46": 11, "\u0e23\u0e16\u0e15": 11, "\u0e1e\u0e24\u0e28\u0e08": 11, "\u0e01\u0e32\u0e22\u0e19": 11, "\u0e32\u0e19\u0e21\u0e32": 11, "\u0e27\u0e07\u0e43\u0e19": 11, "\u0e14\u0e01": 11, "\u0e08\u0e01\u0e23\u0e23\u0e21": 11, "xxmaj": 11, "relax": 11, "night": [11, 12], "phothalai": 11, "\u0e21\u0e15": 11, "tast": 11, "\u0e2d\u0e07\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e2d\u0e19\u0e23": 11, "group": [11, 13, 15], "\u0e0d\u0e2b\u0e32": 11, "\u0e27\u0e16": 11, "\u0e01\u0e32\u0e23\u0e2a": 11, "\u0e2d\u0e2a\u0e32\u0e23": 11, "\u0e1e\u0e19": 11, "\u0e01\u0e07\u0e32\u0e19": 11, "\u0e21\u0e32\u0e16": 11, "terrac": 11, "\u0e2d\u0e07\u0e08\u0e32\u0e01": 11, "\u0e08\u0e19": 11, "\u0e17\u0e19": 11, "\u0e01\u0e23\u0e30\u0e41\u0e2a": 11, "\u0e04\u0e27\u0e32\u0e21\u0e41\u0e23\u0e07": 11, "shibuya": 11, "shabu": 11, "\u0e44\u0e2b\u0e27": 11, "\u0e02\u0e2d": 11, "\u0e15\u0e32\u0e21\u0e23\u0e2d\u0e22": 11, "\u0e2d\u0e07\u0e2b\u0e32": 11, "\u0e42\u0e2d": 11, "\u0e2a\u0e21\u0e32\u0e17\u0e32\u0e19": 11, "\u0e1b\u0e23\u0e30\u0e08\u0e33\u0e2a": 11, "\u0e0a\u0e32": 11, "\u0e40\u0e1e": [11, 12], "\u0e0a\u0e32\u0e27": 11, "\u0e01\u0e04\u0e23": 11, "pednoii": 11, "ahha": 11, "\u0e32\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23": 11, "\u0e41\u0e23\u0e01": 11, "\u0e19\u0e33\u0e40\u0e2a\u0e19\u0e2d": 11, "\u0e19\u0e32\u0e07\u0e43\u0e19": 11, "31e": 11, "07": 11, "gradual": 11, "187845": 11, "158394": 11, "472803": 11, "889035": 11, "828990": 11, "629707": 11, "760357": 11, "751162": 11, "656904": 11, "628719": 11, "721673": 11, "669456": 11, "submit_ulmfit": 11, "ulmfit": 12, "thwiki_lstm": 12, "dummi": 12, "imdb": 12, "untar_data": 12, "url": 12, "imdb_sampl": 12, "dummy_df": 12, "thwiki_ito": 12, "pickl": [12, 15], "itos_fnam": 12, "rb": [12, 15], "thwiki_vocab": 12, "check": 12, "60005": 12, "800": 12, "film": 12, "act": 12, "music": 12, "good": 12, "too": [12, 14], "though": 12, "mostli": 12, "earli": 12, "thing": 12, "still": 12, "realli": 12, "superstar": 12, "cast": 12, "face": [12, 13], "entir": 12, "excel": 12, "job": 12, "hard": 12, "watch": 12, "becaus": [12, 15], "situat": 12, "present": 12, "british": 12, "against": 12, "each": [12, 14, 15], "merit": 12, "view": 12, "forc": 12, "region": 12, "thei": [12, 14], "did": 12, "around": 12, "partit": 12, "simpli": [12, 14], "saw": 12, "between": [12, 14], "enough": 12, "veri": 12, "rememb": 12, "screen": 12, "never": 12, "paint": 12, "side": 12, "hope": 12, "younger": 12, "redempt": 12, "man": 12, "who": 12, "her": 12, "life": 12, "truli": 12, "love": 12, "later": 12, "she": 12, "great": 12, "pain": 12, "carri": 12, "messag": 12, "grave": 12, "peopl": 12, "realiti": 12, "sinc": [12, 15], "india": 12, "pakistan": 12, "border": 12, "sens": 12, "glad": 12, "seen": 12, "even": 12, "uk": 12, "could": [12, 15], "would": [12, 15], "better": 12, "onc": 12, "long": 12, "while": [12, 15], "movi": 12, "along": 12, "feel": 12, "labor": 12, "my": 12, "joi": 12, "where": [12, 13], "five": 12, "stereotyp": 12, "had": 12, "gui": 12, "fat": 12, "foreign": 12, "etc": 12, "being": [12, 13], "written": 12, "shot": 12, "product": 12, "low": 12, "junior": 12, "high": [12, 14], "video": 12, "director": 12, "produc": [12, 13], "ever": 12, "wors": 12, "entri": 12, "concept": 12, "funni": 12, "gari": 12, "coleman": 12, "actor": 12, "trust": 12, "sai": [12, 14], "went": 12, "dad": 12, "came": 12, "korea": 12, "he": 12, "short": [12, 14], "period": 12, "made": 12, "epic": 12, "imagin": 12, "cost": 12, "cheap": 12, "theme": 12, "duti": 12, "lip": 12, "offic": 12, "deep": 12, "declar": 12, "hi": 12, "peck": 12, "liber": 12, "understand": 12, "fearless": 12, "human": 12, "ve": 12, "fact": 12, "tail": 12, "mess": 12, "almost": 12, "walk": 12, "paid": 12, "ll": 12, "sit": 12, "bit": 12, "lose": 12, "its": 12, "someth": [12, 14], "ed": 12, "wood": 12, "dialogu": 12, "heard": 12, "viewer": 12, "cannot": [12, 13], "meet": 12, "oper": 12, "soon": 12, "stephen": 12, "best": 12, "ultim": 12, "tara": 12, "reid": 12, "plai": 12, "role": 12, "oh": 12, "help": 12, "talent": 12, "actress": 12, "stick": 12, "american": 12, "pie": 12, "know": 12, "kick": 12, "clich": 12, "\u00e9": 12, "typic": 12, "member": 12, "william": 12, "benton": 12, "believ": 12, "bias": 12, "toward": 12, "thief": 12, "born": 12, "bad": 12, "neither": 12, "slate": 12, "societi": 12, "parent": 12, "educ": 12, "what": [12, 14], "somewher": 12, "isn": [12, 15], "back": 12, "track": 12, "bet": 12, "wast": 12, "piec": 12, "valid": 12, "late": 12, "penn": 12, "teller": 12, "joe": 12, "bob": 12, "fridai": [12, 14], "school": 12, "year": 12, "doubt": 12, "televis": 12, "didn": 12, "stai": 12, "miss": 12, "john": 12, "bloom": 12, "live": 12, "belong": [12, 14], "question": [12, 14], "anyon": 12, "hour": 12, "moral": 12, "disast": 12, "david": 12, "care": 12, "purpos": 12, "singl": 12, "qualiti": 12, "treat": 12, "afternoon": 12, "budget": 12, "project": [12, 13], "stori": 12, "eva": 12, "tv": 12, "ideal": 12, "mani": 12, "cours": 12, "special": 12, "effect": 12, "gun": 12, "scene": 12, "move": 12, "although": 12, "problem": 12, "rent": 12, "student": 12, "ye": 12, "nake": 12, "emperor": 12, "speak": 12, "big": 12, "someon": 12, "state": [12, 14], "truth": 12, "old": 12, "bodi": 12, "nude": 12, "artist": 12, "front": 12, "audienc": 12, "ev": 12, "poor": 12, "wanna": 12, "ladi": 12, "sensit": 12, "becam": 12, "petti": 12, "satisfact": 12, "alarm": 12, "signal": [12, 13], "degre": 12, "work": [12, 14], "art": [12, 14], "cross": 12, "mix": 12, "ordinari": 12, "rural": 12, "pacif": 12, "northwest": 12, "solid": 12, "fine": 12, "dan": 12, "same": [12, 14], "highli": 12, "crash": 12, "paul": 12, "pace": 12, "action": 12, "urban": 12, "lo": 12, "angel": 12, "apart": 12, "relationship": [12, 15], "jim": 12, "0x7f5215ef6ea0": 12, "\u0e01\u0e32\u0e25\u0e04\u0e23": 12, "\u0e07\u0e19\u0e32\u0e19\u0e21\u0e32\u0e41\u0e25": 12, "min_p": 12, "005": 12, "\u0e27\u0e07\u0e2a\u0e2d\u0e07\u0e2b\u0e19": 12, "\u0e10\u0e32\u0e19\u0e30\u0e23": 12, "\u0e33\u0e23\u0e27\u0e22": 12, "\u0e41\u0e25\u0e30\u0e40\u0e1b": 12, "\u0e19\u0e25": 12, "\u0e01\u0e2a\u0e32\u0e27\u0e02\u0e2d\u0e07": 12, "\u0e14\u0e23": 12, "\u0e42\u0e04\u0e25": 12, "\u0e1a\u0e1a\u0e17\u0e42\u0e14\u0e22": 12, "\u0e2d\u0e25": 12, "\u0e01\u0e0a\u0e32\u0e22\u0e04\u0e19\u0e42\u0e15\u0e02\u0e2d\u0e07": 12, "\u0e42\u0e2d\u0e25": 12, "\u0e40\u0e27\u0e2d\u0e23": [12, 14], "\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "\u0e27\u0e07\u0e41\u0e23\u0e01": 12, "\u0e40\u0e02\u0e32\u0e40\u0e1b": 12, "\u0e42\u0e2d\u0e25\u0e25": 12, "\u0e40\u0e02\u0e32\u0e21": 12, "\u0e41\u0e25\u0e30\u0e41\u0e21": 12, "\u0e19\u0e04\u0e19\u0e17": 12, "\u0e15\u0e43\u0e08\u0e2d": 12, "\u0e2d\u0e19\u0e42\u0e22\u0e19": 12, "\u0e19\u0e40\u0e1e": 12, "\u0e2d\u0e19\u0e2a\u0e19": 12, "\u0e17\u0e01": 12, "\u0e04\u0e32\u0e25": 12, "\u0e42\u0e23\u0e2a": 12, "\u0e25\u0e2a": 12, "\u0e2d\u0e02\u0e2d\u0e07\u0e40\u0e18\u0e2d\u0e19": 12, "\u0e43\u0e19\u0e1b": 12, "1967": 12, "\u0e18\u0e44\u0e14": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e40\u0e25": 12, "\u0e22\u0e07\u0e14": 12, "\u0e08\u0e2d\u0e23": 12, "\u0e2a\u0e1b": 12, "\u0e25\u0e40\u0e1a": 12, "\u0e0b\u0e32\u0e23": 12, "\u0e2d\u0e21\u0e32\u0e01": 12, "\u0e1a\u0e01\u0e32\u0e23\u0e14": 12, "\u0e41\u0e25\u0e08\u0e32\u0e01\u0e41\u0e21": 12, "\u0e07\u0e17\u0e33\u0e43\u0e2b": [12, 14], "\u0e01\u0e29\u0e30\u0e14": 12, "\u0e32\u0e19\u0e27": 12, "\u0e41\u0e25\u0e30\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 12, "\u0e07\u0e08\u0e32\u0e01\u0e2a\u0e33\u0e40\u0e23": 12, "\u0e08\u0e01\u0e32\u0e23\u0e28": 12, "\u0e01\u0e29\u0e32\u0e08\u0e32\u0e01\u0e21\u0e2b\u0e32\u0e27": 12, "\u0e17\u0e22\u0e32\u0e25": 12, "\u0e22\u0e41\u0e25": 12, "\u0e19\u0e17\u0e32\u0e07\u0e44\u0e1b\u0e17": 12, "\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e2d\u0e2d\u0e2a\u0e40\u0e15\u0e23\u0e40\u0e25": 12, "\u0e01\u0e29\u0e32": 12, "\u0e41\u0e25\u0e30\u0e43\u0e19\u0e0a": 12, "\u0e27\u0e07\u0e19": 12, "\u0e19\u0e21\u0e32\u0e23\u0e14\u0e32": 12, "airesearch": [13, 14], "larg": 13, "xlsr": 13, "cu113": 13, "torchvis": 13, "torchaudio": 13, "pytorch": 13, "torch_stabl": 13, "html": 13, "link": 13, "2bcu113": 13, "1821": 13, "834": 13, "43tcmalloc": 13, "alloc": 13, "1147494400": 13, "byte": 13, "0x55bf21ac6000": 13, "0x7faf12d1b615": 13, "0x55bf1efac4cc": 13, "0x55bf1f08c47a": 13, "0x55bf1efaf2": 13, "0x55bf1f0a0e1d": 13, "0x55bf1f022e99": 13, "0x55bf1f01d9ee": 13, "0x55bf1efb0bda": 13, "0x55bf1f022d00": 13, "0x55bf1f01f737": 13, "0x55bf1f0a1c66": 13, "0x55bf1f01edaf": 13, "0x55bf1efb1039": 13, "0x55bf1eff4409": 13, "0x55bf1efafc52": 13, "0x55bf1f022c25": 13, "0x55bf1f01e915": 13, "0x55bf1efb0afa": 13, "0x55bf1f01ec0d": 13, "1055": 13, "37tcmalloc": 13, "1434370048": 13, "0x55bf6611c000": 13, "1336": 13, "39tcmalloc": 13, "1792966656": 13, "0x55bfbb908000": 13, "1691": 13, "38tcmalloc": 13, "2241208320": 13, "01tcmalloc": 13, "1821458432": 13, "0x55bfa7428000": 13, "0x7faf12d1a1e7": 13, "0x55bf1efe2067": 13, "tcmalloc": 13, "2276827136": 13, "0x55c013d3c000": 13, "0x55bf1efb1271": 13, "pillow": 13, "cu111": 13, "onnxruntim": 13, "soundfil": 13, "manylinux_2_12_x86_64": 13, "91": 13, "post1": 13, "895": [13, 15], "manylinux_2_5_x86_64": 13, "596": 13, "flatbuff": 13, "743": 13, "wav2vec2model": 13, "hug": 13, "autotoken": [13, 14], "wav2vec2forctc": 13, "import_huggingface_model": 13, "origin": [13, 15], "from_pretrain": [13, 14], "correspond": 13, "audio": 13, "stabl": 13, "hubert": 13, "configuration_util": 13, "341": 13, "gradient_checkpoint": 13, "v5": [13, 14], "gradient_checkpointing_en": 13, "trainer": [13, 14], "api": 13, "trainingargu": [13, 14], "eval": 13, "mode": 13, "feature_extractor": [13, 14], "featureextractor": 13, "conv_lay": 13, "convlayerblock": 13, "layer_norm": [13, 14], "layernorm": 13, "512": 13, "elementwise_affin": 13, "conv": 13, "conv1d": 13, "kernel_s": 13, "stride": 13, "feature_project": 13, "featureproject": 13, "1024": 13, "pos_conv_emb": 13, "convolutionalpositionalembed": 13, "128": 13, "encoderlay": 13, "attent": 13, "selfattent": 13, "k_proj": 13, "v_proj": 13, "q_proj": 13, "out_proj": 13, "feed_forward": 13, "feedforward": 13, "intermediate_dens": 13, "4096": 13, "intermediate_dropout": 13, "output_dens": 13, "output_dropout": 13, "final_layer_norm": 13, "microsoft": 13, "window": 13, "ai": [13, 14], "ml": 13, "input_s": 13, "100000": 13, "audio_maxlen": 13, "dummy_input": 13, "randn": 13, "requires_grad": 13, "export": 13, "asr3": 13, "export_param": 13, "opset_vers": 13, "do_constant_fold": 13, "whether": 13, "execut": 13, "constant": 13, "fold": 13, "input_nam": 13, "modelinput": 13, "output_nam": 13, "modeloutput": 13, "dynamic_ax": 13, "batch_siz": 13, "ax": [13, 14], "symbolic_help": 13, "325": 13, "caus": 13, "incorrect": 13, "dropbox": 13, "9kpeh8eodshcqhj": 13, "common_voice_th_23646850": 13, "wav": 13, "dl": 13, "mv": 13, "json": 13, "co": [13, 14], "r": [13, 15], "sig": 13, "sf": 13, "scipi": 13, "wavfil": 13, "sp": 13, "new_rat": 13, "16000": 13, "ort_sess": 13, "inferencesess": 13, "k": [13, 14], "unk": 13, "_normal": 13, "vasudevgupta7": 13, "gsoc": 13, "src": 13, "l101": 13, "fork": [13, 14], "tf": 13, "seqlen": 13, "keepdim": 13, "var": 13, "squeez": 13, "sqrt": 13, "remove_adjac": 13, "3460423": 13, "asr": 13, "wav2vec2_onnx": 13, "ipynb": [13, 15], "sampling_r": 13, "sampl": [13, 15], "new_data": 13, "resampl": 13, "float32": 13, "ort_input": 13, "ort_out": 13, "_t1": 13, "easili": 14, "finetun": 14, "drive": 14, "1kbk6sbspzlwcnoe61adaqo30xxqoq9ko": 14, "scrollto": 14, "n5iacot9b3cf": 14, "specif": [14, 15], "thaixtransform": 14, "236": 14, "106": 14, "safetensor": 14, "fsspec": 14, "355": 14, "seqev": 14, "28115": 14, "d0f182fee94a7c129f5bd1265a3e0d2a52893384d6783d11c8bbd770ef695fac": 14, "2c": 14, "4b": 14, "b2": 14, "a90368d80567249f258a9c58240512046afb5563d794eda4b2": 14, "auto": 14, "camemberttoken": 14, "automodel": 14, "automodelformaskedlm": 14, "automodelforsequenceclassif": 14, "automodelfortokenclassif": 14, "process_transform": 14, "xlmr": 14, "mbert": 14, "downstream": 14, "att": 14, "uncas": 14, "largest": 14, "78": 14, "5gb": 14, "assort": 14, "subword": 14, "xlm": 14, "multilingu": 14, "104": 14, "level": 14, "syllabl": 14, "syllabel": 14, "sefr": 14, "model_nam": [14, 15], "thaiwordsnewmmtoken": 14, "thaiwordssyllabletoken": 14, "fakesefrcuttoken": 14, "thairobertatoken": 14, "public_model": 14, "param": 14, "revis": 14, "model_max_length": 14, "416": 14, "unexpect": 14, "robertatoken": 14, "simplest": 14, "given": 14, "\u0e07\u0e08": 14, "\u0e19\u0e17\u0e23": 14, "\u0e25\u0e40\u0e25\u0e22": 14, "\u0e07\u0e2d\u0e22": 14, "\u0e1a\u0e19\u0e1e": 14, "454": 14, "\u0e02\u0e2d\u0e07\u0e2d\u0e33\u0e40\u0e20\u0e2d\u0e27": 14, "\u0e14\u0e23\u0e30\u0e22\u0e2d\u0e07": 14, "answer": [14, 15], "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e40\u0e21\u0e15\u0e23": 14, "\u0e15\u0e32\u0e23\u0e32\u0e07\u0e27\u0e32": 14, "\u0e44\u0e21\u0e25": 14, "substitut": 14, "instanc": [14, 15], "000": 14, "trane": 14, "proven": 14, "increas": 14, "aug": 14, "english": 14, "fill_mask": 14, "fill": 14, "input_text": 14, "u0e02": 14, "u0e2d": 14, "u0e40": 14, "u0e07": 14, "u0e34": 14, "u0e19": 14, "u0e01": 14, "u0e39": 14, "u0e49": 14, "u003cmask": 14, "u0e2b": 14, "u0e48": 14, "u0e22": 14, "\u0e42\u0e04\u0e23\u0e07\u0e01\u0e32\u0e23\u0e21": 14, "\u0e23\u0e30\u0e22\u0e30\u0e17\u0e32\u0e07\u0e17": 14, "\u0e07\u0e2b\u0e21\u0e14": 14, "\u0e08\u0e33\u0e19\u0e27\u0e19\u0e2a\u0e16\u0e32\u0e19": 14, "\u0e2a\u0e16\u0e32\u0e19": 14, "\u0e19\u0e40\u0e2a": 14, "\u0e19\u0e17\u0e32\u0e07\u0e2b\u0e25": 14, "\u0e01\u0e43\u0e19\u0e41\u0e19\u0e27\u0e40\u0e2b\u0e19": 14, "\u0e43\u0e15": 14, "\u0e15\u0e32\u0e21\u0e41\u0e19\u0e27\u0e17\u0e32\u0e07\u0e23\u0e16\u0e44\u0e1f\u0e40\u0e14": 14, "\u0e21\u0e02\u0e2d\u0e07\u0e01\u0e32\u0e23\u0e23\u0e16\u0e44\u0e1f\u0e41\u0e2b": 14, "\u0e07\u0e1b\u0e23\u0e30\u0e40\u0e17\u0e28\u0e44\u0e17\u0e22": 14, "\u0e32\u0e27\u0e2b\u0e19": 14, "\u0e32\u0e40\u0e19": 14, "\u0e2d\u0e40\u0e23": 14, "\u0e22\u0e01\u0e40\u0e1b": 14, "\u0e19\u0e20\u0e32\u0e29\u0e32": 14, "gy\u016bdon": 14, "\u0e08\u0e30\u0e44\u0e1b\u0e40\u0e1b": 14, "\u0e42\u0e14\u0e14\u0e40\u0e14": 14, "\u0e19\u0e1a\u0e19\u0e1f\u0e32\u0e01\u0e1f": 14, "\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e02\u0e27": 14, "\u0e02\u0e27": 14, "\u0e32\u0e40\u0e2d\u0e32\u0e21\u0e32\u0e14": 14, "\u0e07\u0e43\u0e08\u0e1d": 14, "\u0e04\u0e22\u0e2d\u0e14": 14, "\u0e02\u0e2d\u0e40\u0e07": 14, "\u0e01\u0e14": [14, 15], "allow": 14, "preprocess_input_text": 14, "boolean": 14, "fill_mask_pad": 14, "513759434223175": 14, "4263": 14, "token_str": 14, "\u0e23\u0e32\u0e21": 14, "\u0e23\u0e32\u0e21\u0e2b\u0e19": 14, "05489557236433029": 14, "552": 14, "0474877767264843": 14, "125": 14, "037654660642147064": 14, "5901": 14, "\u0e2a\u0e30\u0e14\u0e27\u0e01": 14, "\u0e2a\u0e30\u0e14\u0e27\u0e01\u0e2b\u0e19": 14, "026551486924290657": 14, "1913": 14, "\u0e19\u0e32": 14, "\u0e19\u0e32\u0e2b\u0e19": 14, "wisesight_senti": 14, "social": 14, "media": 14, "wongnai_review": 14, "awai": [14, 15], "classify_multiclass": 14, "u0e04": 14, "u0e1a": 14, "u0e32": 14, "u0e47": 14, "u0e21": 14, "u0e31": 14, "u0e41": 14, "u0e17": 14, "u0e15": 14, "u0e4c": 14, "u0e25": 14, "u0e303": 14, "u0e27": 14, "u0e14": 14, "u0e42": 14, "u0e23": 14, "u0e30": 14, "u0e1b": 14, "u0e37": 14, "\u0e2d\u0e22\u0e32\u0e01\u0e01": 14, "\u0e19\u0e27\u0e30\u0e41\u0e01": 14, "\u0e2d\u0e21\u0e32\u0e43\u0e2b": 14, "\u0e2d\u0e22\u0e08": 14, "\u0e13\u0e41\u0e01\u0e21\u0e32\u0e01": 14, "\u0e42\u0e04\u0e15\u0e23\u0e1a": 14, "\u0e32\u0e40\u0e25\u0e22": 14, "\u0e1f\u0e2d\u0e23": 14, "\u0e01\u0e15\u0e25\u0e32\u0e14": 14, "\u0e19\u0e40\u0e14": 14, "prachachat": 14, "\u0e15\u0e25\u0e32\u0e14\u0e23\u0e16\u0e22\u0e19\u0e15": 14, "\u0e23\u0e2a\u0e0a\u0e32\u0e40\u0e02": 14, "\u0e22\u0e27\u0e40\u0e02": 14, "\u0e2b\u0e2d\u0e21": 14, "\u0e01\u0e25\u0e21\u0e01\u0e25": 14, "\u0e14\u0e41\u0e1a\u0e1a\u0e08": 14, "\u0e14\u0e2a\u0e19": 14, "\u0e27\u0e19\u0e44\u0e2d\u0e28\u0e04\u0e23": 14, "\u0e17\u0e32\u0e19\u0e41\u0e25": 14, "\u0e27\u0e23\u0e2a\u0e21": 14, "\u0e19\u0e2d\u0e2d\u0e01\u0e43\u0e1a\u0e44\u0e21": 14, "\u0e46\u0e21\u0e32\u0e01\u0e01\u0e27": 14, "\u0e32\u0e0a\u0e32\u0e40\u0e02": 14, "\u0e27\u0e01": 14, "\u0e2b\u0e27\u0e32\u0e19\u0e44\u0e1b": 14, "\u0e42\u0e14\u0e22\u0e23\u0e27\u0e21\u0e41\u0e25": 14, "\u0e27\u0e40\u0e09\u0e22\u0e21\u0e32\u0e01\u0e01": 14, "\u0e33\u0e40\u0e1b\u0e25": 14, "\u0e32\u0e1a\u0e23": 14, "\u0e01\u0e32\u0e23\u0e1f\u0e23": 14, "\u0e40\u0e04\u0e22\u0e1a": 14, "\u0e32\u0e40\u0e2d": 14, "\u0e21\u0e40\u0e04\u0e01": 14, "\u0e1a\u0e41\u0e21": 14, "\u0e25\u0e303": 14, "\u0e42\u0e04\u0e15\u0e23\u0e2b\u0e19": 14, "\u0e01\u0e41\u0e25\u0e30\u0e42\u0e04\u0e15\u0e23\u0e40\u0e1b\u0e25": 14, "\u0e2d\u0e07\u0e07\u0e07\u0e07": 14, "892067551612854": 14, "entiti": 14, "recognit": 14, "classify_token": 14, "ignore_label": 14, "token_classif": 14, "169": 14, "aggregation_strategi": 14, "u0e35": 14, "u0e2a": 14, "u0e38": 14, "u0e44": 14, "\u0e41\u0e14\u0e07\u0e40\u0e14": 14, "\u0e2d\u0e14\u0e23\u0e2d\u0e1a\u0e2a\u0e2d\u0e07": 14, "\u0e01\u0e40\u0e22": 14, "\u0e41\u0e21\u0e19\u0e2f": 14, "\u0e44\u0e19\u0e40\u0e15": 14, "\u0e22\u0e40\u0e2a": 14, "\u0e22\u0e07\u0e2a": 14, "\u0e0d\u0e40\u0e2a": 14, "\u0e22\u0e08\u0e32\u0e01\u0e20": 14, "\u0e22\u0e18\u0e23\u0e23\u0e21\u0e0a\u0e32\u0e15": 14, "\u0e21\u0e32\u0e01\u0e2a": 14, "\u0e17\u0e33\u0e43\u0e2b": 14, "\u0e1b\u0e23\u0e30\u0e0a\u0e32\u0e0a\u0e19\u0e01\u0e27": 14, "\u0e2d\u0e07\u0e2d\u0e1e\u0e22\u0e1e\u0e2d\u0e2d\u0e01\u0e08\u0e32\u0e01\u0e1e": 14, "\u0e1e\u0e25\u0e02\u0e2d\u0e07\u0e1e\u0e32\u0e22": 14, "\u0e32\u0e19\u0e40\u0e23": 14, "\u0e2d\u0e19\u0e40\u0e01": 14, "\u0e2d\u0e1a": 14, "700": 14, "\u0e07\u0e1e": 14, "\u0e07\u0e16\u0e25": 14, "\u0e21\u0e25\u0e07\u0e21\u0e32": 14, "\u0e32\u0e07\u0e04\u0e27\u0e32\u0e21\u0e40\u0e2a": 14, "\u0e22\u0e2b\u0e32\u0e22\u0e04": 14, "\u0e14\u0e40\u0e1b": 14, "\u0e25\u0e04": 14, "450": 14, "\u0e32\u0e19\u0e2b\u0e22\u0e27\u0e19": 14, "\u0e01\u0e17\u0e0a": 14, "\u0e40\u0e15\u0e23": 14, "\u0e22\u0e21\u0e17\u0e14\u0e25\u0e2d\u0e07\u0e1b\u0e23\u0e30\u0e21": 14, "3\u0e08": 14, "entity_group": 14, "97664016": 14, "99976474": 14, "less": 14, "tradit": 14, "logist": 14, "regress": 14, "forest": 14, "boost": 14, "imag": 14, "mrpeerat": 14, "bramvanroi": 14, "extract_last_k_token": 14, "last_k": 14, "hidden_st": 14, "last_k_token": 14, "concatenated_hidden_st": 14, "sum": 14, "_extract_last_k_lay": 14, "aggregator_fn": 14, "return_tensor": 14, "pt": 14, "no_grad": 14, "output_hidden_st": 14, "select": 14, "hidden": 14, "cat": 14, "aggregated_hidden_st": 14, "extract_last_k_lay": 14, "pretrained_model_name_or_path": 14, "lm_head": 14, "japanes": 14, "food": [14, 15], "gyudon": 14, "italian": 14, "macaroni": 14, "cosin": 14, "consid": 14, "last": 14, "markdown": 14, "obtain": 14, "aggreg": 14, "via": 14, "summat": 14, "represnetaiton": 14, "text1": 14, "\u0e19\u0e0a\u0e2d\u0e1a\u0e01": 14, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e0d": 14, "text2": 14, "\u0e19\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2d": 14, "\u0e15\u0e32\u0e40\u0e25": 14, "text3": 14, "text4": 14, "\u0e01\u0e01\u0e30\u0e42\u0e23\u0e19": 14, "t1": 14, "t3": 14, "t4": 14, "pairwis": 14, "cosine_similar": 14, "sim_matrix": 14, "cmap": 14, "blue": 14, "bo": 14, "fanci": 14, "mayb": 14, "march": 14, "releas": [14, 15], "wanchanberta": 14, "xnli": 14, "pair": 14, "branch": 14, "xnli_th": 14, "repositori": 14, "zero_classifi": 14, "u0e0d": 14, "u0e0a": 14, "u0e1": 14, "u0e18": 14, "scb": 14, "10x": 14, "u0e43": 14, "blockfi": 14, "startup": 14, "digit": 14, "asset": 14, "u0e13": 14, "u0e10": 14, "u201c": 14, "u201d": 14, "u0e1c": 14, "u0e20": 14, "u0e29": 14, "u201cwher": 14, "u0e08": 14, "u0e16": 14, "u0e1f": 14, "u0e28": 14, "u0e33": 14, "u0e11": 14, "u0e1d": 14, "candidate_label": 14, "\u0e40\u0e28\u0e23\u0e29\u0e10\u0e01": 14, "\u0e23\u0e01": 14, "\u0e01\u0e32\u0e23\u0e40\u0e21": 14, "\u0e40\u0e17\u0e04\u0e42\u0e19\u0e42\u0e25\u0e22": 14, "\u0e25\u0e1b\u0e30": 14, "\u0e19\u0e40\u0e17": 14, "hypothesis_templ": 14, "\u0e1e\u0e32\u0e14\u0e2b": 14, "\u0e27\u0e02": 14, "\u0e32\u0e27\u0e19": 14, "\u0e21\u0e44\u0e1a\u0e40\u0e14\u0e19\u0e2b\u0e32\u0e23": 14, "\u0e1a\u0e0d": 14, "\u0e01\u0e23\u0e30\u0e0a": 14, "\u0e1a\u0e04\u0e27\u0e32\u0e21\u0e40\u0e1b": 14, "\u0e19\u0e18\u0e21": 14, "34431710839271545": 14, "3195861279964447": 14, "18645761907100677": 14, "14963914453983307": 14, "v0": 15, "word2vec": 15, "oppos": 15, "latter": 15, "garner": 15, "556": 15, "dimens": 15, "descend": 15, "frequenc": 15, "readabl": 15, "applic": 15, "thwiki_lm": 15, "word2vec_exampl": 15, "inlin": 15, "manifold": 15, "tsne": 15, "fm": 15, "load_word2vec_format": 15, "wordvector": 15, "thai2fit_wv": 15, "get_model": 15, "thai2dict": 15, "index2word": 15, "from_dict": 15, "orient": 15, "290": 15, "291": 15, "292": 15, "293": 15, "294": 15, "295": 15, "296": 15, "298": 15, "299": 15, "308956": 15, "097699": 15, "116745": 15, "215612": 15, "015768": 15, "064163": 15, "062168": 15, "039649": 15, "864940": 15, "846904": 15, "142418": 15, "033241": 15, "171581": 15, "624864": 15, "009358": 15, "449131": 15, "120130": 15, "122195": 15, "450617": 15, "071318": 15, "010751": 15, "618971": 15, "129665": 15, "035460": 15, "007560": 15, "027607": 15, "397824": 15, "026543": 15, "254075": 15, "168328": 15, "105786": 15, "180930": 15, "101630": 15, "070885": 15, "037263": 15, "183606": 15, "049088": 15, "672288": 15, "293044": 15, "592576": 15, "015736": 15, "258926": 15, "052953": 15, "153728": 15, "005985": 15, "021081": 15, "041088": 15, "057312": 15, "633230": 15, "442729": 15, "009408": 15, "252576": 15, "305512": 15, "372542": 15, "049151": 15, "568470": 15, "266586": 15, "400800": 15, "784650": 15, "197369": 15, "189711": 15, "174774": 15, "171124": 15, "186771": 15, "054294": 15, "114150": 15, "109456": 15, "094466": 15, "447015": 15, "042377": 15, "168676": 15, "148738": 15, "680404": 15, "097702": 15, "020270": 15, "182967": 15, "083949": 15, "006287": 15, "707434": 15, "070234": 15, "156962": 15, "231863": 15, "080312": 15, "323157": 15, "215695": 15, "055145": 15, "420794": 15, "016842": 15, "256759": 15, "832864": 15, "044267": 15, "147186": 15, "105424": 15, "907078": 15, "009299": 15, "550953": 15, "139337": 15, "031696": 15, "670379": 15, "008048": 15, "428813": 15, "031194": 15, "041922": 15, "036608": 15, "008106": 15, "076470": 15, "782270": 15, "033361": 15, "606864": 15, "440520": 15, "024458": 15, "025031": 15, "103389": 15, "078255": 15, "034323": 15, "459774": 15, "748643": 15, "337775": 15, "487408": 15, "511535": 15, "287710": 15, "064193": 15, "205076": 15, "146356": 15, "071343": 15, "039451": 15, "845461": 15, "163763": 15, "018096": 15, "272786": 15, "051024": 15, "532856": 15, "131856": 15, "090323": 15, "058895": 15, "151262": 15, "420358": 15, "055971": 15, "930814": 15, "163908": 15, "239587": 15, "303620": 15, "079953": 15, "453045": 15, "528826": 15, "161692": 15, "235725": 15, "099673": 15, "691668": 15, "536159": 15, "110436": 15, "297495": 15, "217414": 15, "045158": 15, "066647": 15, "190095": 15, "304333": 15, "724927": 15, "995488": 15, "716609": 15, "120522": 15, "355783": 15, "168180": 15, "377733": 15, "158624": 15, "047249": 15, "361140": 15, "161460": 15, "913314": 15, "345037": 15, "116285": 15, "318218": 15, "356664": 15, "519889": 15, "130475": 15, "125772": 15, "101328": 15, "382658": 15, "205359": 15, "340139": 15, "086848": 15, "155231": 15, "133015": 15, "039913": 15, "183761": 15, "115142": 15, "940854": 15, "066565": 15, "399744": 15, "146722": 15, "019406": 15, "181474": 15, "099863": 15, "516092": 15, "201697": 15, "249139": 15, "252957": 15, "138815": 15, "018209": 15, "232265": 15, "sne": 15, "compress": 15, "plane": 15, "thai2plot": 15, "tnse": 15, "n_compon": 15, "init": 15, "pca": 15, "n_iter": 15, "fit_transform": 15, "wb": 15, "jeffmcneil": 15, "dip": 15, "sipa": 15, "regular": 15, "111": 15, "479628": 15, "468k": 15, "octet": 15, "regu": 15, "468": 15, "39k": 15, "stolen": 15, "blog": 15, "manash": 15, "a71e6d55f27": 15, "plot_with_label": 15, "low_dim_emb": 15, "figsiz": 15, "axis_lim": 15, "assert": 15, "figur": 15, "inch": 15, "scatter": 15, "prop": 15, "fontproperti": 15, "fname": 15, "xy": 15, "xytext": 15, "textcoord": 15, "offset": 15, "va": 15, "bottom": 15, "savefig": 15, "\u0e2b\u0e0d": 15, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a\u0e32": 15, "\u0e0a\u0e32\u0e22": 15, "\u0e1e\u0e23\u0e30\u0e23\u0e32\u0e0a": 15, "\u0e19\u0e32\u0e22\u0e01\u0e23": 15, "\u0e10\u0e21\u0e19\u0e15\u0e23": 15, "\u0e2d\u0e33\u0e19\u0e32\u0e08": 15, "\u0e1b\u0e23\u0e30\u0e18\u0e32\u0e19\u0e32\u0e18": 15, "\u0e07\u0e01": 15, "\u0e42\u0e1a\u0e23\u0e32\u0e13": 15, "\u0e44\u0e14\u0e42\u0e19\u0e40\u0e2a\u0e32\u0e23": 15, "most_similar_cosmul": 15, "7954867482185364": 15, "7382755279541016": 15, "\u0e1e\u0e23\u0e30\u0e40\u0e08": 15, "7046602368354797": 15, "\u0e32\u0e0a\u0e32\u0e22": 15, "6979373097419739": 15, "\u0e1e\u0e23\u0e30\u0e21\u0e2b\u0e32\u0e01\u0e29": 15, "6972416639328003": 15, "\u0e32\u0e1f": 15, "\u0e32\u0e2b\u0e0d": 15, "6871017217636108": 15, "\u0e32\u0e41\u0e1c": 15, "6827988624572754": 15, "\u0e1e\u0e23\u0e30\u0e1e": 15, "\u0e17\u0e18\u0e40\u0e08": 15, "671796977519989": 15, "\u0e21\u0e01": 15, "\u0e0e\u0e23\u0e32\u0e0a\u0e01": 15, "\u0e21\u0e32\u0e23": 15, "6711805462837219": 15, "\u0e19\u0e32\u0e22\u0e1e\u0e25": 15, "6694187521934509": 15, "sample_word": 15, "sample_idx": 15, "sample_plot": 15, "\u0e23\u0e2d\u0e07\u0e19\u0e32\u0e22\u0e01\u0e23": 15, "4945054054260254": 15, "400755763053894": 15, "3626699447631836": 15, "\u0e19\u0e40\u0e2d\u0e01": 15, "3437265157699585": 15, "\u0e0d\u0e0a\u0e32\u0e01\u0e32\u0e23\u0e17\u0e2b\u0e32\u0e23\u0e1a\u0e01": 15, "3405414819717407": 15, "\u0e1a\u0e20\u0e32\u0e1e\u0e22\u0e19\u0e15\u0e23": 15, "3339321613311768": 15, "\u0e01\u0e1f": 15, "\u0e15\u0e1a\u0e2d\u0e25": 15, "331659197807312": 15, "\u0e40\u0e2d\u0e01\u0e2d": 15, "\u0e04\u0e23\u0e23\u0e32\u0e0a\u0e17": 15, "3306005001068115": 15, "3243674039840698": 15, "\u0e20\u0e32\u0e1e\u0e2a\u0e15\u0e23": 15, "3231494426727295": 15, "\u0e15\u0e27": 15, "\u0e07\u0e21": 15, "537461519241333": 15, "\u0e22\u0e07\u0e25": 15, "\u0e27\u0e22\u0e19\u0e21": 15, "5080005526542664": 15, "\u0e41\u0e21\u0e25\u0e07": 15, "5048903226852417": 15, "\u0e1c\u0e25\u0e44\u0e21": 15, "4839756190776825": 15, "47641509771347046": 15, "46431201696395874": 15, "45941096544265747": 15, "45185261964797974": 15, "4504697620868683": 15, "44425833225250244": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e0a": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e2a": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e40\u0e22": 15, "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e01\u0e25\u0e32\u0e07\u0e27": 15, "wherea": 15, "meal": 15, "\u0e25\u0e32\u0e01": 15, "push": 15, "rest": 15, "eat": 15, "reli": 15, "\u0e01\u0e40\u0e02\u0e22": 15, "associ": 15, "male": 15, "gender": 15, "\u0e2b\u0e21\u0e32": 15, "\u0e2b\u0e21\u0e2d": 15, "china": 15, "beij": 15, "itali": 15, "rome": 15, "\u0e42\u0e23\u0e21": 15, "\u0e15\u0e32\u0e25": 15, "3135956": 15, "42819628": 15, "27347285": 15, "17900795": 15, "02666693": 15, "24352394": 15, "\u0e42\u0e15\u0e40\u0e01": 15, "contribut": 15, "sakar": 15, "atv": 15, "adapt": 15, "spell": 15, "cpmp": 15, "w_rank": 15, "thai_lett": 15, "\u0e01\u0e02\u0e03\u0e04\u0e05\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e24\u0e45\u0e25\u0e26\u0e26\u0e45\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e30": 15, "\u0e40\u0e41\u0e42\u0e43\u0e44": 15, "findal": 15, "lower": 15, "invers": 15, "proxi": 15, "dictionari": 15, "max": 15, "candid": 15, "edits1": 15, "edits2": 15, "subset": 15, "appear": 15, "delet": 15, "transpos": 15, "replac": 15, "insert": 15, "e1": 15, "\u0e14\u0e19\u0e32": 15, "\u0e12\u0e19\u0e32": 15, "\u0e02\u0e23": 15, "\u0e08\u0e22": 15, "\u0e19\u0e30\u0e04": 15}, "objects": {}, "objtypes": {}, "objnames": {}, "titleterms": {"welcom": 0, "pythainlp": [0, 1, 4, 5, 7, 10], "tutori": 0, "han": 1, "coref": 1, "thai": [1, 2, 3, 4, 6, 7, 12, 13], "corefer": 1, "resolut": 1, "depend": 2, "parser": [2, 6], "find": 3, "all": 3, "rhyme": 3, "word": [3, 7, 15], "from": 3, "translat": 4, "instal": [4, 5, 13, 14], "import": [4, 7, 15], "list": 4, "languag": [4, 9, 11, 12], "english": 4, "nlpo3": 5, "dictionari": [5, 7], "custom": [5, 7], "chunk": 6, "get": [7, 14], "start": [7, 14], "charact": 7, "check": 7, "string": 7, "contain": 7, "how": 7, "mani": 7, "collat": 7, "date": 7, "time": 7, "format": 7, "spellout": 7, "token": [7, 14], "segment": 7, "sentenc": 7, "subword": [7, 8], "syllabl": 7, "cluster": 7, "tcc": 7, "low": 7, "level": 7, "oper": 7, "transliter": 7, "normal": 7, "digit": 7, "convers": 7, "soundex": 7, "spellcheck": [7, 15], "frequenc": 7, "part": [7, 8], "speech": [7, 8], "tag": 7, "name": [7, 8], "entiti": [7, 8], "vector": [7, 14], "number": 7, "spell": 7, "out": 7, "wangchanberta": [8, 14], "recognit": 8, "wisesight": [9, 14], "sentiment": [9, 14], "analysi": 9, "text": [9, 12, 14], "processor": 9, "logist": 9, "regress": 9, "process": 9, "file": 9, "csv": 9, "load": 9, "data": 9, "train": 9, "valid": 9, "split": 9, "creat": 9, "featur": [9, 14], "fit": 9, "model": [9, 11, 12, 13, 14], "see": 9, "result": 9, "ulmfit": [9, 11], "finetun": [9, 11], "classifi": [9, 14], "spaci": 10, "wongnai": [11, 14], "review": [11, 14], "classif": [11, 14], "oversampl": 11, "fasttext": 11, "linearsvc": 11, "submiss": 11, "wiki": 12, "gener": 12, "wav2vec2": 13, "onnx": 13, "build": 13, "infer": 13, "notebook": 14, "choos": 14, "pretrain": 14, "mask": 14, "predict": 14, "sequenc": 14, "multi": 14, "class": 14, "thainer": 14, "lst20": 14, "document": 14, "extract": 14, "zero": 14, "shot": 14, "thai2vec": 15, "embed": 15, "exampl": 15, "arithmet": 15, "doesn": 15, "t": 15, "match": 15, "cosin": 15, "similar": 15}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "nbsphinx": 4, "sphinx": 57}, "alltitles": {"Welcome to PyThaiNLP Tutorials": [[0, "welcome-to-pythainlp-tutorials"]], "Tutorials:": [[0, null]], "\ud83e\udebf Han-Coref: Thai Coreference resolution by PyThaiNLP": [[1, "\ud83e\udebf-Han-Coref:-Thai-Coreference-resolution-by-PyThaiNLP"]], "Thai Dependency Parser": [[2, "Thai-Dependency-Parser"]], "Find all Thai rhyming words from Thai word": [[3, "Find-all-Thai-rhyming-words-from-Thai-word"]], "PyThaiNLP Translate": [[4, "PyThaiNLP-Translate"]], "Install": [[4, "Install"], [13, "Install"]], "Translate": [[4, "Translate"]], "Import": [[4, "Import"]], "List language": [[4, "List-language"]], "English to Thai": [[4, "English-to-Thai"]], "Thai to English": [[4, "Thai-to-English"]], "nlpO3": [[5, "nlpO3"]], "Installation": [[5, "Installation"], [14, "Installation"]], "PyThaiNLP dictionary": [[5, "PyThaiNLP-dictionary"]], "Custom dictionary": [[5, "Custom-dictionary"]], "Thai Chunk Parser": [[6, "Thai-Chunk-Parser"]], "PyThaiNLP Get Started": [[7, "PyThaiNLP-Get-Started"]], "Import PyThaiNLP": [[7, "Import-PyThaiNLP"]], "Thai Characters": [[7, "Thai-Characters"]], "Checking if a string contains Thai character or not, or how many": [[7, "Checking-if-a-string-contains-Thai-character-or-not,-or-how-many"]], "Collation": [[7, "Collation"]], "Date/Time Format and Spellout": [[7, "Date/Time-Format-and-Spellout"]], "Date/Time Format": [[7, "Date/Time-Format"]], "Time Spellout": [[7, "Time-Spellout"]], "Tokenization and Segmentation": [[7, "Tokenization-and-Segmentation"]], "Sentence": [[7, "Sentence"]], "Word": [[7, "Word"]], "Subword, syllable, and Thai Character Cluster (TCC)": [[7, "Subword,-syllable,-and-Thai-Character-Cluster-(TCC)"]], "Subword tokenization": [[7, "Subword-tokenization"]], "Syllable tokenization": [[7, "Syllable-tokenization"]], "Low-level subword operations": [[7, "Low-level-subword-operations"]], "Transliteration": [[7, "Transliteration"]], "Normalization": [[7, "Normalization"]], "Digit conversion": [[7, "Digit-conversion"]], "Soundex": [[7, "Soundex"]], "Spellchecking": [[7, "Spellchecking"], [15, "Spellchecking"]], "Spellchecking - Custom dictionary and word frequency": [[7, "Spellchecking---Custom-dictionary-and-word-frequency"]], "Part-of-Speech Tagging": [[7, "Part-of-Speech-Tagging"]], "Named-Entity Tagging": [[7, "Named-Entity-Tagging"]], "Word Vector": [[7, "Word-Vector"]], "Number Spell Out": [[7, "Number-Spell-Out"]], "Wangchanberta": [[8, "Wangchanberta"]], "Named Entity Recognition": [[8, "Named-Entity-Recognition"]], "Part of speech": [[8, "Part-of-speech"]], "Subword": [[8, "Subword"]], "Wisesight Sentiment Analysis": [[9, "Wisesight-Sentiment-Analysis"]], "Text Processor for Logistic Regression": [[9, "Text-Processor-for-Logistic-Regression"]], "Process Text Files to CSVs": [[9, "Process-Text-Files-to-CSVs"]], "Load Data": [[9, "Load-Data"]], "Train-validation Split": [[9, "Train-validation-Split"]], "Logistic Regression": [[9, "Logistic-Regression"]], "Create Features": [[9, "Create-Features"]], "Fit Model": [[9, "Fit-Model"]], "See Results": [[9, "See-Results"], [9, "id1"]], "ULMFit Model": [[9, "ULMFit-Model"], [11, "ULMFit-Model"]], "Finetune Language Model": [[9, "Finetune-Language-Model"], [11, "Finetune-Language-Model"]], "Train Text Classifier": [[9, "Train-Text-Classifier"]], "spaCy-PyThaiNLP": [[10, "spaCy-PyThaiNLP"]], "Wongnai Review Classification": [[11, "Wongnai-Review-Classification"]], "Oversampling": [[11, "Oversampling"]], "fastText Model": [[11, "fastText-Model"]], "LinearSVC Model": [[11, "LinearSVC-Model"]], "Classification": [[11, "Classification"]], "Submission": [[11, "Submission"]], "Thai Wiki Language Model for Text Generation": [[12, "Thai-Wiki-Language-Model-for-Text-Generation"]], "Thai Wav2vec2 model to ONNX model": [[13, "Thai-Wav2vec2-model-to-ONNX-model"]], "Build ONNX Model": [[13, "Build-ONNX-Model"]], "Inference": [[13, "Inference"]], "WangchanBERTa: Getting Started Notebook": [[14, "WangchanBERTa:-Getting-Started-Notebook"]], "Choose Pretrained Model": [[14, "Choose-Pretrained-Model"]], "Masked Token Prediction": [[14, "Masked-Token-Prediction"]], "Sequence Classification": [[14, "Sequence-Classification"]], "Pretrained Multi-class Classifiers - Wisesight Sentiment and Wongnai Reviews": [[14, "Pretrained-Multi-class-Classifiers---Wisesight-Sentiment-and-Wongnai-Reviews"]], "Token Classification": [[14, "Token-Classification"]], "Pretrained Token Classifiers - ThaiNER and LST20": [[14, "Pretrained-Token-Classifiers---ThaiNER-and-LST20"]], "Document Vectors": [[14, "Document-Vectors"]], "Feature Extraction": [[14, "Feature-Extraction"]], "Zero-shot Text Classification": [[14, "Zero-shot-Text-Classification"]], "Thai2Vec Embeddings Examples": [[15, "Thai2Vec-Embeddings-Examples"]], "Imports": [[15, "Imports"]], "Word Arithmetic": [[15, "Word-Arithmetic"]], "Doesn\u2019t Match": [[15, "Doesn't-Match"]], "Cosine Similarity": [[15, "Cosine-Similarity"]]}, "indexentries": {}}) \ No newline at end of file