From d1cd0819e9d94348f339f23370de7f44e2d8fa05 Mon Sep 17 00:00:00 2001
From: Pat Chormai
Date: Sun, 7 Jun 2020 22:07:24 +0200
Subject: [PATCH 1/4] issue-424: add benchmark to cli

---
 bin/word-tokenization-benchmark | 123 --------------------------
 docs/notes/command_line.rst     |  21 +++++
 pythainlp/cli/__init__.py       |   4 +-
 pythainlp/cli/benchmark.py      | 147 ++++++++++++++++++++++++++++++++
 4 files changed, 170 insertions(+), 125 deletions(-)
 delete mode 100644 bin/word-tokenization-benchmark
 create mode 100644 pythainlp/cli/benchmark.py

diff --git a/bin/word-tokenization-benchmark b/bin/word-tokenization-benchmark
deleted file mode 100644
index f2d9d86a3..000000000
--- a/bin/word-tokenization-benchmark
+++ /dev/null
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import argparse
-import json
-import os
-
-import yaml
-from pythainlp.benchmarks import word_tokenization
-
-parser = argparse.ArgumentParser(
-    description="Script for benchmarking tokenizaiton results"
-)
-
-parser.add_argument(
-    "--input-file",
-    action="store",
-    help="Path to input file to compare against the test file",
-)
-
-parser.add_argument(
-    "--test-file",
-    action="store",
-    help="Path to test file i.e. ground truth",
-)
-
-parser.add_argument(
-    "--save-details",
-    default=False,
-    action="store_true",
-    help="Save comparison details to files (eval-XXX.json and eval-details-XXX.json)",
-)
-
-args = parser.parse_args()
-
-
-def _read_file(path):
-    with open(path, "r", encoding="utf-8") as f:
-        lines = map(lambda r: r.strip(), f.readlines())
-    return list(lines)
-
-
-print(args.input_file)
-actual = _read_file(args.input_file)
-expected = _read_file(args.test_file)
-
-assert len(actual) == len(
-    expected
-), "Input and test files do not have the same number of samples"
-print(
-    "Benchmarking %s against %s with %d samples in total"
-    % (args.input_file, args.test_file, len(actual))
-)
-
-df_raw = word_tokenization.benchmark(expected, actual)
-
-
-columns = [
-    "char_level:tp",
-    "char_level:fp",
-    "char_level:tn",
-    "char_level:fn",
-    "word_level:correctly_tokenised_words",
-    "word_level:total_words_in_sample",
-    "word_level:total_words_in_ref_sample",
-]
-
-statistics = dict()
-
-for c in columns:
-    statistics[c] = float(df_raw[c].sum())
-
-statistics["char_level:precision"] = statistics["char_level:tp"] / (
-    statistics["char_level:tp"] + statistics["char_level:fp"]
-)
-
-statistics["char_level:recall"] = statistics["char_level:tp"] / (
-    statistics["char_level:tp"] + statistics["char_level:fn"]
-)
-
-statistics["word_level:precision"] = statistics["word_level:correctly_tokenised_words"] \
-    / statistics["word_level:total_words_in_sample"]
-
-statistics["word_level:recall"] = statistics["word_level:correctly_tokenised_words"] \
-    / statistics["word_level:total_words_in_ref_sample"]
-
-print("============== Benchmark Result ==============")
-
-for c in ["tp", "fn", "tn", "fp", "precision", "recall"]:
-    c = f"char_level:{c}"
-    v = statistics[c]
-    print(f"{c:>40s} {v:.4f}")
-
-for c in ["total_words_in_sample", "total_words_in_ref_sample", "correctly_tokenised_words", "precision", "recall"]:
-    c = f"word_level:{c}"
-    v = statistics[c]
-    print(f"{c:>40s} {v:.4f}")
-
-if args.save_details:
-    dir_name = os.path.dirname(args.input_file)
-    file_name = args.input_file.split("/")[-1].split(".")[0]
-
-    res_path = "%s/eval-%s.yml" % (dir_name, file_name)
-    print("Evaluation result is saved to %s" % res_path)
-
-    with open(res_path, "w", encoding="utf-8") as outfile:
-        yaml.dump(statistics, outfile, default_flow_style=False)
-
-    res_path = "%s/eval-details-%s.json" % (dir_name, file_name)
-    print("Details of comparisons is saved to %s" % res_path)
-
-    with open(res_path, "w", encoding="utf-8") as f:
-        samples = []
-        for i, r in enumerate(df_raw.to_dict("records")):
-            expected, actual = r["expected"], r["actual"]
-            del r["expected"]
-            del r["actual"]
-
-            samples.append(dict(metrics=r, expected=expected, actual=actual, id=i))
-
-        details = dict(metrics=statistics, samples=samples)
-
-        json.dump(details, f, ensure_ascii=False)
diff --git a/docs/notes/command_line.rst b/docs/notes/command_line.rst
index 54722af0d..1deb5edf0 100644
--- a/docs/notes/command_line.rst
+++ b/docs/notes/command_line.rst
@@ -73,6 +73,27 @@ You can use some thainlp functions directly from command line.
 
     $ thainlp data --help
 
+**Benchmark**::
+
+    thainlp benchmark word-tokenization --input-file --test-file
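
Note for readers skimming the diff: the statistics block in the deleted script (presumably carried over into the new pythainlp/cli/benchmark.py, which this patch does not show) is plain precision/recall arithmetic over summed counts. A minimal sketch of that arithmetic, using made-up counts rather than output from any real benchmark run:

    # Sketch of the metric arithmetic used above. All counts here are
    # hypothetical examples, not results from an actual benchmark run.

    # Char level: precision/recall from true/false positives and false negatives.
    char_tp, char_fp, char_fn = 900.0, 50.0, 40.0
    char_precision = char_tp / (char_tp + char_fp)  # 900 / 950 ~ 0.9474
    char_recall = char_tp / (char_tp + char_fn)     # 900 / 940 ~ 0.9574

    # Word level: precision divides by words the tokenizer produced,
    # recall by words in the reference (ground-truth) segmentation.
    correctly_tokenised = 400.0
    total_in_sample = 500.0
    total_in_ref_sample = 450.0
    word_precision = correctly_tokenised / total_in_sample   # 0.8000
    word_recall = correctly_tokenised / total_in_ref_sample  # ~0.8889

    # Same report formatting as the deleted script.
    print(f"{'char_level:precision':>40s} {char_precision:.4f}")
    print(f"{'word_level:recall':>40s} {word_recall:.4f}")

With the patch applied, these figures come from the thainlp benchmark word-tokenization subcommand documented in the command_line.rst hunk, rather than the old standalone bin/word-tokenization-benchmark script.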