Commit 2a7ddd3: updated README
HanGuo97 committed Jun 12, 2018 (initial commit: 0 parents)
Showing 34 changed files with 6,209 additions and 0 deletions.
108 changes: 108 additions & 0 deletions .gitignore
@@ -0,0 +1,108 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# my own todos
TODOs.md
_*.sh
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Han Guo

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
42 changes: 42 additions & 0 deletions README.md
@@ -0,0 +1,42 @@
# Data Preprocessing
Please follow the instructions from [Zhang et al. 2017](https://github.com/XingxingZhang/dress) for downloading the pre-processed dataset.
To build the `.bin` files, please follow the instructions from [See et al. 2017](https://github.com/abisee/pointer-generator) or [here](https://github.com/abisee/cnn-dailymail).
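For context, the `.bin` files in See et al.'s pipeline store length-prefixed serialized `tf.Example` protos. Below is a minimal reader sketch; the format follows the pointer-generator summarization set-up, so treat the details as an assumption for the simplification data:

```python
import struct

from tensorflow.core.example import example_pb2


def read_bin_file(path):
    """Yields tf.Example protos from a pointer-generator style .bin file."""
    with open(path, "rb") as f:
        while True:
            len_bytes = f.read(8)
            if not len_bytes:
                break  # end of file
            str_len = struct.unpack("q", len_bytes)[0]
            example_str = struct.unpack("%ds" % str_len, f.read(str_len))[0]
            yield example_pb2.Example.FromString(example_str)
```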

# Evaluation Set-Up
* Please follow the instructions from [Zhang et al. 2017](https://github.com/XingxingZhang/dress) for setting up the evaluation system.
* FKGL implementations can be found [in this repo](https://github.com/mmautner/readability).
* Modify the corresponding directories in `evaluation_utils/sentence_simplification.py`.
* Please note that evaluation metrics are calculated at the corpus level (a sketch follows below).
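For illustration, here is a minimal corpus-level FKGL sketch. This is not the repo's implementation (see the linked `readability` repo for that), and the syllable counter is a crude vowel-group heuristic:

```python
import re


def count_syllables(word):
    # Approximate syllables as runs of vowels; crude but dependency-free.
    return max(1, len(re.findall(r"[aeiouy]+", word.lower())))


def corpus_fkgl(sentences):
    """Flesch-Kincaid grade level computed over a whole corpus at once."""
    num_sents = len(sentences)
    num_words = sum(len(s) for s in sentences)
    num_syllables = sum(count_syllables(w) for s in sentences for w in s)
    return (0.39 * float(num_words) / num_sents
            + 11.8 * float(num_syllables) / num_words
            - 15.59)
```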


# Dependencies
* Python 2.7
* TensorFlow 1.4

# Usage
```bash
CUDA_VISIBLE_DEVICES="GPU_ID" python run.py \
  --mode "string" \
  --vocab_path "/path/to/vocab/file" \
  --train_data_dirs "/path/to/training/data_1,/path/to/training/data_2,/path/to/training/data_3" \
  --val_data_dir "/path/to/validation/data_1" \
  --decode_data_dir "/path/to/decode/data_1" \
  --eval_source_dir "/path/to/validation/data_1.source" \
  --eval_target_dir "/path/to/validation/data_1.target" \
  --max_enc_steps "int" --max_dec_steps "int" --batch_size "int" --steps_per_eval "int" \
  --log_root "/path/to/log/root/" --exp_name "string" [--autoMR] \
  --lr "float" --beam_size "int" --soft_sharing_coef "float" --mixing_ratios "mr_1,mr_2" \
  --decode_ckpt_file "/path/to/ckpt" --decode_output_file "/path/to/file"
```
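For concreteness, a hypothetical training invocation might look like the following; every value here (GPU id, paths, mode name, and hyperparameters) is an illustrative placeholder rather than the authors' setting:

```bash
# All values below are placeholders; adjust to your data and set-up.
CUDA_VISIBLE_DEVICES=0 python run.py \
  --mode train \
  --vocab_path data/vocab \
  --train_data_dirs data/train_wikilarge,data/train_newsela \
  --val_data_dir data/valid_wikilarge \
  --eval_source_dir data/valid_wikilarge.source \
  --eval_target_dir data/valid_wikilarge.target \
  --max_enc_steps 100 --max_dec_steps 100 --batch_size 32 --steps_per_eval 1000 \
  --log_root logs/ --exp_name multitask_simplification --autoMR \
  --lr 0.001 --beam_size 4 --soft_sharing_coef 1e-6 --mixing_ratios 0.3,0.3
```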
Pretrained models can be found [here](https://drive.google.com/file/d/1MJ6kq8nGfPcQaTZMreavkMET-BlG93Ij/view?usp=sharing).

# Citation
```
@inproceedings{guo2018dynamic,
title = {Dynamic Multi-Level Multi-Task Learning for Sentence Simplification},
author = {Han Guo and Ramakanth Pasunuru and Mohit Bansal},
booktitle = {Proceedings of the 27th International Conference on Computational Linguistics (COLING 2018)},
year = {2018}
}
```
Empty file added __init__.py
Empty file.
Empty file added evaluation_utils/__init__.py
Empty file.
114 changes: 114 additions & 0 deletions evaluation_utils/bleu.py
@@ -0,0 +1,114 @@
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Python implementation of BLEU and smooth-BLEU.
This module provides a Python implementation of BLEU and smooth-BLEU.
Smooth BLEU is computed following the method outlined in the paper:
Chin-Yew Lin, Franz Josef Och. ORANGE: a method for evaluating automatic
evaluation metrics for machine translation. COLING 2004.
https://github.com/tensorflow/nmt
"""

import collections
import math


def _get_ngrams(segment, max_order):
  """Extracts all n-grams up to a given maximum order from an input segment.

  Args:
    segment: text segment from which n-grams will be extracted.
    max_order: maximum length in tokens of the n-grams returned by this
      method.

  Returns:
    The Counter containing all n-grams up to max_order in segment
    with a count of how many times each n-gram occurred.
  """
  ngram_counts = collections.Counter()
  for order in range(1, max_order + 1):
    for i in range(0, len(segment) - order + 1):
      ngram = tuple(segment[i:i + order])
      ngram_counts[ngram] += 1
  return ngram_counts


def compute_bleu(reference_corpus, translation_corpus, max_order=4,
                 smooth=False):
  """Computes BLEU score of translated segments against one or more references.

  Args:
    reference_corpus: list of lists of references for each translation. Each
      reference should be tokenized into a list of tokens.
    translation_corpus: list of translations to score. Each translation
      should be tokenized into a list of tokens.
    max_order: Maximum n-gram order to use when computing BLEU score.
    smooth: Whether or not to apply Lin et al. 2004 smoothing.

  Returns:
    6-tuple of the BLEU score, the n-gram precisions, the brevity penalty,
    the length ratio, the translation length, and the reference length.
  """
  matches_by_order = [0] * max_order
  possible_matches_by_order = [0] * max_order
  reference_length = 0
  translation_length = 0
  for (references, translation) in zip(reference_corpus,
                                       translation_corpus):
    reference_length += min(len(r) for r in references)
    translation_length += len(translation)

    merged_ref_ngram_counts = collections.Counter()
    for reference in references:
      merged_ref_ngram_counts |= _get_ngrams(reference, max_order)
    translation_ngram_counts = _get_ngrams(translation, max_order)
    overlap = translation_ngram_counts & merged_ref_ngram_counts
    for ngram in overlap:
      matches_by_order[len(ngram) - 1] += overlap[ngram]
    for order in range(1, max_order + 1):
      possible_matches = len(translation) - order + 1
      if possible_matches > 0:
        possible_matches_by_order[order - 1] += possible_matches

  precisions = [0] * max_order
  for i in range(0, max_order):
    if smooth:
      precisions[i] = ((matches_by_order[i] + 1.) /
                       (possible_matches_by_order[i] + 1.))
    else:
      if possible_matches_by_order[i] > 0:
        precisions[i] = (float(matches_by_order[i]) /
                         possible_matches_by_order[i])
      else:
        precisions[i] = 0.0

  if min(precisions) > 0:
    p_log_sum = sum((1. / max_order) * math.log(p) for p in precisions)
    geo_mean = math.exp(p_log_sum)
  else:
    geo_mean = 0

  ratio = float(translation_length) / reference_length

  if ratio > 1.0:
    bp = 1.
  else:
    bp = math.exp(1 - 1. / ratio)

  bleu = geo_mean * bp

  return (bleu, precisions, bp, ratio, translation_length, reference_length)
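For reference, a minimal usage sketch of `compute_bleu` (the import path assumes the file lives at `evaluation_utils/bleu.py`, as in this commit):

```python
from evaluation_utils.bleu import compute_bleu

# One list of references per translation; everything pre-tokenized.
references = [[["the", "cat", "sat", "on", "the", "mat"]]]
translations = [["the", "cat", "sat", "on", "a", "mat"]]

bleu, precisions, bp, ratio, trans_len, ref_len = compute_bleu(
    references, translations, max_order=4, smooth=True)
print("BLEU: %.4f (brevity penalty %.2f)" % (bleu, bp))
```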
40 changes: 40 additions & 0 deletions evaluation_utils/evaluators.py
@@ -0,0 +1,40 @@
from evaluation_utils import sentence_simplification

SUPPORTED_TASKS = ["WikiLarge", "WikiSmall", "Newsela"]


def evaluate(mode,
             gen_file,
             ref_file=None,
             execute_dir=None,
             source_file=None,
             evaluation_task=None,
             deanonymize_file=True):
    """Evaluate the model outputs on the validation or test set.

    Args:
        mode: either "val" or "test"
        gen_file: file containing the model outputs
        ref_file: reference file
        execute_dir: path to the `ducrush` Perl evaluation folder,
            or to the `JOSHUA` program directory
        source_file: path to the WikiLarge evaluation source file
        evaluation_task: task to evaluate on; one of SUPPORTED_TASKS
        deanonymize_file: whether to de-anonymize the generated file
            before evaluation
    """
    if mode not in ["val", "test"]:
        raise ValueError("Unsupported mode %s" % mode)

    if evaluation_task not in SUPPORTED_TASKS:
        raise ValueError("%s is not supported" % evaluation_task)

    scores = sentence_simplification.evaluate(
        mode=mode,
        gen_file=gen_file,
        ref_file=ref_file,
        execute_dir=execute_dir,
        source_file=source_file,
        evaluation_task=evaluation_task,
        deanonymize_file=deanonymize_file)

    return scores
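A hypothetical call, for illustration only (the paths below are placeholders, and `sentence_simplification.evaluate` itself is defined elsewhere in this commit):

```python
from evaluation_utils.evaluators import evaluate

# All paths here are illustrative placeholders.
scores = evaluate(
    mode="val",
    gen_file="logs/exp_name/decoded_output.txt",
    ref_file="data/wikilarge/valid.target",
    execute_dir="/path/to/joshua",
    source_file="data/wikilarge/valid.source",
    evaluation_task="WikiLarge")
```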
