From a548ac13559d35fe54aa2d3d7beabd847ea3036b Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Thu, 18 Jul 2019 18:32:42 +0200
Subject: [PATCH 01/31] Add test.

---
 tests/cli/test_rasa_train.py | 52 +++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py
index 81f79e612c3f..aa11b9764c42 100644
--- a/tests/cli/test_rasa_train.py
+++ b/tests/cli/test_rasa_train.py
@@ -12,7 +12,8 @@
     CONFIG_MANDATORY_KEYS,
     CONFIG_MANDATORY_KEYS_NLU,
 )
-from rasa.nlu.utils import list_files
+from rasa.nlu.utils import list_files, list_subdirectories
+from utils.io import write_yaml_file
 
 
 def test_train(run_in_default_project):
@@ -38,6 +39,55 @@ def test_train(run_in_default_project):
     assert os.path.basename(files[0]) == "test-model.tar.gz"
 
 
+def test_train_core_compare(run_in_default_project):
+    temp_dir = os.getcwd()
+
+    write_yaml_file(
+        {
+            "language": "en",
+            "pipeline": "supervised_embeddings",
+            "policies": [{"name": "KerasPolicy"}],
+        },
+        "config_1.yml",
+    )
+
+    write_yaml_file(
+        {
+            "language": "en",
+            "pipeline": "supervised_embeddings",
+            "policies": [{"name": "MemoizationPolicy"}],
+        },
+        "config_2.yml",
+    )
+
+    run_in_default_project(
+        "train",
+        "core",
+        "-c",
+        "config_1.yml",
+        "config_2.yml",
+        "--stories",
+        "data/stories.md",
+        "--out",
+        "core_comparison_results",
+        "--runs",
+        "1",
+        "--runs",
+        "1",
+        "--percentages",
+        "50",
+        "--augmentation",
+        "5",
+    )
+
+    assert os.path.exists(os.path.join(temp_dir, "core_comparison_results"))
+    sub_dirs = list_subdirectories(os.path.join(temp_dir, "core_comparison_results"))
+    assert len(sub_dirs) == 1
+    files = list_files(os.path.join(temp_dir, "core_comparison_results", sub_dirs[0]))
+    assert len(files) == 1
+    assert files[0].endswith("tar.gz")
+
+
 def test_train_no_domain_exists(run_in_default_project):
 
     os.remove("domain.yml")

From 359663e96aed6f0a8d53ec5e0b2c31f9cc64a5de Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 19 Jul 2019 10:21:09 +0200
Subject: [PATCH 02/31] package model on rasa train core compare

---
 rasa/core/train.py           | 44 ++++++++++++++++++++++++++----------
 rasa/train.py                |  8 +++----
 tests/cli/test_rasa_train.py | 11 ++++-----
 tests/test_train.py          |  4 ++--
 4 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/rasa/core/train.py b/rasa/core/train.py
index d1ede8431651..637e502e9c67 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -1,9 +1,12 @@
 import logging
 import os
+import tempfile
 import typing
+from contextlib import ExitStack
 from typing import Dict, Optional, Text, Union
 
 from rasa.core.domain import Domain
+from rasa.utils.common import TempDirectoryPath
 
 if typing.TYPE_CHECKING:
     from rasa.core.interpreter import NaturalLanguageInterpreter
@@ -75,6 +78,8 @@ async def train_comparison_models(
 ):
     """Train multiple models for comparison of policies"""
     from rasa.core import config
+    from rasa import model
+    from rasa.train import package_model
 
     exclusion_percentages = exclusion_percentages or []
     policy_configs = policy_configs or []
@@ -94,9 +99,8 @@ async def train_comparison_models(
                     )
 
                 policy_name = type(policies[0]).__name__
-                output = os.path.join(
-                    output_path, "run_" + str(r + 1), policy_name + str(current_round)
-                )
+                output_dir = os.path.join(output_path, "run_" + str(r + 1))
+                model_name = policy_name + str(current_round)
 
                 logging.info(
                     "Starting to train {} round {}/{}"
@@ -104,15 +108,31 @@ async def train_comparison_models(
                     "".format(policy_name, current_round, len(exclusion_percentages), i)
                 )
 
-                await train(
-                    domain,
-                    stories,
-                    output,
-                    policy_config=policy_config,
-                    exclusion_percentage=i,
-                    kwargs=kwargs,
-                    dump_stories=dump_stories,
-                )
+                with ExitStack() as stack:
+                    train_path = stack.enter_context(
+                        TempDirectoryPath(tempfile.mkdtemp())
+                    )
+
+                    await train(
+                        domain,
+                        stories,
+                        train_path,
+                        policy_config=policy_config,
+                        exclusion_percentage=i,
+                        kwargs=kwargs,
+                        dump_stories=dump_stories,
+                    )
+
+                    new_fingerprint = model.model_fingerprint(
+                        policy_config, domain, stories=stories
+                    )
+
+                    package_model(
+                        new_fingerprint=new_fingerprint,
+                        output_path=output_dir,
+                        train_path=train_path,
+                        fixed_model_name=model_name,
+                    )
 
 
 async def get_no_of_stories(story_file, domain):
diff --git a/rasa/train.py b/rasa/train.py
index 0802536fba4e..688f0dadf8e2 100644
--- a/rasa/train.py
+++ b/rasa/train.py
@@ -203,7 +203,7 @@ async def _train_async_internal(
             kwargs=kwargs,
         )
 
-        return _package_model(
+        return package_model(
             new_fingerprint=new_fingerprint,
             output_path=output_path,
             train_path=train_path,
@@ -383,7 +383,7 @@ async def _train_core_with_validated_data(
             new_fingerprint = model.model_fingerprint(
                 config, domain, stories=story_directory
             )
-            return _package_model(
+            return package_model(
                 new_fingerprint=new_fingerprint,
                 output_path=output,
                 train_path=_train_path,
@@ -470,7 +470,7 @@ def _train_nlu_with_validated_data(
                 config, nlu_data=nlu_data_directory
             )
 
-            return _package_model(
+            return package_model(
                 new_fingerprint=new_fingerprint,
                 output_path=output,
                 train_path=_train_path,
@@ -481,7 +481,7 @@ def _train_nlu_with_validated_data(
         return _train_path
 
 
-def _package_model(
+def package_model(
     new_fingerprint: Fingerprint,
     output_path: Text,
     train_path: Text,
diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py
index aa11b9764c42..108c26adec29 100644
--- a/tests/cli/test_rasa_train.py
+++ b/tests/cli/test_rasa_train.py
@@ -71,20 +71,19 @@ def test_train_core_compare(run_in_default_project):
         "--out",
         "core_comparison_results",
         "--runs",
-        "1",
-        "--runs",
-        "1",
+        "2",
         "--percentages",
-        "50",
+        "25",
+        "75",
         "--augmentation",
         "5",
     )
 
     assert os.path.exists(os.path.join(temp_dir, "core_comparison_results"))
     sub_dirs = list_subdirectories(os.path.join(temp_dir, "core_comparison_results"))
-    assert len(sub_dirs) == 1
+    assert len(sub_dirs) == 2
     files = list_files(os.path.join(temp_dir, "core_comparison_results", sub_dirs[0]))
-    assert len(files) == 1
+    assert len(files) == 4
     assert files[0].endswith("tar.gz")
 
 
diff --git a/tests/test_train.py b/tests/test_train.py
index 3221151bf832..2c1fa007f311 100644
--- a/tests/test_train.py
+++ b/tests/test_train.py
@@ -6,7 +6,7 @@
 
 from rasa.model import unpack_model
 
-from rasa.train import _package_model, train
+from rasa.train import package_model, train
 from tests.core.test_model import _fingerprint
 
 TEST_TEMP = "test_tmp"
@@ -24,7 +24,7 @@ def test_package_model(trained_rasa_model, parameters):
     output_path = tempfile.mkdtemp()
     train_path = unpack_model(trained_rasa_model)
 
-    model_path = _package_model(
+    model_path = package_model(
         _fingerprint(),
         output_path,
         train_path,

From 2c25807ab2c1841a6bcc2463b0d31a1c52ed6747 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 19 Jul 2019 14:50:28 +0200
Subject: [PATCH 03/31] Update types.

---
 rasa/cli/train.py  |  2 +-
 rasa/constants.py  |  1 +
 rasa/core/train.py | 55 +++++++++++++++++++++++++---------------------
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/rasa/cli/train.py b/rasa/cli/train.py
index e23f5c93b1e5..81d3c6495ce5 100644
--- a/rasa/cli/train.py
+++ b/rasa/cli/train.py
@@ -114,7 +114,7 @@ def train_core(
     else:
         from rasa.core.train import do_compare_training
 
-        loop.run_until_complete(do_compare_training(args, stories, None))
+        loop.run_until_complete(do_compare_training(args, stories))
         return None
 
 
diff --git a/rasa/constants.py b/rasa/constants.py
index 346fd89439b3..05d3b3942ca3 100644
--- a/rasa/constants.py
+++ b/rasa/constants.py
@@ -15,6 +15,7 @@
 TEST_DATA_FILE = "test.md"
 TRAIN_DATA_FILE = "train.md"
 RESULTS_FILE = "results.json"
+NUM_STORIES_FILE = "num_stories.json"
 
 PACKAGE_NAME = "rasa"
 
diff --git a/rasa/core/train.py b/rasa/core/train.py
index 637e502e9c67..0123e24bc3e9 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -1,10 +1,12 @@
+import argparse
 import logging
 import os
 import tempfile
 import typing
 from contextlib import ExitStack
-from typing import Dict, Optional, Text, Union
+from typing import Dict, Optional, Text, Union, List
 
+from constants import NUM_STORIES_FILE
 from rasa.core.domain import Domain
 from rasa.utils.common import TempDirectoryPath
 
@@ -67,14 +69,14 @@ async def train(
 
 
 async def train_comparison_models(
-    stories,
-    domain,
-    output_path="",
-    exclusion_percentages=None,
-    policy_configs=None,
-    runs=1,
-    dump_stories=False,
-    kwargs=None,
+    story_file: Text,
+    domain: Text,
+    output_path: Text = "",
+    exclusion_percentages: Optional[List] = None,
+    policy_configs: Optional[List] = None,
+    runs: int = 1,
+    dump_stories: bool = False,
+    kwargs: Optional[dict] = None,
 ):
     """Train multiple models for comparison of policies"""
     from rasa.core import config
@@ -99,9 +101,6 @@ async def train_comparison_models(
                     )
 
                 policy_name = type(policies[0]).__name__
-                output_dir = os.path.join(output_path, "run_" + str(r + 1))
-                model_name = policy_name + str(current_round)
-
                 logging.info(
                     "Starting to train {} round {}/{}"
                     " with {}% exclusion"
@@ -115,7 +114,7 @@ async def train_comparison_models(
 
                     await train(
                         domain,
-                        stories,
+                        story_file,
                         train_path,
                         policy_config=policy_config,
                         exclusion_percentage=i,
@@ -124,9 +123,11 @@ async def train_comparison_models(
                     )
 
                     new_fingerprint = model.model_fingerprint(
-                        policy_config, domain, stories=stories
+                        policy_config, domain, stories=story_file
                     )
 
+                    output_dir = os.path.join(output_path, "run_" + str(r + 1))
+                    model_name = policy_name + str(current_round)
                     package_model(
                         new_fingerprint=new_fingerprint,
                         output_path=output_dir,
@@ -146,29 +147,33 @@ async def get_no_of_stories(story_file, domain):
     return len(stories)
 
 
-async def do_compare_training(cmdline_args, stories, additional_arguments):
+async def do_compare_training(
+    args: argparse.Namespace,
+    story_file: Text,
+    additional_arguments: Optional[dict] = None,
+):
     from rasa.core import utils
 
     await train_comparison_models(
-        stories,
-        cmdline_args.domain,
-        cmdline_args.out,
-        cmdline_args.percentages,
-        cmdline_args.config,
-        cmdline_args.runs,
-        cmdline_args.dump_stories,
+        story_file,
+        args.domain,
+        args.out,
+        args.percentages,
+        args.config,
+        args.runs,
+        args.dump_stories,
         additional_arguments,
     )
 
-    no_stories = await get_no_of_stories(cmdline_args.stories, cmdline_args.domain)
+    no_stories = await get_no_of_stories(args.stories, args.domain)
 
     # store the list of the number of stories present at each exclusion
     # percentage
     story_range = [
-        no_stories - round((x / 100.0) * no_stories) for x in cmdline_args.percentages
+        no_stories - round((x / 100.0) * no_stories) for x in args.percentages
     ]
 
-    story_n_path = os.path.join(cmdline_args.out, "num_stories.json")
+    story_n_path = os.path.join(args.out, NUM_STORIES_FILE)
     utils.dump_obj_as_json_to_file(story_n_path, story_range)
 
 

From 5ee53c6aa040acf3b312969f1593688cf23ae76d Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 19 Jul 2019 14:53:40 +0200
Subject: [PATCH 04/31] Clean up.

---
 rasa/cli/train.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/rasa/cli/train.py b/rasa/cli/train.py
index 81d3c6495ce5..8d61caa11ca8 100644
--- a/rasa/cli/train.py
+++ b/rasa/cli/train.py
@@ -115,7 +115,6 @@ def train_core(
         from rasa.core.train import do_compare_training
 
         loop.run_until_complete(do_compare_training(args, stories))
-        return None
 
 
 def train_nlu(

From cc4b6f188d80350bd64745c2a490d49a559c571d Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 19 Jul 2019 15:02:03 +0200
Subject: [PATCH 05/31] Add Changelog entry.

---
 CHANGELOG.rst                | 1 +
 rasa/core/train.py           | 2 +-
 tests/cli/test_rasa_train.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 11fa25a25764..845949c8d8a1 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -24,6 +24,7 @@ Removed
 
 Fixed
 -----
+- ``rasa train core`` in comparison mode stores the model files compressed (``tar.gz`` files)
 
 
 [1.1.7] - 2019-07-18
diff --git a/rasa/core/train.py b/rasa/core/train.py
index 0123e24bc3e9..9c4253e0e092 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -6,7 +6,7 @@
 from contextlib import ExitStack
 from typing import Dict, Optional, Text, Union, List
 
-from constants import NUM_STORIES_FILE
+from rasa.constants import NUM_STORIES_FILE
 from rasa.core.domain import Domain
 from rasa.utils.common import TempDirectoryPath
 
diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py
index 108c26adec29..a7077296cc19 100644
--- a/tests/cli/test_rasa_train.py
+++ b/tests/cli/test_rasa_train.py
@@ -13,7 +13,7 @@
     CONFIG_MANDATORY_KEYS_NLU,
 )
 from rasa.nlu.utils import list_files, list_subdirectories
-from utils.io import write_yaml_file
+from rasa.utils.io import write_yaml_file
 
 
 def test_train(run_in_default_project):

From 647c7bd2db6f47c1acde575d2a5d9508a552950a Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 19 Jul 2019 13:10:59 +0200
Subject: [PATCH 06/31] Update types.

---
 rasa/cli/train.py |  6 +++---
 rasa/core/test.py | 28 ++++++++++------------------
 rasa/test.py      |  6 ++++--
 3 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/rasa/cli/train.py b/rasa/cli/train.py
index 8d61caa11ca8..be13f7033a86 100644
--- a/rasa/cli/train.py
+++ b/rasa/cli/train.py
@@ -90,7 +90,7 @@ def train_core(
     args.domain = get_validated_path(
         args.domain, "domain", DEFAULT_DOMAIN_PATH, none_is_valid=True
     )
-    stories = get_validated_path(
+    story_file = get_validated_path(
         args.stories, "stories", DEFAULT_DATA_PATH, none_is_valid=True
     )
 
@@ -105,7 +105,7 @@ def train_core(
         return train_core(
             domain=args.domain,
             config=config,
-            stories=stories,
+            stories=story_file,
             output=output,
             train_path=train_path,
             fixed_model_name=args.fixed_model_name,
@@ -114,7 +114,7 @@ def train_core(
     else:
         from rasa.core.train import do_compare_training
 
-        loop.run_until_complete(do_compare_training(args, stories))
+        loop.run_until_complete(do_compare_training(args, story_file))
 
 
 def train_nlu(
diff --git a/rasa/core/test.py b/rasa/core/test.py
index e1b523bf55a2..d2044d146b64 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -595,30 +595,22 @@ async def compare(models: Text, stories_file: Text, output: Text) -> None:
 
     num_correct = defaultdict(list)
 
-    for run in nlu_utils.list_subdirectories(models):
-        num_correct_run = defaultdict(list)
+    for model in nlu_utils.list_subdirectories(models):
+        logger.info("Evaluating model {}".format(model))
 
-        for model in sorted(nlu_utils.list_subdirectories(run)):
-            logger.info("Evaluating model {}".format(model))
+        agent = Agent.load(model)
 
-            agent = Agent.load(model)
+        completed_trackers = await _generate_trackers(stories_file, agent)
 
-            completed_trackers = await _generate_trackers(stories_file, agent)
-
-            story_eval_store, no_of_stories = collect_story_predictions(
-                completed_trackers, agent
-            )
+        story_eval_store, no_of_stories = collect_story_predictions(
+            completed_trackers, agent
+        )
 
-            failed_stories = story_eval_store.failed_stories
-            policy_name = "".join(
-                [i for i in os.path.basename(model) if not i.isdigit()]
-            )
-            num_correct_run[policy_name].append(no_of_stories - len(failed_stories))
+        failed_stories = story_eval_store.failed_stories
 
-        for k, v in num_correct_run.items():
-            num_correct[k].append(v)
+        num_correct[os.path.basename(model)].append(no_of_stories - len(failed_stories))
 
-    utils.dump_obj_as_json_to_file(os.path.join(output, "results.json"), num_correct)
+    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
 
 
 def plot_nlu_results(output: Text, number_of_examples: List[int]) -> None:
diff --git a/rasa/test.py b/rasa/test.py
index 2d388dea3da5..0b58dd612646 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -6,7 +6,7 @@
 
 from rasa.core.interpreter import RegexInterpreter
 
-from rasa.constants import DEFAULT_RESULTS_PATH, RESULTS_FILE
+from rasa.constants import DEFAULT_RESULTS_PATH, RESULTS_FILE, NUM_STORIES_FILE
 from rasa.model import get_model, get_model_subdirectories, unpack_model
 from rasa.cli.utils import minimal_kwargs, print_error, print_warning
 from rasa.exceptions import ModelNotFound
@@ -23,7 +23,9 @@ def test_compare_core(models: List[Text], stories: Text, output: Text):
     loop = asyncio.get_event_loop()
     loop.run_until_complete(compare(model_directory, stories, output))
 
-    story_n_path = os.path.join(model_directory, "num_stories.json")
+    story_n_path = os.path.join(
+        os.path.dirname(os.path.dirname(models[0])), NUM_STORIES_FILE
+    )
     number_of_stories = rasa.utils.io.read_json_file(story_n_path)
     plot_core_results(output, number_of_stories)
 

From 182ccc53687d665d1c994225cf8574e1ef25505b Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Mon, 22 Jul 2019 13:55:44 +0200
Subject: [PATCH 07/31] Add --evaluate-models-in-dir param.

---
 rasa/cli/arguments/test.py  | 17 +++++++++++++----
 rasa/cli/test.py            | 19 +++++++++++--------
 tests/cli/test_rasa_test.py |  9 +++++----
 3 files changed, 29 insertions(+), 16 deletions(-)

diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
index 61c695aa8474..6a0c756f35cb 100644
--- a/rasa/cli/arguments/test.py
+++ b/rasa/cli/arguments/test.py
@@ -1,7 +1,7 @@
 import argparse
 from typing import Union
 
-from rasa.constants import DEFAULT_MODELS_PATH, DEFAULT_CONFIG_PATH
+from rasa.constants import DEFAULT_MODELS_PATH, DEFAULT_RESULTS_PATH
 
 from rasa.cli.arguments.default_arguments import (
     add_stories_param,
@@ -42,7 +42,7 @@ def add_test_core_argument_group(
     )
     add_out_param(
         parser,
-        default="results",
+        default=DEFAULT_RESULTS_PATH,
         help_text="Output path for any files created during the evaluation.",
     )
     parser.add_argument(
@@ -70,6 +70,15 @@ def add_test_core_argument_group(
         "trains on it. Fetches the data by sending a GET request "
         "to the supplied URL.",
     )
+    parser.add_argument(
+        "--evaluate-models-in-dir",
+        default=False,
+        action="store_true",
+        help="Should be set to evaluate models trained via "
+        "`rasa train core --config <config-1> <config-2>`. "
+        "All models in the provided model folder are evaluated "
+        "and compared against each other.",
+    )
 
 
 def add_test_nlu_argument_group(
@@ -164,6 +173,6 @@ def add_test_core_model_param(parser: argparse.ArgumentParser):
         default=[default_path],
         help="Path to a pre-trained model. If it is a 'tar.gz' file that model file "
         "will be used. If it is a directory, the latest model in that directory "
-        "will be used. If multiple 'tar.gz' files are provided, all those models "
-        "will be compared.",
+        "will be used (exception: '--evaluate-models-in-dir' flag is set). If multiple "
+        "'tar.gz' files are provided, all those models will be compared.",
     )
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index 74b753324977..7ecbf0700510 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -15,7 +15,7 @@
     DEFAULT_NLU_RESULTS_PATH,
     CONFIG_SCHEMA_FILE,
 )
-from rasa.test import test_compare_core, compare_nlu_models
+from rasa.test import test_compare_core, compare_nlu_models, test_core_models_in_dir
 from rasa.utils.validation import validate_yaml_schema, InvalidYamlFileError
 
 logger = logging.getLogger(__name__)
@@ -77,13 +77,16 @@ def test_core(args: argparse.Namespace) -> None:
     if isinstance(args.model, str):
         model_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
 
-        test_core(
-            model=model_path,
-            stories=stories,
-            endpoints=endpoints,
-            output=output,
-            kwargs=vars(args),
-        )
+        if args.evaluate_in_dir:
+            test_core_models_in_dir(args.model, stories, output)
+        else:
+            test_core(
+                model=model_path,
+                stories=stories,
+                endpoints=endpoints,
+                output=output,
+                kwargs=vars(args),
+            )
 
     else:
         test_compare_core(args.model, stories, output)
diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index 2e1983903169..3fd4c580222c 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -59,9 +59,9 @@ def test_test_help(run):
     help_text = """usage: rasa test [-h] [-v] [-vv] [--quiet] [-m MODEL] [-s STORIES]
                  [--max-stories MAX_STORIES] [--out OUT] [--e2e]
                  [--endpoints ENDPOINTS] [--fail-on-prediction-errors]
-                 [--url URL] [-u NLU] [--report [REPORT]]
-                 [--successes [SUCCESSES]] [--errors ERRORS]
-                 [--histogram HISTOGRAM] [--confmat CONFMAT]
+                 [--url URL] [--evaluate-models-in-dir] [-u NLU]
+                 [--report [REPORT]] [--successes [SUCCESSES]]
+                 [--errors ERRORS] [--histogram HISTOGRAM] [--confmat CONFMAT]
                  [-c CONFIG [CONFIG ...]] [--cross-validation] [-f FOLDS]
                  [-r RUNS] [-p PERCENTAGES [PERCENTAGES ...]]
                  {core,nlu} ..."""
@@ -94,7 +94,8 @@ def test_test_core_help(run):
     help_text = """usage: rasa test core [-h] [-v] [-vv] [--quiet] [-m MODEL [MODEL ...]]
                       [-s STORIES] [--max-stories MAX_STORIES] [--out OUT]
                       [--e2e] [--endpoints ENDPOINTS]
-                      [--fail-on-prediction-errors] [--url URL]"""
+                      [--fail-on-prediction-errors] [--url URL]
+                      [--evaluate-models-in-dir]"""
 
     lines = help_text.split("\n")
 

From 26afeae6fb30a92964f760bf4819159646a06688 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Mon, 22 Jul 2019 14:20:53 +0200
Subject: [PATCH 08/31] Fix rasa test core with model dir.

---
 rasa/cli/test.py                  |  6 ++--
 rasa/core/test.py                 | 50 +++++++++++++++++++++++--------
 rasa/core/training/dsl.py         |  4 +--
 rasa/nlu/training_data/loading.py |  5 ++--
 rasa/nlu/training_data/util.py    |  4 +--
 rasa/nlu/utils/__init__.py        | 44 ---------------------------
 rasa/test.py                      | 48 ++++++++++++-----------------
 rasa/utils/io.py                  | 44 +++++++++++++++++++++++++++
 tests/cli/test_rasa_train.py      |  3 +-
 tests/nlu/base/test_utils.py      |  7 ++---
 10 files changed, 114 insertions(+), 101 deletions(-)

diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index 7ecbf0700510..926d97b3b30f 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -15,7 +15,7 @@
     DEFAULT_NLU_RESULTS_PATH,
     CONFIG_SCHEMA_FILE,
 )
-from rasa.test import test_compare_core, compare_nlu_models, test_core_models_in_dir
+from rasa.test import test_core_models, compare_nlu_models, test_core_models_in_dir
 from rasa.utils.validation import validate_yaml_schema, InvalidYamlFileError
 
 logger = logging.getLogger(__name__)
@@ -77,7 +77,7 @@ def test_core(args: argparse.Namespace) -> None:
     if isinstance(args.model, str):
         model_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
 
-        if args.evaluate_in_dir:
+        if args.evaluate_models_in_dir:
             test_core_models_in_dir(args.model, stories, output)
         else:
             test_core(
@@ -89,7 +89,7 @@ def test_core(args: argparse.Namespace) -> None:
             )
 
     else:
-        test_compare_core(args.model, stories, output)
+        test_core_models(args.model, stories, output)
 
 
 def test_nlu(args: argparse.Namespace) -> None:
diff --git a/rasa/core/test.py b/rasa/core/test.py
index d2044d146b64..fd5b928cb1f5 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -587,32 +587,58 @@ def plot_story_evaluation(
     fig.savefig(os.path.join(out_directory, "story_confmat.pdf"), bbox_inches="tight")
 
 
-async def compare(models: Text, stories_file: Text, output: Text) -> None:
+async def compare_models_in_dir(
+    model_dir: Text, stories_file: Text, output: Text
+) -> None:
     """Evaluates multiple trained models on a test set."""
-    from rasa.core.agent import Agent
-    import rasa.nlu.utils as nlu_utils
     from rasa.core import utils
+    import rasa.utils.io as io_utils
 
     num_correct = defaultdict(list)
 
-    for model in nlu_utils.list_subdirectories(models):
-        logger.info("Evaluating model {}".format(model))
+    for run in io_utils.list_subdirectories(model_dir):
+        num_correct_run = defaultdict(list)
 
-        agent = Agent.load(model)
+        for model in sorted(io_utils.list_files(run)):
+            model_name = "".join(
+                [i for i in os.path.basename(model) if not i.isdigit()]
+            )
+            correct_stories = await _compare(model, stories_file)
+            num_correct_run[model_name].append(correct_stories)
 
-        completed_trackers = await _generate_trackers(stories_file, agent)
+        for k, v in num_correct_run.items():
+            num_correct[k].append(v)
 
-        story_eval_store, no_of_stories = collect_story_predictions(
-            completed_trackers, agent
-        )
+    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
+
+
+async def compare_models(models: List[Text], stories_file: Text, output: Text) -> None:
+    """Evaluates multiple trained models on a test set."""
+    from rasa.core import utils
 
-        failed_stories = story_eval_store.failed_stories
+    num_correct = defaultdict(list)
 
-        num_correct[os.path.basename(model)].append(no_of_stories - len(failed_stories))
+    for model in models:
+        correct_stories = await _compare(model, stories_file)
+        num_correct[os.path.basename(model)].append(correct_stories)
 
     utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
 
 
+async def _compare(model: Text, stories_file: Text) -> int:
+    from rasa.core.agent import Agent
+
+    logger.info("Evaluating model {}".format(model))
+
+    agent = Agent.load(model)
+    completed_trackers = await _generate_trackers(stories_file, agent)
+    story_eval_store, no_of_stories = collect_story_predictions(
+        completed_trackers, agent
+    )
+    failed_stories = story_eval_store.failed_stories
+    return no_of_stories - len(failed_stories)
+
+
 def plot_nlu_results(output: Text, number_of_examples: List[int]) -> None:
 
     graph_path = os.path.join(output, "nlu_model_comparison_graph.pdf")
diff --git a/rasa/core/training/dsl.py b/rasa/core/training/dsl.py
index 69fd57d0f49b..f293024c63e8 100644
--- a/rasa/core/training/dsl.py
+++ b/rasa/core/training/dsl.py
@@ -171,7 +171,7 @@ async def read_from_folder(
         exclusion_percentage=None,
     ):
         """Given a path reads all contained story files."""
-        import rasa.nlu.utils as nlu_utils
+        import rasa.utils.io as io_utils
 
         if not os.path.exists(resource_name):
             raise ValueError(
@@ -181,7 +181,7 @@ async def read_from_folder(
             )
 
         story_steps = []
-        for f in nlu_utils.list_files(resource_name):
+        for f in io_utils.list_files(resource_name):
             steps = await StoryFileReader.read_from_file(
                 f, domain, interpreter, template_variables, use_e2e
             )
diff --git a/rasa/nlu/training_data/loading.py b/rasa/nlu/training_data/loading.py
index fcc14d6db656..8d03ce58b211 100644
--- a/rasa/nlu/training_data/loading.py
+++ b/rasa/nlu/training_data/loading.py
@@ -6,7 +6,6 @@
 import typing
 from typing import Optional, Text
 
-import rasa.utils.io
 from rasa.nlu import utils
 from rasa.nlu.training_data.formats import markdown
 from rasa.nlu.training_data.formats.dialogflow import (
@@ -57,7 +56,7 @@ def load_data(resource_name: Text, language: Optional[Text] = "en") -> "Training
     if not os.path.exists(resource_name):
         raise ValueError("File '{}' does not exist.".format(resource_name))
 
-    files = utils.list_files(resource_name)
+    files = io_utils.list_files(resource_name)
     data_sets = [_load(f, language) for f in files]
     data_sets = [ds for ds in data_sets if ds]
     if len(data_sets) == 0:
@@ -141,7 +140,7 @@ def guess_format(filename: Text) -> Text:
 
     content = ""
     try:
-        content = rasa.utils.io.read_file(filename)
+        content = io_utils.read_file(filename)
         js = json.loads(content)
     except ValueError:
         if any([marker in content for marker in _markdown_section_markers]):
diff --git a/rasa/nlu/training_data/util.py b/rasa/nlu/training_data/util.py
index 4ee1ee68a253..b7533c378fb5 100644
--- a/rasa/nlu/training_data/util.py
+++ b/rasa/nlu/training_data/util.py
@@ -4,7 +4,7 @@
 import os
 from typing import Text
 
-from rasa.nlu import utils
+import rasa.utils.io as io_utils
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +35,7 @@ def get_file_format(resource_name: Text) -> Text:
     if resource_name is None or not os.path.exists(resource_name):
         raise AttributeError("Resource '{}' does not exist.".format(resource_name))
 
-    files = utils.list_files(resource_name)
+    files = io_utils.list_files(resource_name)
 
     file_formats = list(map(lambda f: loading.guess_format(f), files))
 
diff --git a/rasa/nlu/utils/__init__.py b/rasa/nlu/utils/__init__.py
index 7025263ad0f9..7b08fe902a44 100644
--- a/rasa/nlu/utils/__init__.py
+++ b/rasa/nlu/utils/__init__.py
@@ -1,5 +1,4 @@
 import errno
-import glob
 import io
 import json
 import os
@@ -33,49 +32,6 @@ def create_dir(dir_path: Text) -> None:
             raise
 
 
-def list_directory(path: Text) -> List[Text]:
-    """Returns all files and folders excluding hidden files.
-
-    If the path points to a file, returns the file. This is a recursive
-    implementation returning files in any depth of the path."""
-
-    if not isinstance(path, str):
-        raise ValueError(
-            "`resource_name` must be a string type. "
-            "Got `{}` instead".format(type(path))
-        )
-
-    if os.path.isfile(path):
-        return [path]
-    elif os.path.isdir(path):
-        results = []
-        for base, dirs, files in os.walk(path):
-            # remove hidden files
-            goodfiles = filter(lambda x: not x.startswith("."), files)
-            results.extend(os.path.join(base, f) for f in goodfiles)
-        return results
-    else:
-        raise ValueError(
-            "Could not locate the resource '{}'.".format(os.path.abspath(path))
-        )
-
-
-def list_files(path: Text) -> List[Text]:
-    """Returns all files excluding hidden files.
-
-    If the path points to a file, returns the file."""
-
-    return [fn for fn in list_directory(path) if os.path.isfile(fn)]
-
-
-def list_subdirectories(path: Text) -> List[Text]:
-    """Returns all folders excluding hidden files.
-
-    If the path points to a file, returns an empty list."""
-
-    return [fn for fn in glob.glob(os.path.join(path, "*")) if os.path.isdir(fn)]
-
-
 def lazyproperty(fn: Callable) -> Any:
     """Allows to avoid recomputing a property over and over.
 
diff --git a/rasa/test.py b/rasa/test.py
index 0b58dd612646..c6a481443928 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -7,29 +7,32 @@
 from rasa.core.interpreter import RegexInterpreter
 
 from rasa.constants import DEFAULT_RESULTS_PATH, RESULTS_FILE, NUM_STORIES_FILE
-from rasa.model import get_model, get_model_subdirectories, unpack_model
+from rasa.model import get_model, get_model_subdirectories
 from rasa.cli.utils import minimal_kwargs, print_error, print_warning
 from rasa.exceptions import ModelNotFound
 
 logger = logging.getLogger(__name__)
 
 
-def test_compare_core(models: List[Text], stories: Text, output: Text):
-    from rasa.core.test import compare, plot_core_results
+def test_core_models_in_dir(model_directory: Text, stories: Text, output: Text):
+    from rasa.core.test import compare_models_in_dir, plot_core_results
     import rasa.utils.io
 
-    model_directory = copy_models_to_compare(models)
-
     loop = asyncio.get_event_loop()
-    loop.run_until_complete(compare(model_directory, stories, output))
+    loop.run_until_complete(compare_models_in_dir(model_directory, stories, output))
 
-    story_n_path = os.path.join(
-        os.path.dirname(os.path.dirname(models[0])), NUM_STORIES_FILE
-    )
+    story_n_path = os.path.join(model_directory, NUM_STORIES_FILE)
     number_of_stories = rasa.utils.io.read_json_file(story_n_path)
     plot_core_results(output, number_of_stories)
 
 
+def test_core_models(models: List[Text], stories: Text, output: Text):
+    from rasa.core.test import compare_models
+
+    loop = asyncio.get_event_loop()
+    loop.run_until_complete(compare_models(models, stories, output))
+
+
 def test(
     model: Text,
     stories: Text,
@@ -72,7 +75,7 @@ def test_core(
     except ModelNotFound:
         print_error(
             "Unable to test: could not find a model. Use 'rasa train' to train a "
-            "Rasa model."
+            "Rasa model and provide it via the '--model' argument."
         )
         return
 
@@ -80,8 +83,8 @@ def test_core(
 
     if not core_path:
         print_error(
-            "Unable to test: could not find a Core model. Use 'rasa train' to "
-            "train a model."
+            "Unable to test: could not find a Core model. Use 'rasa train' to train a "
+            "Rasa model and provide it via the '--model' argument."
         )
 
     use_e2e = kwargs["e2e"] if "e2e" in kwargs else False
@@ -113,7 +116,8 @@ def test_nlu(model: Optional[Text], nlu_data: Optional[Text], kwargs: Optional[D
         unpacked_model = get_model(model)
     except ModelNotFound:
         print_error(
-            "Could not find any model. Use 'rasa train nlu' to train an NLU model."
+            "Could not find any model. Use 'rasa train nlu' to train a "
+            "Rasa model and provide it via the '--model' argument."
         )
         return
 
@@ -124,7 +128,8 @@ def test_nlu(model: Optional[Text], nlu_data: Optional[Text], kwargs: Optional[D
         run_evaluation(nlu_data, nlu_model, **kwargs)
     else:
         print_error(
-            "Could not find any model. Use 'rasa train nlu' to train an NLU model."
+            "Could not find any model. Use 'rasa train nlu' to train a "
+            "Rasa model and provide it via the '--model' argument."
         )
 
 
@@ -200,18 +205,3 @@ def perform_nlu_cross_validation(
         logger.info("Entity evaluation results")
         return_entity_results(entity_results.train, "train")
         return_entity_results(entity_results.test, "test")
-
-
-def copy_models_to_compare(models: List[str]) -> Text:
-    models_dir = tempfile.mkdtemp()
-
-    for i, model in enumerate(models):
-        if os.path.exists(model) and os.path.isfile(model):
-            path = os.path.join(models_dir, "model_" + str(i))
-            unpack_model(model, path)
-        else:
-            logger.warning("Ignore '{}' as it is not a valid model file.".format(model))
-
-    logger.debug("Unpacked models to compare to '{}'".format(models_dir))
-
-    return models_dir
diff --git a/rasa/utils/io.py b/rasa/utils/io.py
index a9ad18baecdd..6be3ddc13480 100644
--- a/rasa/utils/io.py
+++ b/rasa/utils/io.py
@@ -6,6 +6,7 @@
 import tempfile
 import warnings
 import zipfile
+import glob
 from asyncio import AbstractEventLoop
 from typing import Text, Any, Dict, Union, List, Type, Callable
 import ruamel.yaml as yaml
@@ -300,3 +301,46 @@ def validate(document: Document) -> None:
                 raise ValidationError(message=error_message)
 
     return FunctionValidator
+
+
+def list_files(path: Text) -> List[Text]:
+    """Returns all files excluding hidden files.
+
+    If the path points to a file, returns the file."""
+
+    return [fn for fn in list_directory(path) if os.path.isfile(fn)]
+
+
+def list_subdirectories(path: Text) -> List[Text]:
+    """Returns all folders excluding hidden files.
+
+    If the path points to a file, returns an empty list."""
+
+    return [fn for fn in glob.glob(os.path.join(path, "*")) if os.path.isdir(fn)]
+
+
+def list_directory(path: Text) -> List[Text]:
+    """Returns all files and folders excluding hidden files.
+
+    If the path points to a file, returns the file. This is a recursive
+    implementation returning files in any depth of the path."""
+
+    if not isinstance(path, str):
+        raise ValueError(
+            "`resource_name` must be a string type. "
+            "Got `{}` instead".format(type(path))
+        )
+
+    if os.path.isfile(path):
+        return [path]
+    elif os.path.isdir(path):
+        results = []
+        for base, dirs, files in os.walk(path):
+            # remove hidden files
+            goodfiles = filter(lambda x: not x.startswith("."), files)
+            results.extend(os.path.join(base, f) for f in goodfiles)
+        return results
+    else:
+        raise ValueError(
+            "Could not locate the resource '{}'.".format(os.path.abspath(path))
+        )
diff --git a/tests/cli/test_rasa_train.py b/tests/cli/test_rasa_train.py
index a7077296cc19..d32523c04250 100644
--- a/tests/cli/test_rasa_train.py
+++ b/tests/cli/test_rasa_train.py
@@ -12,8 +12,7 @@
     CONFIG_MANDATORY_KEYS,
     CONFIG_MANDATORY_KEYS_NLU,
 )
-from rasa.nlu.utils import list_files, list_subdirectories
-from rasa.utils.io import write_yaml_file
+from rasa.utils.io import write_yaml_file, list_files, list_subdirectories
 
 
 def test_train(run_in_default_project):
diff --git a/tests/nlu/base/test_utils.py b/tests/nlu/base/test_utils.py
index 9767be6da86b..e17f67cc4109 100644
--- a/tests/nlu/base/test_utils.py
+++ b/tests/nlu/base/test_utils.py
@@ -5,7 +5,6 @@
 import pytest
 import tempfile
 import rasa.utils.io
-from rasa.nlu import utils
 from rasa.nlu.utils import (
     create_dir,
     is_model_dir,
@@ -36,13 +35,13 @@ def test_relative_normpath():
 
 def test_list_files_invalid_resource():
     with pytest.raises(ValueError) as execinfo:
-        utils.list_files(None)
+        io_utils.list_files(None)
     assert "must be a string type" in str(execinfo.value)
 
 
 def test_list_files_non_existing_dir():
     with pytest.raises(ValueError) as execinfo:
-        utils.list_files("my/made_up/path")
+        io_utils.list_files("my/made_up/path")
     assert "Could not locate the resource" in str(execinfo.value)
 
 
@@ -52,7 +51,7 @@ def test_list_files_ignores_hidden_files(tmpdir):
     # create a normal file
     normal_file = os.path.join(tmpdir.strpath, "normal_file")
     open(normal_file, "a").close()
-    assert utils.list_files(tmpdir.strpath) == [normal_file]
+    assert io_utils.list_files(tmpdir.strpath) == [normal_file]
 
 
 def test_creation_of_existing_dir(tmpdir):

From 85388c16acbf7c3dec7ae1c5b8ca56abc35c8dd8 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Mon, 22 Jul 2019 14:38:00 +0200
Subject: [PATCH 09/31] Clean up.

---
 rasa/cli/test.py                  |  3 ++-
 rasa/core/test.py                 | 10 +++++-----
 rasa/nlu/model.py                 |  4 ++--
 rasa/nlu/test.py                  |  6 +++---
 rasa/nlu/utils/__init__.py        | 13 -------------
 rasa/test.py                      | 18 +++++++-----------
 rasa/utils/io.py                  | 13 +++++++++++++
 tests/nlu/base/test_evaluation.py |  4 ++--
 tests/nlu/base/test_utils.py      |  3 +--
 9 files changed, 35 insertions(+), 39 deletions(-)

diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index 926d97b3b30f..dc556924ab92 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -3,7 +3,6 @@
 import os
 from typing import List
 
-from rasa import data
 from rasa.cli.arguments import test as arguments
 from rasa.cli.utils import get_validated_path
 from rasa.constants import (
@@ -59,6 +58,7 @@ def add_subparser(
 
 
 def test_core(args: argparse.Namespace) -> None:
+    from rasa import data
     from rasa.test import test_core
 
     endpoints = get_validated_path(
@@ -93,6 +93,7 @@ def test_core(args: argparse.Namespace) -> None:
 
 
 def test_nlu(args: argparse.Namespace) -> None:
+    from rasa import data
     from rasa.test import test_nlu, perform_nlu_cross_validation
     import rasa.utils.io
 
diff --git a/rasa/core/test.py b/rasa/core/test.py
index fd5b928cb1f5..5deb02936158 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -590,7 +590,7 @@ def plot_story_evaluation(
 async def compare_models_in_dir(
     model_dir: Text, stories_file: Text, output: Text
 ) -> None:
-    """Evaluates multiple trained models on a test set."""
+    """Evaluates multiple trained models in a directory on a test set."""
     from rasa.core import utils
     import rasa.utils.io as io_utils
 
@@ -603,7 +603,7 @@ async def compare_models_in_dir(
             model_name = "".join(
                 [i for i in os.path.basename(model) if not i.isdigit()]
             )
-            correct_stories = await _compare(model, stories_file)
+            correct_stories = await _get_no_correct_stories(model, stories_file)
             num_correct_run[model_name].append(correct_stories)
 
         for k, v in num_correct_run.items():
@@ -613,19 +613,19 @@ async def compare_models_in_dir(
 
 
 async def compare_models(models: List[Text], stories_file: Text, output: Text) -> None:
-    """Evaluates multiple trained models on a test set."""
+    """Evaluates provided trained models on a test set."""
     from rasa.core import utils
 
     num_correct = defaultdict(list)
 
     for model in models:
-        correct_stories = await _compare(model, stories_file)
+        correct_stories = await _get_no_correct_stories(model, stories_file)
         num_correct[os.path.basename(model)].append(correct_stories)
 
     utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
 
 
-async def _compare(model: Text, stories_file: Text) -> int:
+async def _get_no_correct_stories(model: Text, stories_file: Text) -> int:
     from rasa.core.agent import Agent
 
     logger.info("Evaluating model {}".format(model))
diff --git a/rasa/nlu/model.py b/rasa/nlu/model.py
index 863b951f1c91..1e2d1207513c 100644
--- a/rasa/nlu/model.py
+++ b/rasa/nlu/model.py
@@ -15,7 +15,7 @@
 from rasa.nlu.config import RasaNLUModelConfig, component_config_from_pipeline
 from rasa.nlu.persistor import Persistor
 from rasa.nlu.training_data import TrainingData, Message
-from rasa.nlu.utils import create_dir, write_json_to_file
+from rasa.nlu.utils import write_json_to_file
 import rasa.utils.io
 
 MODEL_NAME_PREFIX = "nlu_"
@@ -221,7 +221,7 @@ def persist(
         path = os.path.abspath(path)
         dir_name = os.path.join(path, model_name)
 
-        create_dir(dir_name)
+        rasa.utils.io.create_dir(dir_name)
 
         if self.training_data:
             metadata.update(self.training_data.persist(dir_name))
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index 8609f3eee3bd..5310323ddd20 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -19,7 +19,7 @@
 
 from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE
 from rasa.model import get_model
-from rasa.utils.io import create_path
+from rasa.utils.io import create_path, create_dir
 from rasa.nlu import config, training_data, utils
 from rasa.nlu.utils import write_to_file
 from rasa.nlu.components import ComponentBuilder
@@ -713,7 +713,7 @@ def run_evaluation(
     }  # type: Dict[Text, Optional[Dict]]
 
     if report:
-        utils.create_dir(report)
+        create_dir(report)
 
     intent_results, entity_results = get_eval_data(interpreter, test_data)
 
@@ -830,7 +830,7 @@ def cross_validate(
         nlu_config = config.load(nlu_config)
 
     if report:
-        utils.create_dir(report)
+        create_dir(report)
 
     trainer = Trainer(nlu_config)
     trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
diff --git a/rasa/nlu/utils/__init__.py b/rasa/nlu/utils/__init__.py
index 7b08fe902a44..210fbb9ae4ee 100644
--- a/rasa/nlu/utils/__init__.py
+++ b/rasa/nlu/utils/__init__.py
@@ -19,19 +19,6 @@ def relative_normpath(f: Optional[Text], path: Text) -> Optional[Text]:
         return None
 
 
-def create_dir(dir_path: Text) -> None:
-    """Creates a directory and its super paths.
-
-    Succeeds even if the path already exists."""
-
-    try:
-        os.makedirs(dir_path)
-    except OSError as e:
-        # be happy if someone already created the path
-        if e.errno != errno.EEXIST:
-            raise
-
-
 def lazyproperty(fn: Callable) -> Any:
     """Allows to avoid recomputing a property over and over.
 
diff --git a/rasa/test.py b/rasa/test.py
index c6a481443928..ddb1008606be 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -1,13 +1,10 @@
 import asyncio
 import logging
-import tempfile
-from typing import Text, Dict, Optional, List, Any
 import os
+from typing import Text, Dict, Optional, List, Any
 
-from rasa.core.interpreter import RegexInterpreter
-
+import rasa.utils.io as io_utils
 from rasa.constants import DEFAULT_RESULTS_PATH, RESULTS_FILE, NUM_STORIES_FILE
-from rasa.model import get_model, get_model_subdirectories
 from rasa.cli.utils import minimal_kwargs, print_error, print_warning
 from rasa.exceptions import ModelNotFound
 
@@ -16,13 +13,12 @@
 
 def test_core_models_in_dir(model_directory: Text, stories: Text, output: Text):
     from rasa.core.test import compare_models_in_dir, plot_core_results
-    import rasa.utils.io
 
     loop = asyncio.get_event_loop()
     loop.run_until_complete(compare_models_in_dir(model_directory, stories, output))
 
     story_n_path = os.path.join(model_directory, NUM_STORIES_FILE)
-    number_of_stories = rasa.utils.io.read_json_file(story_n_path)
+    number_of_stories = io_utils.read_json_file(story_n_path)
     plot_core_results(output, number_of_stories)
 
 
@@ -57,9 +53,8 @@ def test_core(
 ):
     import rasa.core.test
     import rasa.core.utils as core_utils
-    from rasa.nlu import utils as nlu_utils
-    from rasa.model import get_model
-    from rasa.core.interpreter import NaturalLanguageInterpreter
+    from rasa.model import get_model, get_model_subdirectories
+    from rasa.core.interpreter import RegexInterpreter, NaturalLanguageInterpreter
     from rasa.core.agent import Agent
 
     _endpoints = core_utils.AvailableEndpoints.read_endpoints(endpoints)
@@ -68,7 +63,7 @@ def test_core(
         kwargs = {}
 
     if output:
-        nlu_utils.create_dir(output)
+        io_utils.create_dir(output)
 
     try:
         unpacked_model = get_model(model)
@@ -111,6 +106,7 @@ def test_core(
 
 def test_nlu(model: Optional[Text], nlu_data: Optional[Text], kwargs: Optional[Dict]):
     from rasa.nlu.test import run_evaluation
+    from rasa.model import get_model
 
     try:
         unpacked_model = get_model(model)
diff --git a/rasa/utils/io.py b/rasa/utils/io.py
index 6be3ddc13480..101cdacacaeb 100644
--- a/rasa/utils/io.py
+++ b/rasa/utils/io.py
@@ -344,3 +344,16 @@ def list_directory(path: Text) -> List[Text]:
         raise ValueError(
             "Could not locate the resource '{}'.".format(os.path.abspath(path))
         )
+
+
+def create_dir(dir_path: Text) -> None:
+    """Creates a directory and its super paths.
+
+    Succeeds even if the path already exists."""
+
+    try:
+        os.makedirs(dir_path)
+    except OSError as e:
+        # be happy if someone already created the path
+        if e.errno != errno.EEXIST:
+            raise
diff --git a/tests/nlu/base/test_evaluation.py b/tests/nlu/base/test_evaluation.py
index 9a61c292001e..5be71efc5e9d 100644
--- a/tests/nlu/base/test_evaluation.py
+++ b/tests/nlu/base/test_evaluation.py
@@ -264,7 +264,7 @@ def test_intent_evaluation_report(tmpdir_factory):
     report_folder = os.path.join(path, "reports")
     report_filename = os.path.join(report_folder, "intent_report.json")
 
-    utils.create_dir(report_folder)
+    rasa.utils.io.create_dir(report_folder)
 
     intent_results = [
         IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
@@ -319,7 +319,7 @@ def __init__(self, component_config=None) -> None:
     report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
     report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")
 
-    utils.create_dir(report_folder)
+    rasa.utils.io.create_dir(report_folder)
     mock_interpreter = Interpreter(
         [
             EntityExtractorA({"provides": ["entities"]}),
diff --git a/tests/nlu/base/test_utils.py b/tests/nlu/base/test_utils.py
index e17f67cc4109..c49c1079da02 100644
--- a/tests/nlu/base/test_utils.py
+++ b/tests/nlu/base/test_utils.py
@@ -6,7 +6,6 @@
 import tempfile
 import rasa.utils.io
 from rasa.nlu.utils import (
-    create_dir,
     is_model_dir,
     is_url,
     ordered,
@@ -56,7 +55,7 @@ def test_list_files_ignores_hidden_files(tmpdir):
 
 def test_creation_of_existing_dir(tmpdir):
     # makes sure there is no exception
-    assert create_dir(tmpdir.strpath) is None
+    assert rasa.utils.io.create_dir(tmpdir.strpath) is None
 
 
 def test_ordered():

From 22d7e6cf6f6fd8540014c8ab40e97cd4c1c571e1 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Mon, 22 Jul 2019 14:53:22 +0200
Subject: [PATCH 10/31] Add tests.

---
 tests/cli/test_rasa_test.py | 77 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index 3fd4c580222c..fae8b5ef3616 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -1,4 +1,7 @@
 import os
+from shutil import copyfile
+from constants import DEFAULT_RESULTS_PATH, RESULTS_FILE
+from rasa.utils.io import list_files, write_yaml_file
 
 
 def test_test_core(run_in_default_project):
@@ -34,8 +37,6 @@ def test_test_nlu_cross_validation(run_in_default_project):
 
 
 def test_test_nlu_comparison(run_in_default_project):
-    from shutil import copyfile
-
     copyfile("config.yml", "nlu-config.yml")
 
     run_in_default_project(
@@ -53,6 +54,78 @@ def test_test_nlu_comparison(run_in_default_project):
     assert os.path.exists("nlu-report")
 
 
+def test_test_core_comparison(run_in_default_project):
+    files = list_files("models")
+    copyfile(files[0], "models/copy-model.tar.gz")
+
+    run_in_default_project(
+        "test",
+        "core",
+        "-m",
+        files[0],
+        "models/copy-model.tar.gz",
+        "--stories",
+        "data/stories.md",
+    )
+
+    assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
+
+
+def test_test_core_comparison_after_train(run_in_default_project):
+    write_yaml_file(
+        {
+            "language": "en",
+            "pipeline": "supervised_embeddings",
+            "policies": [{"name": "KerasPolicy"}],
+        },
+        "config_1.yml",
+    )
+
+    write_yaml_file(
+        {
+            "language": "en",
+            "pipeline": "supervised_embeddings",
+            "policies": [{"name": "MemoizationPolicy"}],
+        },
+        "config_2.yml",
+    )
+    run_in_default_project(
+        "train",
+        "core",
+        "-c",
+        "config.yml",
+        "config-2.yml",
+        "--stories",
+        "data/stories",
+        "--run",
+        "2",
+        "--percentages",
+        "25",
+        "75",
+        "--out",
+        "comparison_models",
+    )
+
+    assert os.path.exists("comparison_models")
+    assert os.path.exists("comparison_models/run_1")
+    assert os.path.exists("comparison_models/run_2")
+
+    run_in_default_project(
+        "test",
+        "core",
+        "-m",
+        "comparison_models",
+        "--stories",
+        "data/stories",
+        "--evaluate-models-in-dir",
+    )
+
+    assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
+    assert os.path.exists(
+        os.path.join(DEFAULT_RESULTS_PATH, "comparison_models_graph.pdf")
+    )
+
+
 def test_test_help(run):
     output = run("test", "--help")
 

From 0b02e6fbf4656de612ffcc8cc20f8f6cf51b9076 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Mon, 22 Jul 2019 15:06:04 +0200
Subject: [PATCH 11/31] Add changelog entry.

---
 CHANGELOG.rst               |  3 ++-
 tests/cli/test_rasa_test.py | 12 +++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 845949c8d8a1..d02a2c47d624 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,7 +11,7 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0.
 
 Added
 -----
-
+- add ``--evaluate-models-in-dir`` to ``rasa test core`` to evaluate models from ``rasa train core -c <config-1> <config-2>``
 
 Changed
 -------
@@ -25,6 +25,7 @@ Removed
 Fixed
 -----
 - ``rasa train core`` in comparison mode stores the model files compressed (``tar.gz`` files)
+- ``rasa test core`` can handle compressed model files
 
 
 [1.1.7] - 2019-07-18
diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index fae8b5ef3616..7661f8111714 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -93,15 +93,17 @@ def test_test_core_comparison_after_train(run_in_default_project):
         "train",
         "core",
         "-c",
-        "config.yml",
-        "config-2.yml",
+        "config_1.yml",
+        "config_2.yml",
         "--stories",
-        "data/stories",
-        "--run",
+        "data/stories.md",
+        "--runs",
         "2",
         "--percentages",
         "25",
         "75",
+        "--augmentation",
+        "5",
         "--out",
         "comparison_models",
     )
@@ -122,7 +124,7 @@ def test_test_core_comparison_after_train(run_in_default_project):
 
     assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
     assert os.path.exists(
-        os.path.join(DEFAULT_RESULTS_PATH, "comparison_models_graph.pdf")
+        os.path.join(DEFAULT_RESULTS_PATH, "core_model_comparison_graph.pdf")
     )
 
 

From fb3eb9831a12516ded65c4612f4cfc4f50c49ccf Mon Sep 17 00:00:00 2001
From: Tom Metcalfe <t.metcalfe@rasa.com>
Date: Tue, 23 Jul 2019 16:00:12 +0100
Subject: [PATCH 12/31] Add content menus to relevant pages, restructuring

---
 docs/core/actions.rst              | 39 +++++++++++++++---------------
 docs/core/domains.rst              | 12 ++++++---
 docs/core/interactive-learning.rst |  2 ++
 docs/core/slots.rst                |  6 +++++
 docs/nlu/choosing-a-pipeline.rst   | 16 +++++++-----
 docs/nlu/language-support.rst      |  3 +++
 docs/nlu/training-data-format.rst  |  6 ++---
 7 files changed, 53 insertions(+), 31 deletions(-)

diff --git a/docs/core/actions.rst b/docs/core/actions.rst
index 889de3299c89..1789afdf8fd4 100644
--- a/docs/core/actions.rst
+++ b/docs/core/actions.rst
@@ -6,16 +6,17 @@
 Actions
 =======
 
-.. contents::
-
 Actions are the things your bot runs in response to user input.
 There are three kinds of actions in Rasa Core:
 
- 1. **Default actions**: e.g. ``action_listen``, ``action_restart``,
-    ``action_default_fallback``
- 2. **Utterance actions**: start with ``utter_``, just send a message
+ 1. **Utterance actions**: start with ``utter_``, just send a message
     to the user
- 3. **Custom actions**: any other action, these actions can run arbitrary code
+ 2. **Custom actions**: any other action, these actions can run arbitrary code
+ 3. **Default actions**: e.g. ``action_listen``, ``action_restart``,
+    ``action_default_fallback``
+
+.. contents::
+   :local:
 
 Utterance Actions
 -----------------
@@ -66,7 +67,7 @@ other language and define your actions there - but we provide
 a small python SDK to make development there even easier.
 
 Custom Actions Written in Python
---------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 For actions written in python, we have a convenient SDK which starts
 this action server for you.
@@ -144,6 +145,18 @@ There is an example of a ``SlotSet`` event
 :ref:`above <custom_action_example>`, and a full list of possible
 events in :ref:`Events <events>`.
 
+Execute Actions in Other Code
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Rasa Core will send an HTTP ``POST`` request to your server containing
+information on which action to run. Furthermore, this request will contain all
+information about the conversation.
+
+As a response to the action call from Core, you can modify the tracker,
+e.g. by setting slots and send responses back to the user.
+All of the modifications are done using events.
+There is a list of all possible event types in :ref:`events`.
+
 Default Actions
 ---------------
 
@@ -195,15 +208,3 @@ to the list of actions in your domain:
 
 Rasa Core will then call your action endpoint and treat it as every other
 custom action.
-
-Execute Actions in Other Code
------------------------------
-
-Rasa Core will send an HTTP ``POST`` request to your server containing
-information on which action to run. Furthermore, this request will contain all
-information about the conversation.
-
-As a response to the action call from Core, you can modify the tracker,
-e.g. by setting slots and send responses back to the user.
-All of the modifications are done using events.
-There is a list of all possible event types in :ref:`events`.
diff --git a/docs/core/domains.rst b/docs/core/domains.rst
index ffe0515ae480..1225e944daa8 100644
--- a/docs/core/domains.rst
+++ b/docs/core/domains.rst
@@ -11,6 +11,12 @@ It specifies the ``intents``, ``entities``, ``slots``, and ``actions``
 your bot should know about. Optionally, it can also include ``templates``
 for the things your bot can say.
 
+.. contents::
+   :local:
+
+
+An example of a Domain
+----------------------
 
 As an example, the ``DefaultDomain`` has the following yaml definition:
 
@@ -51,7 +57,7 @@ For example, an action could:
 * just about anything!
 
 Custom Actions and Slots
-^^^^^^^^^^^^^^^^^^^^^^^^
+------------------------
 
 To reference slots in your domain, you need to reference them by
 their **module path**. To reference custom actions, use their **name**.
@@ -76,7 +82,7 @@ see :ref:`custom-actions`).
 .. _utter_templates:
 
 Utterance templates
-^^^^^^^^^^^^^^^^^^^
+-------------------
 
 Utterance templates are messages the bot will send back to the user. There are
 two ways to use these templates:
@@ -270,7 +276,7 @@ multiple responses and Rasa will randomly pick one of them, e.g.:
     - text: "Hey, {name}. How is your day going?"
 
 Ignoring entities for certain intents
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+-------------------------------------
 
 If you want all entities to be ignored for certain intents, you can
 add the ``use_entities: []`` parameter to the intent in your domain
diff --git a/docs/core/interactive-learning.rst b/docs/core/interactive-learning.rst
index 616bdbdeb1c9..9b56ff2477c7 100644
--- a/docs/core/interactive-learning.rst
+++ b/docs/core/interactive-learning.rst
@@ -22,6 +22,8 @@ Some people call this `Software 2.0 <https://medium.com/@karpathy/software-2-0-a
     Rasa X provides a UI for interactive learning, and you can use any user conversation
     as a starting point. See `Annotate Conversations <https://rasa.com/docs/rasa-x/annotate-conversations/>`_ in the Rasa X docs.
 
+.. contents::
+   :local:
 
 Running Interactive Learning
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/docs/core/slots.rst b/docs/core/slots.rst
index 46d34c5b78fb..3741bacf88e2 100644
--- a/docs/core/slots.rst
+++ b/docs/core/slots.rst
@@ -7,6 +7,12 @@
 Slots
 =====
 
+.. contents::
+   :local:
+
+What are slots?
+---------------
+
 **Slots are your bot's memory.** They act as a key-value store
 which can be used to store information the user provided (e.g their home city)
 as well as information gathered about the outside world (e.g. the result of a
diff --git a/docs/nlu/choosing-a-pipeline.rst b/docs/nlu/choosing-a-pipeline.rst
index 91c24e3ae689..43fe5909dbf6 100644
--- a/docs/nlu/choosing-a-pipeline.rst
+++ b/docs/nlu/choosing-a-pipeline.rst
@@ -10,6 +10,10 @@ Choosing a Pipeline
 Choosing an NLU pipeline allows you to customize your model and finetune
 it on your dataset.
 
+.. contents::
+   :local:
+
+
 The Short Answer
 ----------------
 
@@ -154,12 +158,12 @@ final context dictionary is used to persist the model's metadata.
 
 
 
-Returned Entities Object
-------------------------
-In the object returned after parsing there are two fields that show information
-about how the pipeline impacted the entities returned. The ``extractor`` field
-of an entity tells you which entity extractor found this particular entity.
-The ``processors`` field contains the name of components that altered this
+Reading an "entity" object
+--------------------------
+After parsing, the entity is returned as a dictionary.  There are two fields that show information
+about how the pipeline impacted the entities returned: the ``extractor`` field
+of an entity tells you which entity extractor found this particular entity, and
+the ``processors`` field contains the name of components that altered this
 specific entity.
 
 The use of synonyms can also cause the ``value`` field not match the ``text``
diff --git a/docs/nlu/language-support.rst b/docs/nlu/language-support.rst
index 1329648501a2..eeed4ebceabe 100644
--- a/docs/nlu/language-support.rst
+++ b/docs/nlu/language-support.rst
@@ -14,6 +14,9 @@ pipeline in :ref:`choosing-a-pipeline`.
 Other backends have some restrictions and support those languages
 which have pre-trained word vectors available.
 
+.. contents::
+   :local:
+
 
 Training a model in any language using the ``supervised_embeddings`` pipeline
 -----------------------------------------------------------------------------
diff --git a/docs/nlu/training-data-format.rst b/docs/nlu/training-data-format.rst
index b25d8660897c..000cf80321f1 100644
--- a/docs/nlu/training-data-format.rst
+++ b/docs/nlu/training-data-format.rst
@@ -6,12 +6,12 @@
 Training Data Format
 ====================
 
-Data Format
-~~~~~~~~~~~
-
 You can provide training data as Markdown or as JSON, as a single file or as a directory containing multiple files.
 Note that Markdown is usually easier to work with.
 
+.. contents::
+   :local:
+
 
 Markdown Format
 ---------------

From b0fb852b456dfdaab5e562de67a731f3f262d885 Mon Sep 17 00:00:00 2001
From: Tom Metcalfe <t.metcalfe@rasa.com>
Date: Wed, 24 Jul 2019 09:10:52 +0100
Subject: [PATCH 13/31] Edit entity object title

---
 docs/nlu/choosing-a-pipeline.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/nlu/choosing-a-pipeline.rst b/docs/nlu/choosing-a-pipeline.rst
index 43fe5909dbf6..b79c26dcdcef 100644
--- a/docs/nlu/choosing-a-pipeline.rst
+++ b/docs/nlu/choosing-a-pipeline.rst
@@ -158,8 +158,8 @@ final context dictionary is used to persist the model's metadata.
 
 
 
-Reading an "entity" object
---------------------------
+The "entity" object explained
+-----------------------------
 After parsing, the entity is returned as a dictionary.  There are two fields that show information
 about how the pipeline impacted the entities returned: the ``extractor`` field
 of an entity tells you which entity extractor found this particular entity, and

From a7ec152a415d3265b3d6246f8beb2b755b72bc9f Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Wed, 24 Jul 2019 10:41:11 +0200
Subject: [PATCH 14/31] clean up after merge

---
 CHANGELOG.rst              |  4 ++--
 rasa/cli/arguments/test.py |  4 ++--
 rasa/core/test.py          | 10 +++++-----
 rasa/core/train.py         |  1 -
 rasa/nlu/test.py           | 13 +++++++------
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index cc226f8d2da5..04041dbc653c 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -28,10 +28,10 @@ Removed
 
 Fixed
 -----
-- ``rasa train core`` in comparison mode stores the model files compressed (``tar.gz`` files)
-- ``rasa test core`` can handle compressed model files
 - added timeout to terminal input channel to avoid freezing input in case of server
   errors
+- ``rasa train core`` in comparison mode stores the model files compressed (``tar.gz`` files)
+- ``rasa test core`` can handle compressed model files
 
 
 [1.1.7] - 2019-07-18
diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
index 6a0c756f35cb..f91d7ec46e8f 100644
--- a/rasa/cli/arguments/test.py
+++ b/rasa/cli/arguments/test.py
@@ -75,8 +75,8 @@ def add_test_core_argument_group(
         default=False,
         action="store_true",
         help="Should be set to evaluate models trained via "
-        "`rasa train core --config <config-1> <config-2>`. "
-        "All models in the provided model folder are evaluated "
+        "'rasa train core --config <config-1> <config-2>'. "
+        "All models in the provided directory are evaluated "
         "and compared against each other.",
     )
 
diff --git a/rasa/core/test.py b/rasa/core/test.py
index 5deb02936158..8ed807301183 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -603,8 +603,8 @@ async def compare_models_in_dir(
             model_name = "".join(
                 [i for i in os.path.basename(model) if not i.isdigit()]
             )
-            correct_stories = await _get_no_correct_stories(model, stories_file)
-            num_correct_run[model_name].append(correct_stories)
+            no_of_correct_stories = await _evaluate_core_model(model, stories_file)
+            num_correct_run[model_name].append(no_of_correct_stories)
 
         for k, v in num_correct_run.items():
             num_correct[k].append(v)
@@ -619,13 +619,13 @@ async def compare_models(models: List[Text], stories_file: Text, output: Text) -
     num_correct = defaultdict(list)
 
     for model in models:
-        correct_stories = await _get_no_correct_stories(model, stories_file)
-        num_correct[os.path.basename(model)].append(correct_stories)
+        no_of_correct_stories = await _evaluate_core_model(model, stories_file)
+        num_correct[os.path.basename(model)].append(no_of_correct_stories)
 
     utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
 
 
-async def _get_no_correct_stories(model: Text, stories_file: Text) -> int:
+async def _evaluate_core_model(model: Text, stories_file: Text) -> int:
     from rasa.core.agent import Agent
 
     logger.info("Evaluating model {}".format(model))
diff --git a/rasa/core/train.py b/rasa/core/train.py
index 61333dbffd7e..ceccd08ef9f8 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -3,7 +3,6 @@
 import os
 import tempfile
 import typing
-from contextlib import ExitStack
 from typing import Dict, Optional, Text, Union, List
 
 from rasa.constants import NUMBER_OF_TRAINING_STORIES_FILE
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index 5310323ddd20..ff0bd366d25b 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -17,9 +17,10 @@
     Any,
 )
 
+import rasa.utils.io as io_utils
+
 from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE
 from rasa.model import get_model
-from rasa.utils.io import create_path, create_dir
 from rasa.nlu import config, training_data, utils
 from rasa.nlu.utils import write_to_file
 from rasa.nlu.components import ComponentBuilder
@@ -713,7 +714,7 @@ def run_evaluation(
     }  # type: Dict[Text, Optional[Dict]]
 
     if report:
-        create_dir(report)
+        io_utils.create_dir(report)
 
     intent_results, entity_results = get_eval_data(interpreter, test_data)
 
@@ -830,7 +831,7 @@ def cross_validate(
         nlu_config = config.load(nlu_config)
 
     if report:
-        create_dir(report)
+        io_utils.create_dir(report)
 
     trainer = Trainer(nlu_config)
     trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
@@ -947,10 +948,10 @@ def compare_nlu(
         logger.info("Beginning comparison run {}/{}".format(run + 1, runs))
 
         run_path = os.path.join(output, "run_{}".format(run + 1))
-        create_path(run_path)
+        io_utils.create_path(run_path)
 
         test_path = os.path.join(run_path, TEST_DATA_FILE)
-        create_path(test_path)
+        io_utils.create_path(test_path)
 
         train, test = data.train_test_split()
         write_to_file(test_path, test.as_markdown())
@@ -965,7 +966,7 @@ def compare_nlu(
 
             model_output_path = os.path.join(run_path, percent_string)
             train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
-            create_path(train_split_path)
+            io_utils.create_path(train_split_path)
             write_to_file(train_split_path, train.as_markdown())
 
             for nlu_config, model_name in zip(configs, model_names):

From 62a4e50d8ea2c09124f44c1c1f0c90737d618dba Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Wed, 24 Jul 2019 10:43:00 +0200
Subject: [PATCH 15/31] Update documentation.

---
 docs/user-guide/evaluating-models.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/user-guide/evaluating-models.rst b/docs/user-guide/evaluating-models.rst
index b27837bde8a7..2a24500024f7 100644
--- a/docs/user-guide/evaluating-models.rst
+++ b/docs/user-guide/evaluating-models.rst
@@ -204,11 +204,11 @@ mode to evaluate the models you just trained:
 
 .. code-block:: bash
 
-  $ rasa test core -m comparison_models/<model-1>.tar.gz comparison_models/<model-2>.tar.gz \
-    --stories stories_folder --out comparison_results
+  $ rasa test core -m comparison_models --stories stories_folder
+  --out comparison_results --evaluate-models-in-dir
 
 This will evaluate each of the models on the training set and plot some graphs
-to show you which policy performs best.  By evaluating on the full set of stories, you
+to show you which policy performs best. By evaluating on the full set of stories, you
 can measure how well Rasa Core is predicting the held-out stories.
 
 If you're not sure which policies to compare, we'd recommend trying out the

From a36f8668eeb15c017f79a44afd55c2cc7df3a411 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Wed, 24 Jul 2019 11:35:12 +0200
Subject: [PATCH 16/31] Fix import issue.

---
 tests/cli/test_rasa_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index 7661f8111714..c42e0a9d77bb 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -1,6 +1,6 @@
 import os
 from shutil import copyfile
-from constants import DEFAULT_RESULTS_PATH, RESULTS_FILE
+from rasa.constants import DEFAULT_RESULTS_PATH, RESULTS_FILE
 from rasa.utils.io import list_files, write_yaml_file
 
 

From 2d74a8355b63f587c6e1a7d69027a84982fe237d Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Wed, 24 Jul 2019 14:44:37 +0200
Subject: [PATCH 17/31] Fix comparison training.

---
 rasa/core/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/train.py b/rasa/core/train.py
index ceccd08ef9f8..f2c374ed09e4 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -118,7 +118,7 @@ async def train_comparison_models(
                         file_importer,
                         train_path,
                         policy_config=policy_config,
-                        exclusion_percentage=current_run,
+                        exclusion_percentage=percentage,
                         kwargs=kwargs,
                         dump_stories=dump_stories,
                     )

From e6c86f5a50cd509814998a14d898b13305f2aada Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Thu, 25 Jul 2019 15:04:35 +0200
Subject: [PATCH 18/31] Update changelog.

---
 CHANGELOG.rst | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 412f79e7f0d9..a37b68226dc6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,9 +6,26 @@ Rasa Change Log
 All notable changes to this project will be documented in this file.
 This project adheres to `Semantic Versioning`_ starting with version 1.0.
 
-[Unreleased 1.1.8] - `master`_
+
+[Unreleased 1.1.9] - `master`_
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Added
+-----
+
+Changed
+-------
+
+Removed
+-------
+
+Fixed
+-----
+
+
+[1.1.8] - 2019-07-25
+^^^^^^^^^^^^^^^^^^^^
+
 Added
 -----
 - ``TrainingFileImporter`` interface to support customizing the process of loading
@@ -21,10 +38,6 @@ Changed
   or a policy ensemble
 - Update pytype to ``2019.7.11``
 
-Removed
--------
-
-
 Fixed
 -----
 - interactive learning bug where reverted user utterances were dumped to training data

From da8b12dcb250027b82163f2228aaa5df39bedee5 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Thu, 25 Jul 2019 15:06:42 +0200
Subject: [PATCH 19/31] Update rasa version

---
 rasa/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/version.py b/rasa/version.py
index bf7882637e01..3d30a5ffbec8 100644
--- a/rasa/version.py
+++ b/rasa/version.py
@@ -1 +1 @@
-__version__ = "1.1.7"
+__version__ = "1.1.8"

From 5e3800bf0b96eac2dab2396f9f8bc448f0318697 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Thu, 25 Jul 2019 15:07:22 +0200
Subject: [PATCH 20/31] Add missing fix for rasa train

---
 rasa/core/train.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/rasa/core/train.py b/rasa/core/train.py
index ceccd08ef9f8..f2c374ed09e4 100644
--- a/rasa/core/train.py
+++ b/rasa/core/train.py
@@ -118,7 +118,7 @@ async def train_comparison_models(
                         file_importer,
                         train_path,
                         policy_config=policy_config,
-                        exclusion_percentage=current_run,
+                        exclusion_percentage=percentage,
                         kwargs=kwargs,
                         dump_stories=dump_stories,
                     )

From 77ce290e3437cbd7222218e34a4ef1b4c7de00de Mon Sep 17 00:00:00 2001
From: Ella <erohmensing@gmail.com>
Date: Thu, 25 Jul 2019 17:23:24 +0200
Subject: [PATCH 21/31] add remote storage to init args

---
 rasa/cli/scaffold.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rasa/cli/scaffold.py b/rasa/cli/scaffold.py
index a9062dc426e0..5b0ebb9a8bed 100644
--- a/rasa/cli/scaffold.py
+++ b/rasa/cli/scaffold.py
@@ -81,6 +81,7 @@ def print_run_or_instructions(args: argparse.Namespace, path: Text) -> None:
             "jwt_secret",
             "jwt_method",
             "enable_api",
+            "remote_storage",
         ]
         for a in attributes:
             setattr(args, a, None)

From da09ca9a0568ba8e4b08e24c238e7455629111db Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Thu, 25 Jul 2019 17:31:47 +0200
Subject: [PATCH 22/31] Update default percentages.

---
 rasa/cli/arguments/data.py  | 1 -
 rasa/cli/arguments/test.py  | 2 +-
 rasa/cli/arguments/train.py | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/rasa/cli/arguments/data.py b/rasa/cli/arguments/data.py
index b951cff83bac..f70396c8be17 100644
--- a/rasa/cli/arguments/data.py
+++ b/rasa/cli/arguments/data.py
@@ -4,7 +4,6 @@
     add_nlu_data_param,
     add_out_param,
     add_data_param,
-    add_stories_param,
     add_domain_param,
 )
 
diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
index f91d7ec46e8f..196e4ec9d68b 100644
--- a/rasa/cli/arguments/test.py
+++ b/rasa/cli/arguments/test.py
@@ -159,7 +159,7 @@ def add_test_nlu_argument_group(
         required=False,
         nargs="+",
         type=int,
-        default=[0, 25, 50, 75, 90],
+        default=[25, 50, 75, 90],
         help="Percentages of training data to exclude during comparison.",
     )
 
diff --git a/rasa/cli/arguments/train.py b/rasa/cli/arguments/train.py
index 392618684171..3847b371763d 100644
--- a/rasa/cli/arguments/train.py
+++ b/rasa/cli/arguments/train.py
@@ -88,7 +88,7 @@ def add_compare_params(
         "--percentages",
         nargs="*",
         type=int,
-        default=[0, 5, 25, 50, 70, 90, 95],
+        default=[25, 50, 75, 90],
         help="Range of exclusion percentages.",
     )
     parser.add_argument(

From d533f16d9fa2c5a93d01b468dda5f73c063740fa Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 09:28:43 +0200
Subject: [PATCH 23/31] Review comments.

---
 CHANGELOG.rst                         |  2 +-
 docs/user-guide/evaluating-models.rst |  2 +-
 rasa/cli/arguments/test.py            |  6 ++--
 rasa/cli/arguments/train.py           |  2 +-
 rasa/cli/test.py                      | 43 +++++++++++++++------------
 rasa/core/test.py                     | 41 ++++++++++++++-----------
 rasa/core/training/dsl.py             |  6 ++--
 rasa/nlu/model.py                     |  2 +-
 rasa/nlu/test.py                      |  4 +--
 rasa/test.py                          | 10 +++----
 rasa/utils/io.py                      |  4 +--
 tests/nlu/base/test_evaluation.py     |  4 +--
 tests/nlu/base/test_utils.py          |  2 +-
 13 files changed, 68 insertions(+), 60 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index d1af071496ae..5905ef976c98 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -11,7 +11,7 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0.
 
 Added
 -----
-- add ``--evaluate-models-in-dir`` to ``rasa test core`` to evaluate models from ``rasa train core -c <config-1> <config-2>``
+- add ``--evaluate-model-directory`` to ``rasa test core`` to evaluate models from ``rasa train core -c <config-1> <config-2>``
 - ``TrainingFileImporter`` interface to support customizing the process of loading
   training data
 - Fill slots for custom templates
diff --git a/docs/user-guide/evaluating-models.rst b/docs/user-guide/evaluating-models.rst
index 2a24500024f7..976cac7fb82d 100644
--- a/docs/user-guide/evaluating-models.rst
+++ b/docs/user-guide/evaluating-models.rst
@@ -205,7 +205,7 @@ mode to evaluate the models you just trained:
 .. code-block:: bash
 
   $ rasa test core -m comparison_models --stories stories_folder
-  --out comparison_results --evaluate-models-in-dir
+  --out comparison_results --evaluate-model-directory
 
 This will evaluate each of the models on the training set and plot some graphs
 to show you which policy performs best. By evaluating on the full set of stories, you
diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
index 196e4ec9d68b..6e62ee2a6144 100644
--- a/rasa/cli/arguments/test.py
+++ b/rasa/cli/arguments/test.py
@@ -71,7 +71,7 @@ def add_test_core_argument_group(
         "to the supplied URL.",
     )
     parser.add_argument(
-        "--evaluate-models-in-dir",
+        "--evaluate-model-directory",
         default=False,
         action="store_true",
         help="Should be set to evaluate models trained via "
@@ -159,7 +159,7 @@ def add_test_nlu_argument_group(
         required=False,
         nargs="+",
         type=int,
-        default=[25, 50, 75, 90],
+        default=[0, 25, 50, 75, 90],
         help="Percentages of training data to exclude during comparison.",
     )
 
@@ -173,6 +173,6 @@ def add_test_core_model_param(parser: argparse.ArgumentParser):
         default=[default_path],
         help="Path to a pre-trained model. If it is a 'tar.gz' file that model file "
         "will be used. If it is a directory, the latest model in that directory "
-        "will be used (exception: '--evaluate-models-in-dir' flag is set). If multiple "
+        "will be used (exception: '--evaluate-model-directory' flag is set). If multiple "
         "'tar.gz' files are provided, all those models will be compared.",
     )
diff --git a/rasa/cli/arguments/train.py b/rasa/cli/arguments/train.py
index 3847b371763d..06efd92ab175 100644
--- a/rasa/cli/arguments/train.py
+++ b/rasa/cli/arguments/train.py
@@ -88,7 +88,7 @@ def add_compare_params(
         "--percentages",
         nargs="*",
         type=int,
-        default=[25, 50, 75, 90],
+        default=[0, 25, 50, 75, 90],
         help="Range of exclusion percentages.",
     )
     parser.add_argument(
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index dc556924ab92..c5c294fa8c1a 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -4,7 +4,6 @@
 from typing import List
 
 from rasa.cli.arguments import test as arguments
-from rasa.cli.utils import get_validated_path
 from rasa.constants import (
     DEFAULT_CONFIG_PATH,
     DEFAULT_DATA_PATH,
@@ -14,8 +13,9 @@
     DEFAULT_NLU_RESULTS_PATH,
     CONFIG_SCHEMA_FILE,
 )
-from rasa.test import test_core_models, compare_nlu_models, test_core_models_in_dir
-from rasa.utils.validation import validate_yaml_schema, InvalidYamlFileError
+import rasa.test as rasa_test
+import rasa.utils.validation as validation_utils
+import rasa.cli.utils as cli_utils
 
 logger = logging.getLogger(__name__)
 
@@ -61,10 +61,10 @@ def test_core(args: argparse.Namespace) -> None:
     from rasa import data
     from rasa.test import test_core
 
-    endpoints = get_validated_path(
+    endpoints = cli_utils.get_validated_path(
         args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
     )
-    stories = get_validated_path(args.stories, "stories", DEFAULT_DATA_PATH)
+    stories = cli_utils.get_validated_path(args.stories, "stories", DEFAULT_DATA_PATH)
     stories = data.get_core_directory(stories)
     output = args.out or DEFAULT_RESULTS_PATH
 
@@ -75,10 +75,12 @@ def test_core(args: argparse.Namespace) -> None:
         args.model = args.model[0]
 
     if isinstance(args.model, str):
-        model_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
+        model_path = cli_utils.get_validated_path(
+            args.model, "model", DEFAULT_MODELS_PATH
+        )
 
         if args.evaluate_models_in_dir:
-            test_core_models_in_dir(args.model, stories, output)
+            rasa_test.test_core_models_in_directory(args.model, stories, output)
         else:
             test_core(
                 model=model_path,
@@ -89,15 +91,14 @@ def test_core(args: argparse.Namespace) -> None:
             )
 
     else:
-        test_core_models(args.model, stories, output)
+        rasa_test.test_core_models(args.model, stories, output)
 
 
 def test_nlu(args: argparse.Namespace) -> None:
     from rasa import data
-    from rasa.test import test_nlu, perform_nlu_cross_validation
-    import rasa.utils.io
+    import rasa.utils.io as io_utils
 
-    nlu_data = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
+    nlu_data = cli_utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
     nlu_data = data.get_nlu_directory(nlu_data)
 
     if args.config is not None and len(args.config) == 1:
@@ -118,20 +119,20 @@ def test_nlu(args: argparse.Namespace) -> None:
         config_files = []
         for file in args.config:
             try:
-                validate_yaml_schema(
-                    rasa.utils.io.read_file(file),
+                validation_utils.validate_yaml_schema(
+                    io_utils.read_file(file),
                     CONFIG_SCHEMA_FILE,
                     show_validation_errors=False,
                 )
                 config_files.append(file)
-            except InvalidYamlFileError:
+            except validation_utils.InvalidYamlFileError:
                 logger.debug(
                     "Ignoring file '{}' as it is not a valid config file.".format(file)
                 )
                 continue
 
         output = args.report or DEFAULT_NLU_RESULTS_PATH
-        compare_nlu_models(
+        rasa_test.compare_nlu_models(
             configs=config_files,
             nlu=nlu_data,
             output=output,
@@ -140,11 +141,15 @@ def test_nlu(args: argparse.Namespace) -> None:
         )
     elif args.cross_validation:
         logger.info("Test model using cross validation.")
-        config = get_validated_path(args.config, "config", DEFAULT_CONFIG_PATH)
-        perform_nlu_cross_validation(config, nlu_data, vars(args))
+        config = cli_utils.get_validated_path(
+            args.config, "config", DEFAULT_CONFIG_PATH
+        )
+        rasa_test.perform_nlu_cross_validation(config, nlu_data, vars(args))
     else:
-        model_path = get_validated_path(args.model, "model", DEFAULT_MODELS_PATH)
-        test_nlu(model_path, nlu_data, vars(args))
+        model_path = cli_utils.get_validated_path(
+            args.model, "model", DEFAULT_MODELS_PATH
+        )
+        rasa_test.test_nlu(model_path, nlu_data, vars(args))
 
 
 def test(args: argparse.Namespace):
diff --git a/rasa/core/test.py b/rasa/core/test.py
index 8ed807301183..d6830603e938 100644
--- a/rasa/core/test.py
+++ b/rasa/core/test.py
@@ -400,9 +400,9 @@ def collect_story_predictions(
     story_eval_store = EvaluationStore()
     failed = []
     correct_dialogues = []
-    num_stories = len(completed_trackers)
+    number_of_stories = len(completed_trackers)
 
-    logger.info("Evaluating {} stories\nProgress:".format(num_stories))
+    logger.info("Evaluating {} stories\nProgress:".format(number_of_stories))
 
     action_list = []
 
@@ -451,7 +451,7 @@ def collect_story_predictions(
             action_list=action_list,
             in_training_data_fraction=in_training_data_fraction,
         ),
-        num_stories,
+        number_of_stories,
     )
 
 
@@ -594,49 +594,54 @@ async def compare_models_in_dir(
     from rasa.core import utils
     import rasa.utils.io as io_utils
 
-    num_correct = defaultdict(list)
+    number_correct = defaultdict(list)
 
     for run in io_utils.list_subdirectories(model_dir):
-        num_correct_run = defaultdict(list)
+        number_correct_in_run = defaultdict(list)
 
         for model in sorted(io_utils.list_files(run)):
-            model_name = "".join(
+            if not model.endswith("tar.gz"):
+                continue
+
+            # The model files are named like <policy-name><number>.tar.gz
+            # Remove the number from the name to get the policy name
+            policy_name = "".join(
                 [i for i in os.path.basename(model) if not i.isdigit()]
             )
-            no_of_correct_stories = await _evaluate_core_model(model, stories_file)
-            num_correct_run[model_name].append(no_of_correct_stories)
+            number_of_correct_stories = await _evaluate_core_model(model, stories_file)
+            number_correct_in_run[policy_name].append(number_of_correct_stories)
 
-        for k, v in num_correct_run.items():
-            num_correct[k].append(v)
+        for k, v in number_correct_in_run.items():
+            number_correct[k].append(v)
 
-    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
+    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), number_correct)
 
 
 async def compare_models(models: List[Text], stories_file: Text, output: Text) -> None:
     """Evaluates provided trained models on a test set."""
     from rasa.core import utils
 
-    num_correct = defaultdict(list)
+    number_correct = defaultdict(list)
 
     for model in models:
-        no_of_correct_stories = await _evaluate_core_model(model, stories_file)
-        num_correct[os.path.basename(model)].append(no_of_correct_stories)
+        number_of_correct_stories = await _evaluate_core_model(model, stories_file)
+        number_correct[os.path.basename(model)].append(number_of_correct_stories)
 
-    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), num_correct)
+    utils.dump_obj_as_json_to_file(os.path.join(output, RESULTS_FILE), number_correct)
 
 
 async def _evaluate_core_model(model: Text, stories_file: Text) -> int:
     from rasa.core.agent import Agent
 
-    logger.info("Evaluating model {}".format(model))
+    logger.info("Evaluating model '{}'".format(model))
 
     agent = Agent.load(model)
     completed_trackers = await _generate_trackers(stories_file, agent)
-    story_eval_store, no_of_stories = collect_story_predictions(
+    story_eval_store, number_of_stories = collect_story_predictions(
         completed_trackers, agent
     )
     failed_stories = story_eval_store.failed_stories
-    return no_of_stories - len(failed_stories)
+    return number_of_stories - len(failed_stories)
 
 
 def plot_nlu_results(output: Text, number_of_examples: List[int]) -> None:
diff --git a/rasa/core/training/dsl.py b/rasa/core/training/dsl.py
index be2c805359f2..01fe63ae5824 100644
--- a/rasa/core/training/dsl.py
+++ b/rasa/core/training/dsl.py
@@ -7,7 +7,7 @@
 import warnings
 from typing import Optional, List, Text, Any, Dict, TYPE_CHECKING, Iterable
 
-import rasa.utils.io
+import rasa.utils.io as io_utils
 from rasa.constants import DOCS_BASE_URL
 from rasa.core import utils
 from rasa.core.constants import INTENT_MESSAGE_PREFIX
@@ -176,8 +176,6 @@ async def read_from_folder(
         exclusion_percentage: Optional[int] = None,
     ) -> List[StoryStep]:
         """Given a path reads all contained story files."""
-        import rasa.utils.io as io_utils
-
         if not os.path.exists(resource_name):
             raise ValueError(
                 "Story file or folder could not be found. Make "
@@ -185,7 +183,7 @@ async def read_from_folder(
                 "or file.".format(os.path.abspath(resource_name))
             )
 
-        files = rasa.utils.io.list_files(resource_name)
+        files = io_utils.list_files(resource_name)
 
         return await StoryFileReader.read_from_files(
             files,
diff --git a/rasa/nlu/model.py b/rasa/nlu/model.py
index 1e2d1207513c..8efbe1a97486 100644
--- a/rasa/nlu/model.py
+++ b/rasa/nlu/model.py
@@ -221,7 +221,7 @@ def persist(
         path = os.path.abspath(path)
         dir_name = os.path.join(path, model_name)
 
-        rasa.utils.io.create_dir(dir_name)
+        rasa.utils.io.create_directory(dir_name)
 
         if self.training_data:
             metadata.update(self.training_data.persist(dir_name))
diff --git a/rasa/nlu/test.py b/rasa/nlu/test.py
index ff0bd366d25b..869d9914f241 100644
--- a/rasa/nlu/test.py
+++ b/rasa/nlu/test.py
@@ -714,7 +714,7 @@ def run_evaluation(
     }  # type: Dict[Text, Optional[Dict]]
 
     if report:
-        io_utils.create_dir(report)
+        io_utils.create_directory(report)
 
     intent_results, entity_results = get_eval_data(interpreter, test_data)
 
@@ -831,7 +831,7 @@ def cross_validate(
         nlu_config = config.load(nlu_config)
 
     if report:
-        io_utils.create_dir(report)
+        io_utils.create_directory(report)
 
     trainer = Trainer(nlu_config)
     trainer.pipeline = remove_pretrained_extractors(trainer.pipeline)
diff --git a/rasa/test.py b/rasa/test.py
index ddc1a5b43790..60488ffd7349 100644
--- a/rasa/test.py
+++ b/rasa/test.py
@@ -16,7 +16,7 @@
 logger = logging.getLogger(__name__)
 
 
-def test_core_models_in_dir(model_directory: Text, stories: Text, output: Text):
+def test_core_models_in_directory(model_directory: Text, stories: Text, output: Text):
     from rasa.core.test import compare_models_in_dir, plot_core_results
 
     loop = asyncio.get_event_loop()
@@ -58,7 +58,7 @@ def test_core(
 ):
     import rasa.core.test
     import rasa.core.utils as core_utils
-    from rasa.model import get_model, get_model_subdirectories
+    import rasa.model
     from rasa.core.interpreter import RegexInterpreter, NaturalLanguageInterpreter
     from rasa.core.agent import Agent
 
@@ -68,10 +68,10 @@ def test_core(
         kwargs = {}
 
     if output:
-        io_utils.create_dir(output)
+        io_utils.create_directory(output)
 
     try:
-        unpacked_model = get_model(model)
+        unpacked_model = rasa.model.get_model(model)
     except ModelNotFound:
         print_error(
             "Unable to test: could not find a model. Use 'rasa train' to train a "
@@ -79,7 +79,7 @@ def test_core(
         )
         return
 
-    core_path, nlu_path = get_model_subdirectories(unpacked_model)
+    core_path, nlu_path = rasa.model.get_model_subdirectories(unpacked_model)
 
     if not core_path:
         print_error(
diff --git a/rasa/utils/io.py b/rasa/utils/io.py
index 8c5e6f80d7c5..21ba7f10a4f8 100644
--- a/rasa/utils/io.py
+++ b/rasa/utils/io.py
@@ -334,13 +334,13 @@ def list_directory(path: Text) -> List[Text]:
         )
 
 
-def create_dir(dir_path: Text) -> None:
+def create_directory(directory_path: Text) -> None:
     """Creates a directory and its super paths.
 
     Succeeds even if the path already exists."""
 
     try:
-        os.makedirs(dir_path)
+        os.makedirs(directory_path)
     except OSError as e:
         # be happy if someone already created the path
         if e.errno != errno.EEXIST:
diff --git a/tests/nlu/base/test_evaluation.py b/tests/nlu/base/test_evaluation.py
index 1c5319ebf8e9..02b66c89cdd4 100644
--- a/tests/nlu/base/test_evaluation.py
+++ b/tests/nlu/base/test_evaluation.py
@@ -273,7 +273,7 @@ def test_intent_evaluation_report(tmpdir_factory):
     report_folder = os.path.join(path, "reports")
     report_filename = os.path.join(report_folder, "intent_report.json")
 
-    rasa.utils.io.create_dir(report_folder)
+    rasa.utils.io.create_directory(report_folder)
 
     intent_results = [
         IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
@@ -328,7 +328,7 @@ def __init__(self, component_config=None) -> None:
     report_filename_a = os.path.join(report_folder, "EntityExtractorA_report.json")
     report_filename_b = os.path.join(report_folder, "EntityExtractorB_report.json")
 
-    rasa.utils.io.create_dir(report_folder)
+    rasa.utils.io.create_directory(report_folder)
     mock_interpreter = Interpreter(
         [
             EntityExtractorA({"provides": ["entities"]}),
diff --git a/tests/nlu/base/test_utils.py b/tests/nlu/base/test_utils.py
index 108d0a022f74..fd59bc38632b 100644
--- a/tests/nlu/base/test_utils.py
+++ b/tests/nlu/base/test_utils.py
@@ -53,7 +53,7 @@ def test_list_files_ignores_hidden_files(tmpdir):
 
 def test_creation_of_existing_dir(tmpdir):
     # makes sure there is no exception
-    assert io_utils.create_dir(tmpdir.strpath) is None
+    assert io_utils.create_directory(tmpdir.strpath) is None
 
 
 def test_ordered():

From 26ee2c0727610e7b1945bc0acf48bf6aea8aee36 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 09:38:45 +0200
Subject: [PATCH 24/31] Update percentages

---
 rasa/cli/arguments/test.py  |  2 +-
 rasa/cli/arguments/train.py |  2 +-
 rasa/cli/test.py            | 16 ++++++++--------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/rasa/cli/arguments/test.py b/rasa/cli/arguments/test.py
index 6e62ee2a6144..0f3c768545f9 100644
--- a/rasa/cli/arguments/test.py
+++ b/rasa/cli/arguments/test.py
@@ -159,7 +159,7 @@ def add_test_nlu_argument_group(
         required=False,
         nargs="+",
         type=int,
-        default=[0, 25, 50, 75, 90],
+        default=[0, 25, 50, 75],
         help="Percentages of training data to exclude during comparison.",
     )
 
diff --git a/rasa/cli/arguments/train.py b/rasa/cli/arguments/train.py
index 06efd92ab175..6cb1d6be5062 100644
--- a/rasa/cli/arguments/train.py
+++ b/rasa/cli/arguments/train.py
@@ -88,7 +88,7 @@ def add_compare_params(
         "--percentages",
         nargs="*",
         type=int,
-        default=[0, 25, 50, 75, 90],
+        default=[0, 25, 50, 75],
         help="Range of exclusion percentages.",
     )
     parser.add_argument(
diff --git a/rasa/cli/test.py b/rasa/cli/test.py
index c5c294fa8c1a..5150f35e7e76 100644
--- a/rasa/cli/test.py
+++ b/rasa/cli/test.py
@@ -13,7 +13,6 @@
     DEFAULT_NLU_RESULTS_PATH,
     CONFIG_SCHEMA_FILE,
 )
-import rasa.test as rasa_test
 import rasa.utils.validation as validation_utils
 import rasa.cli.utils as cli_utils
 
@@ -59,7 +58,7 @@ def add_subparser(
 
 def test_core(args: argparse.Namespace) -> None:
     from rasa import data
-    from rasa.test import test_core
+    from rasa.test import test_core_models_in_directory, test_core, test_core_models
 
     endpoints = cli_utils.get_validated_path(
         args.endpoints, "endpoints", DEFAULT_ENDPOINTS_PATH, True
@@ -79,8 +78,8 @@ def test_core(args: argparse.Namespace) -> None:
             args.model, "model", DEFAULT_MODELS_PATH
         )
 
-        if args.evaluate_models_in_dir:
-            rasa_test.test_core_models_in_directory(args.model, stories, output)
+        if args.evaluate_model_directory:
+            test_core_models_in_directory(args.model, stories, output)
         else:
             test_core(
                 model=model_path,
@@ -91,12 +90,13 @@ def test_core(args: argparse.Namespace) -> None:
             )
 
     else:
-        rasa_test.test_core_models(args.model, stories, output)
+        test_core_models(args.model, stories, output)
 
 
 def test_nlu(args: argparse.Namespace) -> None:
     from rasa import data
     import rasa.utils.io as io_utils
+    from rasa.test import compare_nlu_models, perform_nlu_cross_validation, test_nlu
 
     nlu_data = cli_utils.get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
     nlu_data = data.get_nlu_directory(nlu_data)
@@ -132,7 +132,7 @@ def test_nlu(args: argparse.Namespace) -> None:
                 continue
 
         output = args.report or DEFAULT_NLU_RESULTS_PATH
-        rasa_test.compare_nlu_models(
+        compare_nlu_models(
             configs=config_files,
             nlu=nlu_data,
             output=output,
@@ -144,12 +144,12 @@ def test_nlu(args: argparse.Namespace) -> None:
         config = cli_utils.get_validated_path(
             args.config, "config", DEFAULT_CONFIG_PATH
         )
-        rasa_test.perform_nlu_cross_validation(config, nlu_data, vars(args))
+        perform_nlu_cross_validation(config, nlu_data, vars(args))
     else:
         model_path = cli_utils.get_validated_path(
             args.model, "model", DEFAULT_MODELS_PATH
         )
-        rasa_test.test_nlu(model_path, nlu_data, vars(args))
+        test_nlu(model_path, nlu_data, vars(args))
 
 
 def test(args: argparse.Namespace):

From d0b5d48d6942892a33338f8682d3662e0e5e0594 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 10:34:23 +0200
Subject: [PATCH 25/31] Fix tests.

---
 tests/cli/test_rasa_test.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/cli/test_rasa_test.py b/tests/cli/test_rasa_test.py
index c42e0a9d77bb..357bacb87c33 100644
--- a/tests/cli/test_rasa_test.py
+++ b/tests/cli/test_rasa_test.py
@@ -119,7 +119,7 @@ def test_test_core_comparison_after_train(run_in_default_project):
         "comparison_models",
         "--stories",
         "data/stories",
-        "--evaluate-models-in-dir",
+        "--evaluate-model-directory",
     )
 
     assert os.path.exists(os.path.join(DEFAULT_RESULTS_PATH, RESULTS_FILE))
@@ -134,7 +134,7 @@ def test_test_help(run):
     help_text = """usage: rasa test [-h] [-v] [-vv] [--quiet] [-m MODEL] [-s STORIES]
                  [--max-stories MAX_STORIES] [--out OUT] [--e2e]
                  [--endpoints ENDPOINTS] [--fail-on-prediction-errors]
-                 [--url URL] [--evaluate-models-in-dir] [-u NLU]
+                 [--url URL] [--evaluate-model-directory] [-u NLU]
                  [--report [REPORT]] [--successes [SUCCESSES]]
                  [--errors ERRORS] [--histogram HISTOGRAM] [--confmat CONFMAT]
                  [-c CONFIG [CONFIG ...]] [--cross-validation] [-f FOLDS]
@@ -170,7 +170,7 @@ def test_test_core_help(run):
                       [-s STORIES] [--max-stories MAX_STORIES] [--out OUT]
                       [--e2e] [--endpoints ENDPOINTS]
                       [--fail-on-prediction-errors] [--url URL]
-                      [--evaluate-models-in-dir]"""
+                      [--evaluate-model-directory]"""
 
     lines = help_text.split("\n")
 

From 52db23603fe2436d0adc40f4051e68a19cf84112 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 11:16:38 +0200
Subject: [PATCH 26/31] Add root route to server without --enable-api

---
 rasa/core/run.py     | 11 +++++++++--
 rasa/server.py       | 12 ++++++++----
 tests/conftest.py    |  8 ++++++++
 tests/test_server.py | 11 +++++++++++
 4 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/rasa/core/run.py b/rasa/core/run.py
index bbda948154e2..bf1c2c9d21aa 100644
--- a/rasa/core/run.py
+++ b/rasa/core/run.py
@@ -19,6 +19,7 @@
 from rasa.core.utils import AvailableEndpoints, configure_file_logging
 from rasa.model import get_model_subdirectories, get_model
 from rasa.utils.common import update_sanic_log_level, class_from_module_path
+from rasa.server import add_root_route
 
 logger = logging.getLogger()  # get the root logger
 
@@ -66,6 +67,13 @@ def _create_single_channel(channel, credentials):
             )
 
 
+def _create_app_without_api(cors: Optional[Union[Text, List[Text]]] = None):
+    app = Sanic(__name__, configure_logging=False)
+    add_root_route(app)
+    CORS(app, resources={r"/*": {"origins": cors or ""}}, automatic_options=True)
+    return app
+
+
 def configure_app(
     input_channels: Optional[List["InputChannel"]] = None,
     cors: Optional[Union[Text, List[Text]]] = None,
@@ -92,8 +100,7 @@ def configure_app(
             endpoints=endpoints,
         )
     else:
-        app = Sanic(__name__, configure_logging=False)
-        CORS(app, resources={r"/*": {"origins": cors or ""}}, automatic_options=True)
+        app = _create_app_without_api(cors)
 
     if input_channels:
         rasa.core.channels.channel.register(input_channels, app, route=route)
diff --git a/rasa/server.py b/rasa/server.py
index 5c17fc91519d..493b463ed5a8 100644
--- a/rasa/server.py
+++ b/rasa/server.py
@@ -274,6 +274,13 @@ async def _load_agent(
     return loaded_agent
 
 
+def add_root_route(app: Sanic):
+    @app.get("/")
+    async def hello(request: Request):
+        """Check if the server is running and responds with the version."""
+        return response.text("Hello from Rasa: " + rasa.__version__)
+
+
 def create_app(
     agent: Optional["Agent"] = None,
     cors_origins: Union[Text, List[Text]] = "*",
@@ -311,10 +318,7 @@ def create_app(
     async def handle_error_response(request: Request, exception: ErrorResponse):
         return response.json(exception.error_info, status=exception.status)
 
-    @app.get("/")
-    async def hello(request: Request):
-        """Check if the server is running and responds with the version."""
-        return response.text("Hello from Rasa: " + rasa.__version__)
+    add_root_route(app)
 
     @app.get("/version")
     async def version(request: Request):
diff --git a/tests/conftest.py b/tests/conftest.py
index 4d935ed58bde..fe81c9787b51 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,7 @@
 import pytest
 import logging
 
+from core.run import _create_app_without_api
 from rasa import server
 from rasa.core import config
 from rasa.core.agent import Agent, load_agent
@@ -176,6 +177,13 @@ async def rasa_server_secured(default_agent):
     return app
 
 
+@pytest.fixture
+async def rasa_server_without_api():
+    app = _create_app_without_api()
+    channel.register([RestInput()], app, "/webhooks/")
+    return app
+
+
 def clean_folder(folder):
     import os
 
diff --git a/tests/test_server.py b/tests/test_server.py
index 5df2029497ea..909a647b40b1 100644
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -37,6 +37,11 @@
 ]
 
 
+@pytest.fixture
+def rasa_app_without_api(rasa_server_without_api):
+    return rasa_server_without_api.test_client
+
+
 @pytest.fixture
 def rasa_app(rasa_server):
     return rasa_server.test_client
@@ -63,6 +68,12 @@ def test_root(rasa_app):
     assert response.text.startswith("Hello from Rasa:")
 
 
+def test_root_without_enable_api(rasa_app_without_api):
+    _, response = rasa_app_without_api.get("/")
+    assert response.status == 200
+    assert response.text.startswith("Hello from Rasa:")
+
+
 def test_root_secured(rasa_secured_app):
     _, response = rasa_secured_app.get("/")
     assert response.status == 200

From 5580c18f1fd9ad433840798ecb1863bacf4d6ace Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 11:17:23 +0200
Subject: [PATCH 27/31] add changelog entry

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index f0f92839412b..949111a54350 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -12,6 +12,7 @@ This project adheres to `Semantic Versioning`_ starting with version 1.0.
 
 Added
 -----
+- add root route to server started without ``--enable-api`` parameter
 
 Changed
 -------

From b6024f26b42c1ed79eed760727cd126318fd1980 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 11:40:53 +0200
Subject: [PATCH 28/31] Fix import

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index fe81c9787b51..7d77284b2101 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,7 +1,7 @@
 import pytest
 import logging
 
-from core.run import _create_app_without_api
+from rasa.core.run import _create_app_without_api
 from rasa import server
 from rasa.core import config
 from rasa.core.agent import Agent, load_agent

From 01857608d32d3f9129fe1b4396944c41fecc2118 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 13:26:35 +0200
Subject: [PATCH 29/31] StoryReader can handle multi line comments

---
 .../stories_with_multiline_comments.md        | 36 +++++++++++++++++++
 rasa/core/training/dsl.py                     | 11 +++++-
 tests/core/test_dsl.py                        | 20 +++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 data/test_stories/stories_with_multiline_comments.md

diff --git a/data/test_stories/stories_with_multiline_comments.md b/data/test_stories/stories_with_multiline_comments.md
new file mode 100644
index 000000000000..b2705b7524bc
--- /dev/null
+++ b/data/test_stories/stories_with_multiline_comments.md
@@ -0,0 +1,36 @@
+<!-- a
+fancy
+multiline
+comment -->
+## happy path
+* greet
+  - utter_greet
+* mood_great
+  - utter_happy
+
+## sad path 1
+* greet                 <!-- inline comment -->
+  - utter_greet
+* mood_unhappy
+  - utter_cheer_up
+  - utter_did_that_help
+* affirm
+  - utter_happy
+
+<!-- just one line comment-->
+## sad path 2
+* greet
+  - utter_greet
+* mood_unhappy
+  - utter_cheer_up
+  - utter_did_that_help
+* deny
+  - utter_goodbye
+
+<!--
+one more
+-->
+
+## say goodbye
+* goodbye
+  - utter_goodbye
\ No newline at end of file
diff --git a/rasa/core/training/dsl.py b/rasa/core/training/dsl.py
index 01fe63ae5824..eecbc6958516 100644
--- a/rasa/core/training/dsl.py
+++ b/rasa/core/training/dsl.py
@@ -228,7 +228,7 @@ async def read_from_file(
         interpreter: NaturalLanguageInterpreter = RegexInterpreter(),
         template_variables: Optional[Dict] = None,
         use_e2e: bool = False,
-    ):
+    ) -> List[StoryStep]:
         """Given a md file reads the contained stories."""
 
         try:
@@ -291,6 +291,7 @@ def _parse_event_line(line):
             return "", {}
 
     async def process_lines(self, lines: List[Text]) -> List[StoryStep]:
+        multiline_comment = False
 
         for idx, line in enumerate(lines):
             line_num = idx + 1
@@ -298,6 +299,14 @@ async def process_lines(self, lines: List[Text]) -> List[StoryStep]:
                 line = self._replace_template_variables(self._clean_up_line(line))
                 if line.strip() == "":
                     continue
+                elif line.startswith("<!--"):
+                    multiline_comment = True
+                    continue
+                elif multiline_comment and line.endswith("-->"):
+                    multiline_comment = False
+                    continue
+                elif multiline_comment:
+                    continue
                 elif line.startswith("#"):
                     # reached a new story block
                     name = line[1:].strip("# ")
diff --git a/tests/core/test_dsl.py b/tests/core/test_dsl.py
index bbdad84162d5..de03c1baa893 100644
--- a/tests/core/test_dsl.py
+++ b/tests/core/test_dsl.py
@@ -6,6 +6,8 @@
 
 import numpy as np
 
+from core.interpreter import RegexInterpreter
+from core.training.dsl import StoryFileReader
 from rasa.core import training
 from rasa.core.events import ActionExecuted, UserUttered
 from rasa.core.training.structures import Story
@@ -262,3 +264,21 @@ async def test_load_training_data_handles_hidden_files(tmpdir, default_domain):
 
     assert len(data.X) == 0
     assert len(data.y) == 0
+
+
+async def test_read_stories_with_multiline_comments(tmpdir, default_domain):
+    story_steps = await StoryFileReader.read_from_file(
+        "data/test_stories/stories_with_multiline_comments.md",
+        default_domain,
+        RegexInterpreter(),
+    )
+
+    assert len(story_steps) == 4
+    assert story_steps[0].block_name == "happy path"
+    assert len(story_steps[0].events) == 4
+    assert story_steps[1].block_name == "sad path 1"
+    assert len(story_steps[1].events) == 7
+    assert story_steps[2].block_name == "sad path 2"
+    assert len(story_steps[2].events) == 7
+    assert story_steps[3].block_name == "say goodbye"
+    assert len(story_steps[3].events) == 2

From 6b65eec5bd5d4f3d35c7fce36fb5f81b79780bf6 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 13:27:42 +0200
Subject: [PATCH 30/31] add changelog entry

---
 CHANGELOG.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index dbdbc8ff0332..8f20b877a36a 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -24,6 +24,7 @@ Removed
 Fixed
 -----
 - ``rasa test core`` can handle compressed model files
+- Rasa can handle story files containing multi line comments
 
 
 [1.1.8] - 2019-07-25

From 6a6a891a5c70d564222bc7830b3b1162b4cf0dd1 Mon Sep 17 00:00:00 2001
From: Tanja Bergmann <tabergma@gmail.com>
Date: Fri, 26 Jul 2019 14:26:24 +0200
Subject: [PATCH 31/31] Clean up imports

---
 tests/core/test_dsl.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/core/test_dsl.py b/tests/core/test_dsl.py
index de03c1baa893..b3c5a635fba3 100644
--- a/tests/core/test_dsl.py
+++ b/tests/core/test_dsl.py
@@ -1,4 +1,3 @@
-import io
 import os
 
 import json
@@ -6,9 +5,9 @@
 
 import numpy as np
 
-from core.interpreter import RegexInterpreter
-from core.training.dsl import StoryFileReader
 from rasa.core import training
+from rasa.core.interpreter import RegexInterpreter
+from rasa.core.training.dsl import StoryFileReader
 from rasa.core.events import ActionExecuted, UserUttered
 from rasa.core.training.structures import Story
 from rasa.core.featurizers import (