Skip to content

Commit

Permalink
Merge c0fe92d into 351fa3d
Browse files Browse the repository at this point in the history
  • Loading branch information
wochinge committed Mar 4, 2019
2 parents 351fa3d + c0fe92d commit 99050ae
Show file tree
Hide file tree
Showing 16 changed files with 204 additions and 164 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -31,6 +31,11 @@ Changed
- Components ``load(...)``, ``create(...)`` and ``cache_key(...)`` methods
additionally take component's meta/config dicts
- Components ``persist(...)`` method additionally takes file name prefix
- renamed ``rasa_nlu.evaluate`` to ``rasa_nlu.test``
- renamed ``rasa_nlu.test.run_cv_evaluation`` to
``rasa_nlu.test.cross_validate``
- renamed ``rasa_nlu.train.do_train()`` to ``rasa_nlu.train.train()``
- train command can now also load config from file

Removed
-------
Expand Down
6 changes: 6 additions & 0 deletions docs/migrations.rst
Expand Up @@ -35,6 +35,12 @@ custom components
- ``persist(...)`` method additionally takes file name prefix
Change your custom components accordingly.

function names
~~~~~~~~~~~~~~
- ``rasa_nlu.evaluate`` was renamed to ``rasa_nlu.test``
- ``rasa_nlu.test.run_cv_evaluation`` was renamed to
``rasa_nlu.test.cross_validate``
- ``rasa_nlu.train.do_train()`` was renamed to ``rasa_nlu.train.train()``

0.13.x to 0.14.0
----------------
Expand Down
5 changes: 5 additions & 0 deletions rasa_nlu/__init__.py
Expand Up @@ -2,6 +2,11 @@

import rasa_nlu.version

from rasa_nlu.train import train
from rasa_nlu.test import run_evaluation as test
from rasa_nlu.test import cross_validate
from rasa_nlu.training_data import load_data

logging.getLogger(__name__).addHandler(logging.NullHandler())

__version__ = rasa_nlu.version.__version__
Empty file added rasa_nlu/cli/__init__.py
Empty file.
68 changes: 68 additions & 0 deletions rasa_nlu/cli/server.py
@@ -0,0 +1,68 @@
def add_server_arguments(parser):
    """Register all Rasa NLU server command-line options on ``parser``.

    Mutates the given ``argparse.ArgumentParser`` in place (returns
    nothing); callers then invoke ``parser.parse_args()`` themselves.
    """
    parser.add_argument('-e', '--emulate',
                        choices=['wit', 'luis', 'dialogflow'],
                        help='which service to emulate (default: None i.e. use'
                             ' simple built in format)')
    parser.add_argument('-P', '--port',
                        type=int,
                        default=5000,
                        help='port on which to run server')
    parser.add_argument('--pre_load',
                        nargs='+',
                        default=[],
                        help='Preload models into memory before starting the '
                             'server. \nIf given `all` as input all the models '
                             'will be loaded.\nElse you can specify a list of '
                             'specific project names.\nEg: python -m '
                             'rasa_nlu.server --pre_load project1 '
                             '--path projects '
                             '-c config.yaml')
    parser.add_argument('-t', '--token',
                        help="auth token. If set, reject requests which don't "
                             "provide this token as a query parameter")
    parser.add_argument('-w', '--write',
                        help='file where logs will be saved')
    # BUGFIX: trailing space added after "are" — adjacent string literals
    # previously concatenated to "Models areloaded" in --help output.
    parser.add_argument('--path',
                        required=True,
                        help="working directory of the server. Models are "
                             "loaded from this directory and trained models "
                             "will be saved here.")
    parser.add_argument('--cors',
                        nargs="*",
                        help='List of domain patterns from where CORS '
                             '(cross-origin resource sharing) calls are '
                             'allowed. The default value is `[]` which '
                             'forbids all CORS requests.')

    parser.add_argument('--max_training_processes',
                        type=int,
                        default=1,
                        help='Number of processes used to handle training '
                             'requests. Increasing this value will have a '
                             'great impact on memory usage. It is '
                             'recommended to keep the default value.')
    parser.add_argument('--num_threads',
                        type=int,
                        default=1,
                        help='Number of parallel threads to use for '
                             'handling parse requests.')
    parser.add_argument('--endpoints',
                        help='Configuration file for the model server '
                             'as a yaml file')
    # BUGFIX: trailing space added after "server" — help text previously
    # rendered as "model serverqueries.".
    parser.add_argument('--wait_time_between_pulls',
                        type=int,
                        default=10,
                        help='Wait time in seconds between NLU model server '
                             'queries.')
    # BUGFIX: trailing space added after ")." — help text previously
    # rendered as "(…responses).If set to".
    parser.add_argument('--response_log',
                        help='Directory where logs will be saved '
                             '(containing queries and responses). '
                             'If set to ``null`` logging will be disabled.')
    parser.add_argument('--storage',
                        help='Set the remote location where models are stored. '
                             'E.g. on AWS. If nothing is configured, the '
                             'server will only serve the models that are '
                             'on disk in the configured `path`.')
    parser.add_argument('-c', '--config',
                        help="Default model configuration file used for "
                             "training.")
17 changes: 9 additions & 8 deletions rasa_nlu/convert.py
Expand Up @@ -4,10 +4,7 @@
from rasa_nlu.utils import write_to_file


def create_argument_parser():
parser = argparse.ArgumentParser(
description='Convert training data formats into one another')

def add_arguments(parser):
parser.add_argument('-d', '--data_file',
required=True,
help='file or dir containing training data')
Expand Down Expand Up @@ -39,11 +36,15 @@ def convert_training_data(data_file, out_file, output_format, language):
write_to_file(out_file, output)


def main(args):
    """Convert training data as specified by the parsed CLI arguments.

    Args:
        args: ``argparse.Namespace`` with ``data_file``, ``out_file``,
            ``format`` and ``language`` attributes (see ``add_arguments``).
    """
    convert_training_data(args.data_file,
                          args.out_file,
                          args.format,
                          args.language)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert training data formats into one another')
    add_arguments(parser)
    main(parser.parse_args())
2 changes: 1 addition & 1 deletion rasa_nlu/data_router.py
Expand Up @@ -14,7 +14,7 @@
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.emulators import NoEmulator
from rasa_nlu.evaluate import run_evaluation
from rasa_nlu.test import run_evaluation
from rasa_nlu.model import InvalidProjectError
from rasa_nlu.project import (
Project, STATUS_FAILED, STATUS_READY, STATUS_TRAINING, load_from_server)
Expand Down
114 changes: 25 additions & 89 deletions rasa_nlu/server.py
Expand Up @@ -10,6 +10,7 @@
from twisted.internet.defer import inlineCallbacks, returnValue

from rasa_nlu import config, utils
import rasa_nlu.cli.server as cli
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.data_router import (
DataRouter, InvalidProjectError, MaxTrainingError)
Expand All @@ -23,75 +24,7 @@

def create_argument_parser():
    """Build the command-line parser for the Rasa NLU server.

    The individual server options are registered by
    ``rasa_nlu.cli.server.add_server_arguments``; the shared logging
    flags come from ``rasa_nlu.utils``.  (The long inline list of
    ``add_argument`` calls that previously lived here was moved to the
    cli module — the leftover duplicate definitions are removed.)
    """
    parser = argparse.ArgumentParser(description='parse incoming text')

    cli.add_server_arguments(parser)
    utils.add_logging_option_arguments(parser)

    return parser
Expand Down Expand Up @@ -426,23 +359,20 @@ def get_token(_clitoken: str) -> str:
return token


if __name__ == '__main__':
# Running as standalone python application
cmdline_args = create_argument_parser().parse_args()

utils.configure_colored_logging(cmdline_args.loglevel)
pre_load = cmdline_args.pre_load
def main(args):
utils.configure_colored_logging(args.loglevel)
pre_load = args.pre_load

_endpoints = read_endpoints(cmdline_args.endpoints)
_endpoints = read_endpoints(args.endpoints)

router = DataRouter(
cmdline_args.path,
cmdline_args.max_training_processes,
cmdline_args.response_log,
cmdline_args.emulate,
cmdline_args.storage,
args.path,
args.max_training_processes,
args.response_log,
args.emulate,
args.storage,
model_server=_endpoints.model,
wait_time_between_pulls=cmdline_args.wait_time_between_pulls
wait_time_between_pulls=args.wait_time_between_pulls
)
if pre_load:
logger.debug('Preloading....')
Expand All @@ -452,13 +382,19 @@ def get_token(_clitoken: str) -> str:

rasa = RasaNLU(
router,
cmdline_args.loglevel,
cmdline_args.write,
cmdline_args.num_threads,
args.loglevel,
args.write,
args.num_threads,
get_token(cmdline_args.token),
cmdline_args.cors,
default_config_path=cmdline_args.config
args.cors,
default_config_path=args.config
)

logger.info('Started http server on port %s' % cmdline_args.port)
rasa.app.run('0.0.0.0', cmdline_args.port)
logger.info('Started http server on port %s' % args.port)
rasa.app.run('0.0.0.0', args.port)


if __name__ == '__main__':
    # Standalone entry point: parse CLI arguments and start the server.
    main(create_argument_parser().parse_args())
38 changes: 24 additions & 14 deletions rasa_nlu/evaluate.py → rasa_nlu/test.py
Expand Up @@ -6,7 +6,7 @@
import logging
import numpy as np
import shutil
from typing import List, Optional, Text
from typing import List, Optional, Text, Union

from rasa_nlu import config, training_data, utils
from rasa_nlu.config import RasaNLUModelConfig
Expand Down Expand Up @@ -39,6 +39,13 @@ def create_argument_parser():
description='evaluate a Rasa NLU pipeline with cross '
'validation or on external data')

utils.add_logging_option_arguments(parser, default=logging.INFO)
_add_arguments(parser)

return parser


def _add_arguments(parser):
parser.add_argument('-d', '--data', required=True,
help="file containing training/evaluation data")

Expand Down Expand Up @@ -75,10 +82,6 @@ def create_argument_parser():
parser.add_argument('--confmat', required=False, default="confmat.png",
help="output path for the confusion matrix plot")

utils.add_logging_option_arguments(parser, default=logging.INFO)

return parser


def plot_confusion_matrix(cm,
classes,
Expand Down Expand Up @@ -803,19 +806,26 @@ def combine_entity_result(results, interpreter, data):
return results


def run_cv_evaluation(data: TrainingData,
n_folds: int,
nlu_config: RasaNLUModelConfig) -> CVEvaluationResult:
"""Stratified cross validation on data
:param data: Training Data
:param n_folds: integer, number of cv folds
:param nlu_config: nlu config file
:return: dictionary with key, list structure, where each entry in list
def cross_validate(data: TrainingData, n_folds: int,
nlu_config: Union[RasaNLUModelConfig, Text]
) -> CVEvaluationResult:
"""Stratified cross validation on data.
Args:
data: Training Data
n_folds: integer, number of cv folds
nlu_config: nlu config file
Returns:
dictionary with key, list structure, where each entry in list
corresponds to the relevant result for one fold
"""
from collections import defaultdict
import tempfile

if isinstance(nlu_config, str):
nlu_config = config.load(nlu_config)

trainer = Trainer(nlu_config)
train_results = defaultdict(list)
test_results = defaultdict(list)
Expand Down Expand Up @@ -946,7 +956,7 @@ def main():
nlu_config = config.load(cmdline_args.config)
data = training_data.load_data(cmdline_args.data)
data = drop_intents_below_freq(data, cutoff=5)
results, entity_results = run_cv_evaluation(
results, entity_results = cross_validate(
data, int(cmdline_args.folds), nlu_config)
logger.info("CV evaluation (n={})".format(cmdline_args.folds))

Expand Down

0 comments on commit 99050ae

Please sign in to comment.