Skip to content

Commit

Permalink
Merge c0fe92d into 351fa3d
Browse files Browse the repository at this point in the history
  • Loading branch information
wochinge committed Mar 4, 2019
2 parents 351fa3d + c0fe92d commit 99050ae
Show file tree
Hide file tree
Showing 16 changed files with 204 additions and 164 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -31,6 +31,11 @@ Changed
- Components ``load(...)``, ``create(...)`` and ``cache_key(...)`` methods
additionally take component's meta/config dicts
- Components ``persist(...)`` method additionally takes file name prefix
- renamed ``rasa_nlu.evaluate`` to ``rasa_nlu.test``
- renamed ``rasa_nlu.test.run_cv_evaluation`` to
``rasa_nlu.test.cross_validate``
- renamed ``rasa_nlu.train.do_train()`` to ``rasa_nlu.train.train()``
- train command can now also load config from file

Removed
-------
Expand Down
6 changes: 6 additions & 0 deletions docs/migrations.rst
Expand Up @@ -35,6 +35,12 @@ custom components
- ``persist(...)`` method additionally takes file name prefix
Change your custom components accordingly.

function names
~~~~~~~~~~~~~~
- ``rasa_nlu.evaluate`` was renamed to ``rasa_nlu.test``
- ``rasa_nlu.test.run_cv_evaluation`` was renamed to
``rasa_nlu.test.cross_validate``
- ``rasa_nlu.train.do_train()`` was renamed to ``rasa_nlu.train.train()``

0.13.x to 0.14.0
----------------
Expand Down
5 changes: 5 additions & 0 deletions rasa_nlu/__init__.py
Expand Up @@ -2,6 +2,11 @@

import rasa_nlu.version

from rasa_nlu.train import train
from rasa_nlu.test import run_evaluation as test
from rasa_nlu.test import cross_validate
from rasa_nlu.training_data import load_data

logging.getLogger(__name__).addHandler(logging.NullHandler())

__version__ = rasa_nlu.version.__version__
Empty file added rasa_nlu/cli/__init__.py
Empty file.
68 changes: 68 additions & 0 deletions rasa_nlu/cli/server.py
@@ -0,0 +1,68 @@
def add_server_arguments(parser):
    """Register all Rasa NLU server command-line options on ``parser``.

    Mutates the given ``argparse.ArgumentParser`` in place (returns
    nothing); callers then invoke ``parser.parse_args()`` themselves.
    """
    parser.add_argument('-e', '--emulate',
                        choices=['wit', 'luis', 'dialogflow'],
                        help='which service to emulate (default: None i.e. use'
                             ' simple built in format)')
    parser.add_argument('-P', '--port',
                        type=int,
                        default=5000,
                        help='port on which to run server')
    parser.add_argument('--pre_load',
                        nargs='+',
                        default=[],
                        help='Preload models into memory before starting the '
                             'server. \nIf given `all` as input all the models '
                             'will be loaded.\nElse you can specify a list of '
                             'specific project names.\nEg: python -m '
                             'rasa_nlu.server --pre_load project1 '
                             '--path projects '
                             '-c config.yaml')
    parser.add_argument('-t', '--token',
                        help="auth token. If set, reject requests which don't "
                             "provide this token as a query parameter")
    parser.add_argument('-w', '--write',
                        help='file where logs will be saved')
    # BUGFIX: trailing space added after "are" — adjacent string literals
    # previously concatenated to "Models areloaded" in --help output.
    parser.add_argument('--path',
                        required=True,
                        help="working directory of the server. Models are "
                             "loaded from this directory and trained models "
                             "will be saved here.")
    parser.add_argument('--cors',
                        nargs="*",
                        help='List of domain patterns from where CORS '
                             '(cross-origin resource sharing) calls are '
                             'allowed. The default value is `[]` which '
                             'forbids all CORS requests.')

    parser.add_argument('--max_training_processes',
                        type=int,
                        default=1,
                        help='Number of processes used to handle training '
                             'requests. Increasing this value will have a '
                             'great impact on memory usage. It is '
                             'recommended to keep the default value.')
    parser.add_argument('--num_threads',
                        type=int,
                        default=1,
                        help='Number of parallel threads to use for '
                             'handling parse requests.')
    parser.add_argument('--endpoints',
                        help='Configuration file for the model server '
                             'as a yaml file')
    # BUGFIX: trailing space added after "server" — help text previously
    # rendered as "model serverqueries.".
    parser.add_argument('--wait_time_between_pulls',
                        type=int,
                        default=10,
                        help='Wait time in seconds between NLU model server '
                             'queries.')
    # BUGFIX: trailing space added after ")." — help text previously
    # rendered as "(…responses).If set to".
    parser.add_argument('--response_log',
                        help='Directory where logs will be saved '
                             '(containing queries and responses). '
                             'If set to ``null`` logging will be disabled.')
    parser.add_argument('--storage',
                        help='Set the remote location where models are stored. '
                             'E.g. on AWS. If nothing is configured, the '
                             'server will only serve the models that are '
                             'on disk in the configured `path`.')
    parser.add_argument('-c', '--config',
                        help="Default model configuration file used for "
                             "training.")
17 changes: 9 additions & 8 deletions rasa_nlu/convert.py
Expand Up @@ -4,10 +4,7 @@
from rasa_nlu.utils import write_to_file


def create_argument_parser():
parser = argparse.ArgumentParser(
description='Convert training data formats into one another')

def add_arguments(parser):
parser.add_argument('-d', '--data_file',
required=True,
help='file or dir containing training data')
Expand Down Expand Up @@ -39,11 +36,15 @@ def convert_training_data(data_file, out_file, output_format, language):
write_to_file(out_file, output)


def main(args):
    """Convert training data as specified by the parsed CLI arguments.

    Args:
        args: ``argparse.Namespace`` with ``data_file``, ``out_file``,
            ``format`` and ``language`` attributes (see ``add_arguments``).
    """
    convert_training_data(args.data_file,
                          args.out_file,
                          args.format,
                          args.language)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert training data formats into one another')
    add_arguments(parser)
    main(parser.parse_args())
2 changes: 1 addition & 1 deletion rasa_nlu/data_router.py
Expand Up @@ -14,7 +14,7 @@
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.emulators import NoEmulator
from rasa_nlu.evaluate import run_evaluation
from rasa_nlu.test import run_evaluation
from rasa_nlu.model import InvalidProjectError
from rasa_nlu.project import (
Project, STATUS_FAILED, STATUS_READY, STATUS_TRAINING, load_from_server)
Expand Down
114 changes: 25 additions & 89 deletions rasa_nlu/server.py
Expand Up @@ -10,6 +10,7 @@
from twisted.internet.defer import inlineCallbacks, returnValue

from rasa_nlu import config, utils
import rasa_nlu.cli.server as cli
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.data_router import (
DataRouter, InvalidProjectError, MaxTrainingError)
Expand All @@ -23,75 +24,7 @@

def create_argument_parser():
    """Build the command-line parser for the Rasa NLU server.

    The individual server options are registered by
    ``rasa_nlu.cli.server.add_server_arguments``; the shared logging
    flags come from ``rasa_nlu.utils``.  (The long inline list of
    ``add_argument`` calls that previously lived here was moved to the
    cli module — the leftover duplicate definitions are removed.)
    """
    parser = argparse.ArgumentParser(description='parse incoming text')

    cli.add_server_arguments(parser)
    utils.add_logging_option_arguments(parser)

    return parser
Expand Down Expand Up @@ -426,23 +359,20 @@ def get_token(_clitoken: str) -> str:
return token


if __name__ == '__main__':
# Running as standalone python application
cmdline_args = create_argument_parser().parse_args()

utils.configure_colored_logging(cmdline_args.loglevel)
pre_load = cmdline_args.pre_load
def main(args):
utils.configure_colored_logging(args.loglevel)
pre_load = args.pre_load

_endpoints = read_endpoints(cmdline_args.endpoints)
_endpoints = read_endpoints(args.endpoints)

router = DataRouter(
cmdline_args.path,
cmdline_args.max_training_processes,
cmdline_args.response_log,
cmdline_args.emulate,
cmdline_args.storage,
args.path,
args.max_training_processes,
args.response_log,
args.emulate,
args.storage,
model_server=_endpoints.model,
wait_time_between_pulls=cmdline_args.wait_time_between_pulls
wait_time_between_pulls=args.wait_time_between_pulls
)
if pre_load:
logger.debug('Preloading....')
Expand All @@ -452,13 +382,19 @@ def get_token(_clitoken: str) -> str:

rasa = RasaNLU(
router,
cmdline_args.loglevel,
cmdline_args.write,
cmdline_args.num_threads,
args.loglevel,
args.write,
args.num_threads,
get_token(cmdline_args.token),
cmdline_args.cors,
default_config_path=cmdline_args.config
args.cors,
default_config_path=args.config
)

logger.info('Started http server on port %s' % cmdline_args.port)
rasa.app.run('0.0.0.0', cmdline_args.port)
logger.info('Started http server on port %s' % args.port)
rasa.app.run('0.0.0.0', args.port)


if __name__ == '__main__':
    # Standalone entry point: parse CLI arguments and start the server.
    main(create_argument_parser().parse_args())
38 changes: 24 additions & 14 deletions rasa_nlu/evaluate.py → rasa_nlu/test.py
Expand Up @@ -6,7 +6,7 @@
import logging
import numpy as np
import shutil
from typing import List, Optional, Text
from typing import List, Optional, Text, Union

from rasa_nlu import config, training_data, utils
from rasa_nlu.config import RasaNLUModelConfig
Expand Down Expand Up @@ -39,6 +39,13 @@ def create_argument_parser():
description='evaluate a Rasa NLU pipeline with cross '
'validation or on external data')

utils.add_logging_option_arguments(parser, default=logging.INFO)
_add_arguments(parser)

return parser


def _add_arguments(parser):
parser.add_argument('-d', '--data', required=True,
help="file containing training/evaluation data")

Expand Down Expand Up @@ -75,10 +82,6 @@ def create_argument_parser():
parser.add_argument('--confmat', required=False, default="confmat.png",
help="output path for the confusion matrix plot")

utils.add_logging_option_arguments(parser, default=logging.INFO)

return parser


def plot_confusion_matrix(cm,
classes,
Expand Down Expand Up @@ -803,19 +806,26 @@ def combine_entity_result(results, interpreter, data):
return results


def run_cv_evaluation(data: TrainingData,
n_folds: int,
nlu_config: RasaNLUModelConfig) -> CVEvaluationResult:
"""Stratified cross validation on data
:param data: Training Data
:param n_folds: integer, number of cv folds
:param nlu_config: nlu config file
:return: dictionary with key, list structure, where each entry in list
def cross_validate(data: TrainingData, n_folds: int,
nlu_config: Union[RasaNLUModelConfig, Text]
) -> CVEvaluationResult:
"""Stratified cross validation on data.
Args:
data: Training Data
n_folds: integer, number of cv folds
nlu_config: nlu config file
Returns:
dictionary with key, list structure, where each entry in list
corresponds to the relevant result for one fold
"""
from collections import defaultdict
import tempfile

if isinstance(nlu_config, str):
nlu_config = config.load(nlu_config)

trainer = Trainer(nlu_config)
train_results = defaultdict(list)
test_results = defaultdict(list)
Expand Down Expand Up @@ -946,7 +956,7 @@ def main():
nlu_config = config.load(cmdline_args.config)
data = training_data.load_data(cmdline_args.data)
data = drop_intents_below_freq(data, cutoff=5)
results, entity_results = run_cv_evaluation(
results, entity_results = cross_validate(
data, int(cmdline_args.folds), nlu_config)
logger.info("CV evaluation (n={})".format(cmdline_args.folds))

Expand Down

0 comments on commit 99050ae

Please sign in to comment.