diff --git a/.travis.yml b/.travis.yml
index 8c365131eae0..cb861d1d3522 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -29,7 +29,7 @@ before_script:
   - mkdir $HOME/tmp
   - export TMPDIR=$HOME/tmp
 script:
-  - py.test --pep8 -m pep8
+  - py.test --codestyle -m codestyle
   - py.test tests/base --cov rasa_nlu -v --cov-append
   - py.test tests/training --cov rasa_nlu -v --cov-append
 after_success:
@@ -71,14 +71,16 @@ jobs:
     - git remote set-url --push origin "git@github.com:$TRAVIS_REPO_SLUG"
     - export ${!TRAVIS*}
     - sphinx-versioning push docs newdocs . -- -b dirhtml -A html_theme=rasabaster
-  - stage: Test starter packs
+  - stage: test
+    if: branch = "*.x" # only new NLU version builds test the starter packs
     name: "NLU starter pack"
     python: 3.6
     script:
     - git clone https://github.com/RasaHQ/starter-pack-rasa-nlu.git
     - cd starter-pack-rasa-nlu
     - python -m pytest tests/test_nlu.py
-  - stage: Test starter packs
+  - stage: test
+    if: branch = "*.x" # only new NLU version builds test the starter packs
     name: "Stack starter pack (NLU only)"
     python: 3.6
     script:
@@ -106,6 +108,7 @@ jobs:
     - git commit --allow-empty -m "trigger nlu docs update"
     - git push origin master
   - stage: deploy
+    name: "PyPI test"
     python: 3.6
     install: skip
     script: skip
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 2b9840828490..01e207e4bd09 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -15,6 +15,8 @@ Changed
 - validate training data only if used for training
 - applied spacy guidelines on how to disable pipeline components
+- replace pep8 with pycodestyle
+
 
 Removed
 -------
 - **removed python 2.7 support**
@@ -22,7 +24,22 @@ Removed
 
 Fixed
 -----
 
-.. _v0-14-0:
+[0.14.2] - 2018-01-29
+^^^^^^^^^^^^^^^^^^^^^
+
+Added
+-----
+
+- ``rasa_nlu.evaluate`` now exports reports into a folder and also
+  includes the entity extractor reports
+
+Changed
+-------
+- updated requirements to match Core and SDK
+- pinned keras dependencies
+- starter packs are now tested in parallel with the unittests,
+  and only on branches ending in ``.x`` (i.e. new version releases)
+
 [0.14.1] - 2018-01-23
 ^^^^^^^^^^^^^^^^^^^^^
 
@@ -31,6 +48,8 @@ Fixed
 -----
 - scikit-learn is a global requirement
 
+.. _v0-14-0:
+
 [0.14.0] - 2018-01-23
 ^^^^^^^^^^^^^^^^^^^^^
 
@@ -57,6 +76,8 @@ Changed
 - updated TensorFlow version to 1.12.0
 - updated scikit-learn version to 0.20.2
 - updated cloudpickle version to 0.6.1
+- updated requirements to match Core and SDK
+- pinned keras dependencies
 
 Removed
 -------
diff --git a/alt_requirements/requirements_bare.txt b/alt_requirements/requirements_bare.txt
index 2b220e9331f2..8d750dcbdbb2 100644
--- a/alt_requirements/requirements_bare.txt
+++ b/alt_requirements/requirements_bare.txt
@@ -2,7 +2,8 @@ gevent==1.2.2
 klein==17.10.0
 hyperlink==17.3.1
 typing==3.6.2
-future==0.16.0
+future==0.17.1
+six==1.11.0
 jsonschema==2.6.0
 matplotlib==2.1.0
 requests==2.20.0
@@ -10,7 +11,7 @@ tqdm==4.19.5
 numpy==1.14.5
 simplejson==3.13.2
 cloudpickle==0.6.1
-packaging==17.1
+packaging==18.0
 ruamel.yaml==0.15.78
-coloredlogs==9.0
+coloredlogs==10.0
 scikit-learn==0.20.2
diff --git a/alt_requirements/requirements_dev.txt b/alt_requirements/requirements_dev.txt
index 667b14c534a7..f700f172a072 100644
--- a/alt_requirements/requirements_dev.txt
+++ b/alt_requirements/requirements_dev.txt
@@ -4,7 +4,7 @@
 
 # test
 python-coveralls==2.9.1
-pytest-pep8==1.0.6
+pytest-pycodestyle==1.4.0
 pytest-cov==2.5.1
 pytest-twisted==1.6
 pytest==3.3.2
diff --git a/alt_requirements/requirements_tensorflow_sklearn.txt b/alt_requirements/requirements_tensorflow_sklearn.txt
index bf43ab5fcf81..6c5687e17077 100644
--- a/alt_requirements/requirements_tensorflow_sklearn.txt
+++ b/alt_requirements/requirements_tensorflow_sklearn.txt
@@ -4,3 +4,5 @@
 tensorflow==1.12.0
 scipy==1.1.0
 sklearn-crfsuite==0.3.6
+keras-applications==1.0.6
+keras-preprocessing==1.0.5
\ No newline at end of file
diff --git a/docs/evaluation.rst b/docs/evaluation.rst
index bd008beeb9a8..84d59de1afe9 100644
--- a/docs/evaluation.rst
+++ b/docs/evaluation.rst
@@ -80,9 +80,9 @@ Intent Classification
 The evaluation script will produce a report, confusion matrix and
 confidence histogram for your model.
 
-The report logs precision, recall, and f1 measure for
-each intent, as well as provide an overall average. You can save this
-report as a JSON file using the `--report` flag.
+The report logs precision, recall and f1 measure for
+each intent and entity, as well as providing an overall average.
+You can save these reports as JSON files using the `--report` flag.
The confusion matrix shows you which intents are mistaken for others; any samples which have been diff --git a/rasa_nlu/classifiers/embedding_intent_classifier.py b/rasa_nlu/classifiers/embedding_intent_classifier.py index 450001273ac7..3095577de50d 100644 --- a/rasa_nlu/classifiers/embedding_intent_classifier.py +++ b/rasa_nlu/classifiers/embedding_intent_classifier.py @@ -112,13 +112,13 @@ def __init__(self, component_config: Optional[Dict[Text, Any]] = None, inv_intent_dict: Optional[Dict[int, Text]] = None, encoded_all_intents: Optional[np.ndarray] = None, - session: Optional[tf.Session] = None, - graph: Optional[tf.Graph] = None, - message_placeholder: Optional[tf.Tensor] = None, - intent_placeholder: Optional[tf.Tensor] = None, - similarity_op: Optional[tf.Tensor] = None, - word_embed: Optional[tf.Tensor] = None, - intent_embed: Optional[tf.Tensor] = None + session: Optional['tf.Session'] = None, + graph: Optional['tf.Graph'] = None, + message_placeholder: Optional['tf.Tensor'] = None, + intent_placeholder: Optional['tf.Tensor'] = None, + similarity_op: Optional['tf.Tensor'] = None, + word_embed: Optional['tf.Tensor'] = None, + intent_embed: Optional['tf.Tensor'] = None ) -> None: """Declare instant variables with default values""" @@ -198,9 +198,9 @@ def required_packages(cls) -> List[Text]: def _check_tensorflow(): if tf is None: raise ImportError( - 'Failed to import `tensorflow`. ' - 'Please install `tensorflow`. ' - 'For example with `pip install tensorflow`.') + 'Failed to import `tensorflow`. ' + 'Please install `tensorflow`. ' + 'For example with `pip install tensorflow`.') # training data helpers: @staticmethod @@ -232,7 +232,7 @@ def _create_encoded_intents(self, if self.intent_tokenization_flag: intent_token_dict = self._create_intent_token_dict( - list(intent_dict.keys()), self.intent_split_symbol) + list(intent_dict.keys()), self.intent_split_symbol) encoded_all_intents = np.zeros((len(intent_dict), len(intent_token_dict))) @@ -277,8 +277,8 @@ def _prepare_data_for_training( # tf helpers: - def _create_tf_embed_nn(self, x_in: tf.Tensor, is_training: tf.Tensor, - layer_sizes: List[int], name: Text) -> tf.Tensor: + def _create_tf_embed_nn(self, x_in: 'tf.Tensor', is_training: 'tf.Tensor', + layer_sizes: List[int], name: Text) -> 'tf.Tensor': """Create nn with hidden layers and name""" reg = tf.contrib.layers.l2_regularizer(self.C2) @@ -298,10 +298,10 @@ def _create_tf_embed_nn(self, x_in: tf.Tensor, is_training: tf.Tensor, return x def _create_tf_embed(self, - a_in: tf.Tensor, - b_in: tf.Tensor, - is_training: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor]: + a_in: 'tf.Tensor', + b_in: 'tf.Tensor', + is_training: 'tf.Tensor' + ) -> Tuple['tf.Tensor', 'tf.Tensor']: """Create tf graph for training""" emb_a = self._create_tf_embed_nn(a_in, is_training, @@ -313,8 +313,8 @@ def _create_tf_embed(self, return emb_a, emb_b def _tf_sim(self, - a: tf.Tensor, - b: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]: + a: 'tf.Tensor', + b: 'tf.Tensor') -> Tuple['tf.Tensor', 'tf.Tensor']: """Define similarity in two cases: @@ -338,7 +338,7 @@ def _tf_sim(self, "should be 'cosine' or 'inner'" "".format(self.similarity_type)) - def _tf_loss(self, sim: tf.Tensor, sim_emb: tf.Tensor) -> tf.Tensor: + def _tf_loss(self, sim: 'tf.Tensor', sim_emb: 'tf.Tensor') -> 'tf.Tensor': """Define loss""" # loss for maximizing similarity with correct action @@ -379,8 +379,8 @@ def _create_batch_b(self, batch_pos_b: np.ndarray, for b in range(batch_pos_b.shape[0]): # create negative indexes out of possible ones # 
except for correct index of b - negative_indexes = [i for i in range( - self.encoded_all_intents.shape[0]) + negative_indexes = [i for i in + range(self.encoded_all_intents.shape[0]) if i != intent_ids[b]] negs = np.random.choice(negative_indexes, size=self.num_neg) @@ -410,9 +410,9 @@ def _train_tf(self, X: np.ndarray, Y: np.ndarray, intents_for_X: np.ndarray, - loss: tf.Tensor, - is_training: tf.Tensor, - train_op: tf.Tensor + loss: 'tf.Tensor', + is_training: 'tf.Tensor', + train_op: 'tf.Tensor' ) -> None: """Train tf graph""" @@ -443,10 +443,10 @@ def _train_tf(self, batch_b = self._create_batch_b(batch_pos_b, intents_for_b) sess_out = self.session.run( - {'loss': loss, 'train_op': train_op}, - feed_dict={self.a_in: batch_a, - self.b_in: batch_b, - is_training: True} + {'loss': loss, 'train_op': train_op}, + feed_dict={self.a_in: batch_a, + self.b_in: batch_b, + is_training: True} ) ep_loss += sess_out.get('loss') / batches_per_epoch @@ -477,7 +477,7 @@ def _train_tf(self, def _output_training_stat(self, X: np.ndarray, intents_for_X: np.ndarray, - is_training: tf.Tensor) -> np.ndarray: + is_training: 'tf.Tensor') -> np.ndarray: """Output training statistics""" n = self.evaluate_on_num_examples @@ -509,10 +509,10 @@ def train(self, self.inv_intent_dict = {v: k for k, v in intent_dict.items()} self.encoded_all_intents = self._create_encoded_intents( - intent_dict) + intent_dict) X, Y, intents_for_X = self._prepare_data_for_training( - training_data, intent_dict) + training_data, intent_dict) # check if number of negatives is less than number of intents logger.debug("Check if num_neg {} is smaller than " @@ -707,16 +707,16 @@ def load(cls, encoded_all_intents = pickle.load(f) return cls( - component_config=meta, - inv_intent_dict=inv_intent_dict, - encoded_all_intents=encoded_all_intents, - session=sess, - graph=graph, - message_placeholder=a_in, - intent_placeholder=b_in, - similarity_op=sim_op, - word_embed=word_embed, - intent_embed=intent_embed + component_config=meta, + inv_intent_dict=inv_intent_dict, + encoded_all_intents=encoded_all_intents, + session=sess, + graph=graph, + message_placeholder=a_in, + intent_placeholder=b_in, + similarity_op=sim_op, + word_embed=word_embed, + intent_embed=intent_embed ) else: diff --git a/rasa_nlu/components.py b/rasa_nlu/components.py index ce349d040b33..98e85d0930f0 100644 --- a/rasa_nlu/components.py +++ b/rasa_nlu/components.py @@ -37,7 +37,7 @@ def validate_requirements(component_names: List[Text]) -> None: for component_name in component_names: component_class = registry.get_component_class(component_name) failed_imports.update(find_unavailable_packages( - component_class.required_packages())) + component_class.required_packages())) if failed_imports: # pragma: no cover # if available, use the development file to figure out the correct # version numbers for each requirement @@ -170,7 +170,7 @@ def __init__(self, component_config["name"] = self.name self.component_config = config.override_defaults( - self.defaults, component_config) + self.defaults, component_config) self.partial_processing_pipeline = None self.partial_processing_context = None @@ -355,9 +355,9 @@ def __get_cached_component(self, component_class = registry.get_component_class(component_name) cache_key = component_class.cache_key(model_metadata) - if (cache_key is not None - and self.use_cache - and cache_key in self.component_cache): + if (cache_key is not None and + self.use_cache and + cache_key in self.component_cache): return self.component_cache[cache_key], cache_key 
else: return None, cache_key @@ -393,10 +393,10 @@ def load_component(self, try: cached_component, cache_key = self.__get_cached_component( - component_name, model_metadata) + component_name, model_metadata) component = registry.load_component_by_name( - component_name, model_dir, model_metadata, - cached_component, **context) + component_name, model_dir, model_metadata, + cached_component, **context) if not cached_component: # If the component wasn't in the cache, # let us add it if possible @@ -416,7 +416,7 @@ def create_component(self, try: component, cache_key = self.__get_cached_component( - component_name, Metadata(cfg.as_dict(), None)) + component_name, Metadata(cfg.as_dict(), None)) if component is None: component = registry.create_component_by_name(component_name, cfg) diff --git a/rasa_nlu/config.py b/rasa_nlu/config.py index 8d6acbf4ac2f..7e5c569766fe 100644 --- a/rasa_nlu/config.py +++ b/rasa_nlu/config.py @@ -115,7 +115,7 @@ def __init__(self, configuration_values=None): self.__dict__['pipeline'] = pipeline else: known_templates = ", ".join( - registry.registered_pipeline_templates.keys()) + registry.registered_pipeline_templates.keys()) raise InvalidConfigError("No pipeline specified and unknown " "pipeline template '{}' passed. Known " diff --git a/rasa_nlu/convert.py b/rasa_nlu/convert.py index b6d67db4731d..86680ad1a5bb 100644 --- a/rasa_nlu/convert.py +++ b/rasa_nlu/convert.py @@ -6,7 +6,7 @@ def create_argument_parser(): parser = argparse.ArgumentParser( - description='Convert training data formats into one another') + description='Convert training data formats into one another') parser.add_argument('-d', '--data_file', required=True, diff --git a/rasa_nlu/data_router.py b/rasa_nlu/data_router.py index c2955e558678..a24c8a8f4254 100644 --- a/rasa_nlu/data_router.py +++ b/rasa_nlu/data_router.py @@ -135,8 +135,8 @@ def _create_query_logger(response_log): out_file = io.open(response_logfile, 'a', encoding='utf8') # noinspection PyTypeChecker query_logger = Logger( - observer=jsonFileLogObserver(out_file, recordSeparator=''), - namespace='query-logger') + observer=jsonFileLogObserver(out_file, recordSeparator=''), + namespace='query-logger') # Prevents queries getting logged with parent logger # --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) @@ -166,12 +166,12 @@ def _create_project_store(self, if self.model_server is not None: project_store[default_project] = load_from_server( - self.component_builder, - default_project, - self.project_dir, - self.remote_storage, - self.model_server, - self.wait_time_between_pulls + self.component_builder, + default_project, + self.project_dir, + self.remote_storage, + self.model_server, + self.wait_time_between_pulls ) else: for project in projects: @@ -182,9 +182,9 @@ def _create_project_store(self, if not project_store: project_store[default_project] = Project( - project=default_project, - project_dir=self.project_dir, - remote_storage=self.remote_storage + project=default_project, + project_dir=self.project_dir, + remote_storage=self.remote_storage ) return project_store @@ -251,16 +251,16 @@ def parse(self, data: Dict[Text, Any]) -> Dict[Text, Any]: if project not in projects: raise InvalidProjectError( - "No project found with name '{}'.".format(project)) + "No project found with name '{}'.".format(project)) else: try: self.project_store[project] = Project( - self.component_builder, project, - self.project_dir, self.remote_storage) + self.component_builder, project, + self.project_dir, 
self.remote_storage)
             except Exception as e:
                 raise InvalidProjectError(
-                    "Unable to load project '{}'. "
-                    "Error: {}".format(project, e))
+                        "Unable to load project '{}'. "
+                        "Error: {}".format(project, e))
 
         time = data.get('time')
         response = self.project_store[project].parse(data['text'], time,
@@ -313,8 +313,8 @@ def start_train_process(self,
             self.project_store[project].status = STATUS_TRAINING
         elif project not in self.project_store:
             self.project_store[project] = Project(
-                self.component_builder, project,
-                self.project_dir, self.remote_storage)
+                    self.component_builder, project,
+                    self.project_dir, self.remote_storage)
             self.project_store[project].status = STATUS_TRAINING
 
         def training_callback(model_path):
@@ -383,9 +383,9 @@ def evaluate(self,
             self.project_store[project]._loader_lock.release()
 
         return run_evaluation(
-            data_path=file_name,
-            model=self.project_store[project]._models[model_name],
-            errors_filename=None
+                data_path=file_name,
+                model=self.project_store[project]._models[model_name],
+                errors_filename=None
         )
 
     def unload_model(self,
diff --git a/rasa_nlu/evaluate.py b/rasa_nlu/evaluate.py
index 756a98acd98c..1fe76f343705 100644
--- a/rasa_nlu/evaluate.py
+++ b/rasa_nlu/evaluate.py
@@ -2,6 +2,7 @@
 from collections import defaultdict, namedtuple
 import json
+import os
 import logging
 import numpy as np
 import shutil
@@ -35,8 +36,8 @@ def create_argument_parser():
     import argparse
 
     parser = argparse.ArgumentParser(
-        description='evaluate a Rasa NLU pipeline with cross '
-                    'validation or on external data')
+            description='evaluate a Rasa NLU pipeline with cross '
+                        'validation or on external data')
 
     parser.add_argument('-d', '--data', required=True,
                         help="file containing training/evaluation data")
@@ -57,8 +58,9 @@ def create_argument_parser():
                         help="number of CV folds (crossvalidation only)")
 
     parser.add_argument('--report', required=False, nargs='?',
-                        const="report.json", default=False,
-                        help="output path to save the metrics report")
+                        const="reports", default=False,
+                        help="output path to save the intent/entity "
+                             "metrics report")
 
     parser.add_argument('--successes', required=False, nargs='?',
                         const="successes.json", default=False,
@@ -86,7 +88,6 @@ def plot_confusion_matrix(cm,
                           zmin=1,
                           out=None) -> None:  # pragma: no cover
     """Print and plot the confusion matrix for the intent classification.
-
     Normalization can be applied by setting `normalize=True`."""
     import matplotlib.pyplot as plt
     from matplotlib.colors import LogNorm
@@ -207,7 +208,7 @@ def drop_intents_below_freq(td: TrainingData, cutoff: int = 5):
     """Remove intent groups with less than cutoff instances."""
 
     logger.debug(
-        "Raw data intent examples: {}".format(len(td.intent_examples)))
+            "Raw data intent examples: {}".format(len(td.intent_examples)))
     keep_examples = [ex
                      for ex in td.intent_examples
                      if td.examples_per_intent[ex.get("intent")] >= cutoff]
@@ -277,7 +278,7 @@ def plot_intent_confidences(intent_results, intent_hist_filename):
 
 
 def evaluate_intents(intent_results,
-                     report_filename,
+                     report_folder,
                      successes_filename,
                      errors_filename,
                      confmat_filename,
@@ -301,13 +302,15 @@ def evaluate_intents(intent_results,
 
     targets, predictions = _targets_predictions_from(intent_results)
 
-    if report_filename:
+    if report_folder:
         report, precision, f1, accuracy = get_evaluation_metrics(
-            targets, predictions, output_dict=True)
+                targets, predictions, output_dict=True)
+
+        report_filename = os.path.join(report_folder, 'intent_report.json')
 
         save_json(report, report_filename)
         logger.info("Classification report saved to {}."
- .format(report_filename)) + "".format(report_filename)) else: report, precision, f1, accuracy = get_evaluation_metrics(targets, @@ -359,7 +362,6 @@ def evaluate_intents(intent_results, def merge_labels(aligned_predictions, extractor=None): """Concatenates all labels of the aligned predictions. - Takes the aligned prediction labels which are grouped for each message and concatenates them.""" @@ -382,9 +384,9 @@ def substitute_labels(labels, old, new): def evaluate_entities(targets, predictions, tokens, - extractors): # pragma: no cover + extractors, + report_folder): # pragma: no cover """Creates summary statistics for each entity extractor. - Logs precision, recall, and F1 per entity type for each extractor.""" aligned_predictions = align_all_entity_predictions(targets, predictions, @@ -397,11 +399,24 @@ def evaluate_entities(targets, for extractor in extractors: merged_predictions = merge_labels(aligned_predictions, extractor) merged_predictions = substitute_labels( - merged_predictions, "O", "no_entity") + merged_predictions, "O", "no_entity") logger.info("Evaluation for entity extractor: {} ".format(extractor)) - report, precision, f1, accuracy = get_evaluation_metrics( + if report_folder: + report, precision, f1, accuracy = get_evaluation_metrics( + merged_targets, merged_predictions, output_dict=True) + + report_filename = extractor + "_report.json" + extractor_report = os.path.join(report_folder, report_filename) + + save_json(report, extractor_report) + logger.info("Classification report for '{}' saved to '{}'." + "".format(extractor, extractor_report)) + + else: + report, precision, f1, accuracy = get_evaluation_metrics( merged_targets, merged_predictions) - log_evaluation_table(report, precision, f1, accuracy) + log_evaluation_table(report, precision, f1, accuracy) + result[extractor] = { "report": report, "precision": precision, @@ -434,9 +449,7 @@ def determine_intersection(token, entity): def do_entities_overlap(entities): """Checks if entities overlap. - I.e. cross each others start and end boundaries. - :param entities: list of entities :return: boolean """ @@ -454,7 +467,6 @@ def do_entities_overlap(entities): def find_intersecting_entites(token, entities): """Finds the entities that intersect with a token. - :param token: a single token :param entities: entities found by a single extractor :return: list of entities @@ -474,7 +486,6 @@ def find_intersecting_entites(token, entities): def pick_best_entity_fit(token, candidates): """Determines the token label given intersecting entities. - :param token: a single token :param candidates: entities found by a single extractor :return: entity type @@ -518,11 +529,9 @@ def do_extractors_support_overlap(extractors): def align_entity_predictions(targets, predictions, tokens, extractors): """Aligns entity predictions to the message tokens. - Determines for every token the true label based on the prediction targets and the label assigned by each single extractor. 
- :param targets: list of target entities :param predictions: list of predicted entities :param tokens: original message tokens @@ -538,7 +547,7 @@ def align_entity_predictions(targets, predictions, tokens, extractors): extractor_labels = defaultdict(list) for t in tokens: true_token_labels.append( - determine_token_labels(t, targets, None)) + determine_token_labels(t, targets, None)) for extractor, entities in entities_by_extractors.items(): extracted = determine_token_labels(t, entities, extractor) extractor_labels[extractor].append(extracted) @@ -550,7 +559,6 @@ def align_entity_predictions(targets, predictions, tokens, extractors): def align_all_entity_predictions(targets, predictions, tokens, extractors): """ Aligns entity predictions to the message tokens for the whole dataset using align_entity_predictions - :param targets: list of lists of target entities :param predictions: list of lists of predicted entities :param tokens: list of original message tokens @@ -606,10 +614,10 @@ def get_intent_predictions(targets, interpreter, for e, target in zip(test_data.training_examples, targets): res = interpreter.parse(e.text, only_output_properties=False) intent_results.append(IntentEvaluationResult( - target, - extract_intent(res), - extract_message(res), - extract_confidence(res))) + target, + extract_intent(res), + extract_message(res), + extract_confidence(res))) return intent_results @@ -631,7 +639,6 @@ def get_entity_predictions(interpreter, test_data): # pragma: no cover def get_entity_extractors(interpreter): """Finds the names of entity extractors used by the interpreter. - Processors are removed since they do not detect the boundaries themselves.""" @@ -655,7 +662,6 @@ def combine_extractor_and_dimension_name(extractor, dim): def get_duckling_dimensions(interpreter, duckling_extractor_name): """Gets the activated dimensions of a duckling extractor. 
- If there are no activated dimensions, it uses all known dimensions as a fallback.""" @@ -700,7 +706,7 @@ def remove_duckling_entities(entity_predictions): def run_evaluation(data_path, model, - report_filename=None, + report_folder=None, successes_filename=None, errors_filename='errors.json', confmat_filename=None, @@ -728,14 +734,17 @@ def run_evaluation(data_path, model, "entity_evaluation": None } + if report_folder: + utils.create_dir(report_folder) + if is_intent_classifier_present(interpreter): intent_targets = get_intent_targets(test_data) intent_results = get_intent_predictions( - intent_targets, interpreter, test_data) + intent_targets, interpreter, test_data) logger.info("Intent evaluation results:") result['intent_evaluation'] = evaluate_intents(intent_results, - report_filename, + report_folder, successes_filename, errors_filename, confmat_filename, @@ -748,7 +757,8 @@ def run_evaluation(data_path, model, result['entity_evaluation'] = evaluate_entities(entity_targets, entity_predictions, tokens, - extractors) + extractors, + report_folder) return result @@ -795,7 +805,6 @@ def run_cv_evaluation(data: TrainingData, n_folds: int, nlu_config: RasaNLUModelConfig) -> CVEvaluationResult: """Stratified cross validation on data - :param data: Training Data :param n_folds: integer, number of cv folds :param nlu_config: nlu config file @@ -936,7 +945,7 @@ def main(): data = training_data.load_data(cmdline_args.data) data = drop_intents_below_freq(data, cutoff=5) results, entity_results = run_cv_evaluation( - data, int(cmdline_args.folds), nlu_config) + data, int(cmdline_args.folds), nlu_config) logger.info("CV evaluation (n={})".format(cmdline_args.folds)) if any(results): diff --git a/rasa_nlu/extractors/__init__.py b/rasa_nlu/extractors/__init__.py index 613e68a9db31..42fa08f261e8 100644 --- a/rasa_nlu/extractors/__init__.py +++ b/rasa_nlu/extractors/__init__.py @@ -62,9 +62,9 @@ def filter_trainable_entities(self, data = message.data.copy() data['entities'] = entities filtered.append( - Message(text=message.text, - data=data, - output_properties=message.output_properties, - time=message.time)) + Message(text=message.text, + data=data, + output_properties=message.output_properties, + time=message.time)) return filtered diff --git a/rasa_nlu/extractors/crf_entity_extractor.py b/rasa_nlu/extractors/crf_entity_extractor.py index f24c7b555653..8a8b28232dd2 100644 --- a/rasa_nlu/extractors/crf_entity_extractor.py +++ b/rasa_nlu/extractors/crf_entity_extractor.py @@ -97,10 +97,10 @@ def _check_pos_features_and_spacy(self): def _check_spacy(): if spacy is None: raise ImportError( - 'Failed to import `spaCy`. ' - '`spaCy` is required for POS features ' - 'See https://spacy.io/usage/ for installation' - 'instructions.') + 'Failed to import `spaCy`. 
'
+                '`spaCy` is required for POS features '
+                'See https://spacy.io/usage/ for installation '
+                'instructions.')
 
     def _validate_configuration(self):
         if len(self.component_config.get("features", [])) % 2 != 1:
@@ -127,7 +127,7 @@ def train(self,
 
         # filter out pre-trained entity examples
         filtered_entity_examples = self.filter_trainable_entities(
-                training_data.training_examples)
+                    training_data.training_examples)
 
         # convert the dataset into features
         # this will train on ALL examples, even the ones
@@ -148,12 +148,12 @@ def _create_dataset(self,
 
     def _check_spacy_doc(self, message):
         if self.pos_features and message.get("spacy_doc") is None:
            raise InvalidConfigError(
-            'Could not find `spacy_doc` attribute for '
-            'message {}\n'
-            'POS features require a pipeline component '
-            'that provides `spacy_doc` attributes, i.e. `nlp_spacy`. '
-            'See https://nlu.rasa.com/pipeline.html#nlp-spacy '
-            'for details'.format(message.text))
+                'Could not find `spacy_doc` attribute for '
+                'message {}\n'
+                'POS features require a pipeline component '
+                'that provides `spacy_doc` attributes, i.e. `nlp_spacy`. '
+                'See https://nlu.rasa.com/pipeline.html#nlp-spacy '
+                'for details'.format(message.text))
 
     def process(self, message: Message, **kwargs: Any) -> None:
 
@@ -241,7 +241,7 @@ def _find_bilou_end(self, word_idx, entities):
         while not finished:
             label, label_confidence = self.most_likely_entity(
-                ent_word_idx, entities)
+                    ent_word_idx, entities)
 
             confidence = min(confidence, label_confidence)
 
@@ -277,8 +277,8 @@ def _handle_bilou_label(self, word_idx, entities):
         elif self._bilou_from_label(label) == "B":
             # start of multi word-entity need to represent whole extent
             ent_word_idx, confidence = self._find_bilou_end(
-                word_idx, entities)
+                    word_idx, entities)
             return ent_word_idx, confidence, entity_label
 
         else:
@@ -298,11 +298,11 @@ def _from_crf_to_json(self,
 
         if self.component_config["BILOU_flag"]:
             return self._convert_bilou_tagging_to_entity_result(
-                tokens, entities)
+                    tokens, entities)
         else:
             # not using BILOU tagging scheme, multi-word entities are split.
return self._convert_simple_tagging_to_entity_result( - tokens, entities) + tokens, entities) def _convert_bilou_tagging_to_entity_result(self, tokens, entities): # using the BILOU tagging scheme @@ -310,7 +310,7 @@ def _convert_bilou_tagging_to_entity_result(self, tokens, entities): word_idx = 0 while word_idx < len(tokens): end_idx, confidence, entity_label = self._handle_bilou_label( - word_idx, entities) + word_idx, entities) if end_idx is not None: ent = self._create_entity_dict(tokens, @@ -329,7 +329,7 @@ def _convert_simple_tagging_to_entity_result(self, tokens, entities): for word_idx in range(len(tokens)): entity_label, confidence = self.most_likely_entity( - word_idx, entities) + word_idx, entities) word = tokens[word_idx] if entity_label != 'O': if self.pos_features: @@ -413,9 +413,9 @@ def _sentence_to_features(self, # add all regexes as a feature regex_patterns = self.function_dict[feature](word) for p_name, matched in regex_patterns.items(): - feature_name = (prefix + ":" - + feature - + ":" + p_name) + feature_name = (prefix + ":" + + feature + + ":" + p_name) word_features[feature_name] = matched else: # append each feature to a feature vector @@ -535,14 +535,14 @@ def _train_model(self, X_train = [self._sentence_to_features(sent) for sent in df_train] y_train = [self._sentence_to_labels(sent) for sent in df_train] self.ent_tagger = sklearn_crfsuite.CRF( - algorithm='lbfgs', - # coefficient for L1 penalty - c1=self.component_config["L1_c"], - # coefficient for L2 penalty - c2=self.component_config["L2_c"], - # stop earlier - max_iterations=self.component_config["max_iterations"], - # include transitions that are possible, but not observed - all_possible_transitions=True + algorithm='lbfgs', + # coefficient for L1 penalty + c1=self.component_config["L1_c"], + # coefficient for L2 penalty + c2=self.component_config["L2_c"], + # stop earlier + max_iterations=self.component_config["max_iterations"], + # include transitions that are possible, but not observed + all_possible_transitions=True ) self.ent_tagger.fit(X_train, y_train) diff --git a/rasa_nlu/extractors/entity_synonyms.py b/rasa_nlu/extractors/entity_synonyms.py index 586720ff6f82..dd3d396e865d 100644 --- a/rasa_nlu/extractors/entity_synonyms.py +++ b/rasa_nlu/extractors/entity_synonyms.py @@ -94,8 +94,8 @@ def add_entities_if_synonyms(self, entity_a, entity_b): if original != replacement: original = original.lower() - if (original in self.synonyms - and self.synonyms[original] != replacement): + if (original in self.synonyms and + self.synonyms[original] != replacement): warnings.warn("Found conflicting synonym definitions " "for {}. Overwriting target {} with {}. 
" "Check your training data and remove " diff --git a/rasa_nlu/extractors/mitie_entity_extractor.py b/rasa_nlu/extractors/mitie_entity_extractor.py index 0fb592bbe7d1..8ceca799a377 100644 --- a/rasa_nlu/extractors/mitie_entity_extractor.py +++ b/rasa_nlu/extractors/mitie_entity_extractor.py @@ -75,7 +75,7 @@ def train(self, # filter out pre-trained entity examples filtered_entity_examples = self.filter_trainable_entities( - training_data.training_examples) + training_data.training_examples) for example in filtered_entity_examples: sample = self._prepare_mitie_sample(example) @@ -97,7 +97,7 @@ def _prepare_mitie_sample(self, training_example): try: # if the token is not aligned an exception will be raised start, end = MitieEntityExtractor.find_entity( - ent, text, tokens) + ent, text, tokens) except ValueError as e: logger.warning("Example skipped: {}".format(str(e))) continue diff --git a/rasa_nlu/featurizers/mitie_featurizer.py b/rasa_nlu/featurizers/mitie_featurizer.py index a567862f0c3c..ba3a9780ac26 100644 --- a/rasa_nlu/featurizers/mitie_featurizer.py +++ b/rasa_nlu/featurizers/mitie_featurizer.py @@ -37,7 +37,7 @@ def train(self, mitie_feature_extractor) example.set("text_features", self._combine_with_existing_text_features( - example, features)) + example, features)) def process(self, message: Message, **kwargs: Any) -> None: diff --git a/rasa_nlu/featurizers/ngram_featurizer.py b/rasa_nlu/featurizers/ngram_featurizer.py index 2cfa20bf996f..63c388fbfba6 100644 --- a/rasa_nlu/featurizers/ngram_featurizer.py +++ b/rasa_nlu/featurizers/ngram_featurizer.py @@ -130,7 +130,7 @@ def _get_best_ngrams(self, examples, labels): oov_strings = self._remove_in_vocab_words(examples) ngrams = self._generate_all_ngrams( - oov_strings, self.component_config["ngram_min_length"]) + oov_strings, self.component_config["ngram_min_length"]) return self._sort_applicable_ngrams(ngrams, examples, labels) def _remove_in_vocab_words(self, examples): @@ -148,9 +148,9 @@ def _is_ngram_worthy(token): Excludes every word with digits in them, hyperlinks or an assigned word vector.""" - return (not token.has_vector and not token.like_url - and not token.like_num and not token.like_email - and not token.is_punct) + return (not token.has_vector and not token.like_url and not + token.like_num and not token.like_email and not + token.is_punct) def _remove_in_vocab_words_from_sentence(self, example): """Filter for words that do not have a word vector.""" @@ -221,7 +221,7 @@ def _sort_applicable_ngrams(self, ngrams_list, examples, labels): labels = np.array(labels)[mask] return self._rank_ngrams_using_cv( - examples, labels, ngrams_list) + examples, labels, ngrams_list) except ValueError as e: if "needs samples of at least 2 classes" in str(e): # we got unlucky during the random @@ -294,11 +294,11 @@ def _generate_all_ngrams(self, list_of_strings, ngram_min_length): begin = can[:-1] end = can[1:] if n >= ngram_min_length: - if (counters[n - 1][begin] == counters[n][can] - and begin in features[n - 1]): + if (counters[n - 1][begin] == counters[n][can] and + begin in features[n - 1]): features[n - 1].remove(begin) - if (counters[n - 1][end] == counters[n][can] - and end in features[n - 1]): + if (counters[n - 1][end] == counters[n][can] and + end in features[n - 1]): features[n - 1].remove(end) return [item for sublist in list(features.values()) for item in sublist] @@ -349,7 +349,7 @@ def _score_ngram_selection(self, examples, y, existing_text_features, clf = LogisticRegression(class_weight='balanced') no_ngrams_X = 
self._append_ngram_features( - examples, existing_text_features, max_ngrams) + examples, existing_text_features, max_ngrams) return np.mean(cross_val_score(clf, no_ngrams_X, y, cv=cv_splits)) @staticmethod diff --git a/rasa_nlu/model.py b/rasa_nlu/model.py index f557849149fc..9a343c886012 100644 --- a/rasa_nlu/model.py +++ b/rasa_nlu/model.py @@ -158,7 +158,7 @@ def _build_pipeline(cfg: RasaNLUModelConfig, return pipeline - def train(self, data: TrainingData, **kwargs: Any)-> 'Interpreter': + def train(self, data: TrainingData, **kwargs: Any) -> 'Interpreter': """Trains the underlying pipeline using the provided training data.""" self.training_data = data @@ -195,7 +195,7 @@ def persist(self, path: Text, persistor: Optional[Persistor] = None, project_name: Text = None, - fixed_model_name: Text = None)-> Text: + fixed_model_name: Text = None) -> Text: """Persist all components of the pipeline to the passed path. Returns the directory of the persisted model.""" diff --git a/rasa_nlu/persistor.py b/rasa_nlu/persistor.py index 215112234683..1628b5bdef5e 100644 --- a/rasa_nlu/persistor.py +++ b/rasa_nlu/persistor.py @@ -43,7 +43,7 @@ def persist(self, "found.".format(model_directory)) file_key, tar_path = self._compress( - model_directory, model_name, project) + model_directory, model_name, project) self._persist_tar(file_key, tar_path) def retrieve(self, @@ -122,7 +122,7 @@ def _decompress(compressed_path: Text, target_path: Text) -> None: with tarfile.open(compressed_path, "r:gz") as tar: tar.extractall( - target_path) # project dir will be created if it not exists + target_path) # project dir will be created if it not exists class AWSPersistor(Persistor): @@ -205,7 +205,7 @@ def list_models(self, project: Text) -> List[Text]: try: blob_iterator = self.bucket.list_blobs( - prefix=self._project_prefix(project)) + prefix=self._project_prefix(project)) return [self._project_and_model_from_filename(b.name)[1] for b in blob_iterator] except Exception as e: @@ -259,9 +259,9 @@ def __init__(self, super(AzurePersistor, self).__init__() self.blob_client = azureblob.BlockBlobService( - account_name=azure_account_name, - account_key=azure_account_key, - endpoint_suffix="core.windows.net") + account_name=azure_account_name, + account_key=azure_account_key, + endpoint_suffix="core.windows.net") self._ensure_container_exists(azure_container) self.container_name = azure_container @@ -276,8 +276,8 @@ def list_models(self, project: Text) -> List[Text]: try: blob_iterator = self.blob_client.list_blobs( - self.container_name, - prefix=self._project_prefix(project) + self.container_name, + prefix=self._project_prefix(project) ) return [self._project_and_model_from_filename(b.name)[1] for b in blob_iterator] @@ -290,8 +290,8 @@ def list_projects(self) -> List[Text]: try: # noinspection PyTypeChecker blob_iterator = self.blob_client.list_blobs( - self.container_name, - prefix=None + self.container_name, + prefix=None ) projects_set = {self._project_and_model_from_filename(b.name)[0] for b in blob_iterator} @@ -305,16 +305,16 @@ def _persist_tar(self, file_key: Text, tar_path: Text) -> None: """Uploads a model persisted in the `target_dir` to Azure.""" self.blob_client.create_blob_from_path( - self.container_name, - file_key, - tar_path + self.container_name, + file_key, + tar_path ) def _retrieve_tar(self, target_filename: Text) -> None: """Downloads a model that has previously been persisted to Azure.""" self.blob_client.get_blob_to_path( - self.container_name, - target_filename, - target_filename + 
self.container_name, + target_filename, + target_filename ) diff --git a/rasa_nlu/project.py b/rasa_nlu/project.py index 662da1106348..12d9fc4e097b 100644 --- a/rasa_nlu/project.py +++ b/rasa_nlu/project.py @@ -66,7 +66,7 @@ def _update_model_from_server(model_server: EndpointConfig, model_directory = tempfile.mkdtemp() new_model_fingerprint, filename = _pull_model_and_fingerprint( - model_server, model_directory, project.fingerprint) + model_server, model_directory, project.fingerprint) if new_model_fingerprint: model_name = _get_remote_model_name(filename) project.fingerprint = new_model_fingerprint @@ -296,13 +296,13 @@ def update_model_from_dir_and_unload_others(self, # noinspection PyUnusedLocal status = False - logger.debug('Loading model {} from directory {}'.format( - model_name, model_dir)) + logger.debug("Loading model '{}' from directory '{}'.".format( + model_name, model_dir)) self._loader_lock.acquire() try: interpreter = self._interpreter_for_model( - model_name, model_dir) + model_name, model_dir) self._models[model_name] = interpreter status = True finally: diff --git a/rasa_nlu/registry.py b/rasa_nlu/registry.py index 42cd43061243..6ba5704883ff 100644 --- a/rasa_nlu/registry.py +++ b/rasa_nlu/registry.py @@ -101,13 +101,13 @@ def get_component_class(component_name: Text) -> Type['Component']: return utils.class_from_module_path(component_name) except Exception: raise Exception( - "Failed to find component class for '{}'. Unknown " - "component name. Check your configured pipeline and make " - "sure the mentioned component is not misspelled. If you " - "are creating your own component, make sure it is either " - "listed as part of the `component_classes` in " - "`rasa_nlu.registry.py` or is a proper name of a class " - "in a module.".format(component_name)) + "Failed to find component class for '{}'. Unknown " + "component name. Check your configured pipeline and make " + "sure the mentioned component is not misspelled. 
If you " + "are creating your own component, make sure it is either " + "listed as part of the `component_classes` in " + "`rasa_nlu.registry.py` or is a proper name of a class " + "in a module.".format(component_name)) return registered_components[component_name] diff --git a/rasa_nlu/run.py b/rasa_nlu/run.py index 68fc95024769..926331613862 100644 --- a/rasa_nlu/run.py +++ b/rasa_nlu/run.py @@ -10,8 +10,8 @@ def create_argument_parser(): import argparse parser = argparse.ArgumentParser( - description='run a Rasa NLU model locally on the command line ' - 'for manual testing') + description='run a Rasa NLU model locally on the command line ' + 'for manual testing') parser.add_argument('-m', '--model', required=True, help="path to model") diff --git a/rasa_nlu/server.py b/rasa_nlu/server.py index d04ec40f08bb..fa5749bf508c 100644 --- a/rasa_nlu/server.py +++ b/rasa_nlu/server.py @@ -108,19 +108,17 @@ def decorated(*args, **kwargs): if '*' in self.cors_origins: request.setHeader('Access-Control-Allow-Origin', '*') request.setHeader( - 'Access-Control-Allow-Headers', - 'Content-Type') + 'Access-Control-Allow-Headers', 'Content-Type') request.setHeader( - 'Access-Control-Allow-Methods', - 'POST, GET, OPTIONS, PUT, DELETE') + 'Access-Control-Allow-Methods', + 'POST, GET, OPTIONS, PUT, DELETE') elif origin in self.cors_origins: request.setHeader('Access-Control-Allow-Origin', origin) request.setHeader( - 'Access-Control-Allow-Headers', - 'Content-Type') + 'Access-Control-Allow-Headers', 'Content-Type') request.setHeader( - 'Access-Control-Allow-Methods', - 'POST, GET, OPTIONS, PUT, DELETE') + 'Access-Control-Allow-Methods', + 'POST, GET, OPTIONS, PUT, DELETE') else: request.setResponseCode(403) return 'forbidden' @@ -191,7 +189,7 @@ def __init__(self, self._configure_logging(loglevel, logfile) self.default_model_config = self._load_default_config( - default_config_path) + default_config_path) self.data_router = data_router self._testing = testing @@ -228,7 +226,7 @@ def parse(self, request): request_params = decode_parameters(request) else: request_params = simplejson.loads( - request.content.read().decode('utf-8', 'strict')) + request.content.read().decode('utf-8', 'strict')) if 'query' in request_params: request_params['q'] = request_params.pop('query') @@ -236,7 +234,7 @@ def parse(self, request): if 'q' not in request_params: request.setResponseCode(404) dumped = json_to_string( - {"error": "Invalid parse parameter specified"}) + {"error": "Invalid parse parameter specified"}) returnValue(dumped) else: data = self.data_router.extract(request_params) @@ -244,7 +242,7 @@ def parse(self, request): request.setResponseCode(200) response = yield (self.data_router.parse(data) if self._testing else threads.deferToThread( - self.data_router.parse, data)) + self.data_router.parse, data)) returnValue(json_to_string(response)) except InvalidProjectError as e: request.setResponseCode(404) @@ -262,8 +260,8 @@ def version(self, request): request.setHeader('Content-Type', 'application/json') return json_to_string( - {'version': __version__, - 'minimum_compatible_version': MINIMUM_COMPATIBLE_VERSION} + {'version': __version__, + 'minimum_compatible_version': MINIMUM_COMPATIBLE_VERSION} ) @app.route("/status", methods=['GET', 'OPTIONS']) @@ -348,8 +346,8 @@ def train(self, request): request.setResponseCode(200) response = yield self.data_router.start_train_process( - data_file, project, - RasaNLUModelConfig(model_config), model_name) + data_file, project, + RasaNLUModelConfig(model_config), model_name) 
returnValue(json_to_string({'info': 'new model trained', 'model': response})) except MaxTrainingError as e: @@ -398,9 +396,8 @@ def unload_model(self, request): try: request.setResponseCode(200) response = self.data_router.unload_model( - params.get('project', - RasaNLUModelConfig.DEFAULT_PROJECT_NAME), - params.get('model') + params.get('project', RasaNLUModelConfig.DEFAULT_PROJECT_NAME), + params.get('model') ) return simplejson.dumps(response) except Exception as e: @@ -419,13 +416,13 @@ def unload_model(self, request): _endpoints = read_endpoints(cmdline_args.endpoints) router = DataRouter( - cmdline_args.path, - cmdline_args.max_training_processes, - cmdline_args.response_log, - cmdline_args.emulate, - cmdline_args.storage, - model_server=_endpoints.model, - wait_time_between_pulls=cmdline_args.wait_time_between_pulls + cmdline_args.path, + cmdline_args.max_training_processes, + cmdline_args.response_log, + cmdline_args.emulate, + cmdline_args.storage, + model_server=_endpoints.model, + wait_time_between_pulls=cmdline_args.wait_time_between_pulls ) if pre_load: logger.debug('Preloading....') @@ -434,13 +431,13 @@ def unload_model(self, request): router._pre_load(pre_load) rasa = RasaNLU( - router, - cmdline_args.loglevel, - cmdline_args.write, - cmdline_args.num_threads, - cmdline_args.token, - cmdline_args.cors, - default_config_path=cmdline_args.config + router, + cmdline_args.loglevel, + cmdline_args.write, + cmdline_args.num_threads, + cmdline_args.token, + cmdline_args.cors, + default_config_path=cmdline_args.config ) logger.info('Started http server on port %s' % cmdline_args.port) diff --git a/rasa_nlu/tokenizers/spacy_tokenizer.py b/rasa_nlu/tokenizers/spacy_tokenizer.py index dd8a0c62ef91..a16d7f2d6086 100644 --- a/rasa_nlu/tokenizers/spacy_tokenizer.py +++ b/rasa_nlu/tokenizers/spacy_tokenizer.py @@ -20,15 +20,15 @@ class SpacyTokenizer(Tokenizer, Component): def train(self, training_data: TrainingData, config: RasaNLUModelConfig, - **kwargs: Any)-> None: + **kwargs: Any) -> None: for example in training_data.training_examples: example.set("tokens", self.tokenize(example.get("spacy_doc"))) - def process(self, message: Message, **kwargs: Any)-> None: + def process(self, message: Message, **kwargs: Any) -> None: message.set("tokens", self.tokenize(message.get("spacy_doc"))) - def tokenize(self, doc: 'Doc')-> typing.List[Token]: + def tokenize(self, doc: 'Doc') -> typing.List[Token]: return [Token(t.text, t.idx) for t in doc] diff --git a/rasa_nlu/train.py b/rasa_nlu/train.py index f15bb5196401..e3b9be7eacf7 100644 --- a/rasa_nlu/train.py +++ b/rasa_nlu/train.py @@ -15,7 +15,7 @@ def create_argument_parser(): parser = argparse.ArgumentParser( - description='train a custom language parser') + description='train a custom language parser') parser.add_argument('-o', '--path', default="models/nlu/", diff --git a/rasa_nlu/training_data/formats/markdown.py b/rasa_nlu/training_data/formats/markdown.py index 56bd4b4713c4..524ea558721c 100644 --- a/rasa_nlu/training_data/formats/markdown.py +++ b/rasa_nlu/training_data/formats/markdown.py @@ -89,7 +89,7 @@ def _load_files(self, line): if match: fname = match.group(1) self.lookup_tables.append( - {"name": self.current_title, "elements": str(fname)}) + {"name": self.current_title, "elements": str(fname)}) def _parse_item(self, line): """Parses an md list item line based on the current section type.""" @@ -103,7 +103,7 @@ def _parse_item(self, line): self._add_synonym(item, self.current_title) elif self.current_section == REGEX: 
self.regex_features.append( - {"name": self.current_title, "pattern": item}) + {"name": self.current_title, "pattern": item}) elif self.current_section == LOOKUP: self._add_item_to_lookup(item) diff --git a/rasa_nlu/training_data/training_data.py b/rasa_nlu/training_data/training_data.py index e4ad1fdc5c1b..195ee752bdce 100644 --- a/rasa_nlu/training_data/training_data.py +++ b/rasa_nlu/training_data/training_data.py @@ -191,24 +191,24 @@ def train_test_split(self, test.extend(ex[n_train:]) data_train = TrainingData( - train, - entity_synonyms=self.entity_synonyms, - regex_features=self.regex_features, - lookup_tables=self.lookup_tables) + train, + entity_synonyms=self.entity_synonyms, + regex_features=self.regex_features, + lookup_tables=self.lookup_tables) data_test = TrainingData( - test, - entity_synonyms=self.entity_synonyms, - regex_features=self.regex_features, - lookup_tables=self.lookup_tables) + test, + entity_synonyms=self.entity_synonyms, + regex_features=self.regex_features, + lookup_tables=self.lookup_tables) return data_train, data_test def print_stats(self) -> None: logger.info("Training data stats: \n" + "\t- intent examples: {} ({} distinct intents)\n".format( - len(self.intent_examples), len(self.intents)) + + len(self.intent_examples), len(self.intents)) + "\t- Found intents: {}\n".format( - list_to_str(self.intents)) + + list_to_str(self.intents)) + "\t- entity examples: {} ({} distinct entities)\n".format( - len(self.entity_examples), len(self.entities)) + + len(self.entity_examples), len(self.entities)) + "\t- found entities: {}\n".format( - list_to_str(self.entities))) + list_to_str(self.entities))) diff --git a/rasa_nlu/utils/__init__.py b/rasa_nlu/utils/__init__.py index a09591dcd3a6..15490e470ce4 100644 --- a/rasa_nlu/utils/__init__.py +++ b/rasa_nlu/utils/__init__.py @@ -21,20 +21,20 @@ def add_logging_option_arguments(parser, default=logging.WARNING): # arguments for logging configuration parser.add_argument( - '--debug', - help="Print lots of debugging statements. " - "Sets logging level to DEBUG", - action="store_const", - dest="loglevel", - const=logging.DEBUG, - default=default, + '--debug', + help="Print lots of debugging statements. " + "Sets logging level to DEBUG", + action="store_const", + dest="loglevel", + const=logging.DEBUG, + default=default, ) parser.add_argument( - '-v', '--verbose', - help="Be verbose. Sets logging level to INFO", - action="store_const", - dest="loglevel", - const=logging.INFO, + '-v', '--verbose', + help="Be verbose. 
Sets logging level to INFO", + action="store_const", + dest="loglevel", + const=logging.INFO, ) @@ -330,11 +330,11 @@ def configure_colored_logging(loglevel: Text) -> None: level_styles = coloredlogs.DEFAULT_LEVEL_STYLES.copy() level_styles['debug'] = {} coloredlogs.install( - level=loglevel, - use_chroot=False, - fmt='%(asctime)s %(levelname)-8s %(name)s - %(message)s', - level_styles=level_styles, - field_styles=field_styles) + level=loglevel, + use_chroot=False, + fmt='%(asctime)s %(levelname)-8s %(name)s - %(message)s', + level_styles=level_styles, + field_styles=field_styles) def pycloud_unpickle(file_name: Text) -> Any: @@ -494,9 +494,7 @@ def request(self, @classmethod def from_dict(cls, data): - return EndpointConfig( - data.pop("url"), - **data) + return EndpointConfig(data.pop("url"), **data) def __eq__(self, other): if isinstance(self, type(other)): diff --git a/setup.cfg b/setup.cfg index 681b38148525..b75e577060f4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,11 @@ -# pytest PEP8 configuration +# pytest pycodestyle configuration [tool:pytest] -pep8maxlinelength = 80 -pep8ignore = - docs/conf.py ALL - *.py E251 - *.py W503 - *.py E126 - -# ignoring W503: line break occurred before a binary operator -# ignoring E126: continuation line over-indented for hanging indent +codestyle_max_line_length = 80 +# ignoring W504: line break occurred after a binary operator +codestyle_ignore = + W504 +codestyle_exclude = + docs/conf.py [metadata] description-file = README.md diff --git a/setup.py b/setup.py index 936f013e2d1a..8b7f4a4dc9d8 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ tests_requires = [ "pytest~=3.3", - "pytest-pep8~=1.0", + "pytest-pycodestyle~=1.4", "pytest-cov~=2.5", "pytest-twisted<1.6", "treq~=17.8", @@ -50,7 +50,9 @@ ], 'tensorflow': ["sklearn-crfsuite~=0.3.6", "scipy~=1.1", - "tensorflow~=1.12" + "tensorflow~=1.12", + "keras-applications==1.0.6", + "keras-preprocessing==1.0.5" ], 'mitie': ["mitie"], } diff --git a/tests/base/test_components.py b/tests/base/test_components.py index 33cdb2a9f495..ffe8dc304629 100644 --- a/tests/base/test_components.py +++ b/tests/base/test_components.py @@ -80,10 +80,10 @@ def test_example_component(component_builder, tmpdir_factory): {"name": "tests.example_component.MyComponent"}]}) interpreter = utilities.interpreter_for( - component_builder, - data="./data/examples/rasa/demo-rasa.json", - path=tmpdir_factory.mktemp("projects").strpath, - config=conf) + component_builder, + data="./data/examples/rasa/demo-rasa.json", + path=tmpdir_factory.mktemp("projects").strpath, + config=conf) r = interpreter.parse("test") assert r is not None diff --git a/tests/base/test_evaluation.py b/tests/base/test_evaluation.py index cef8e14787b5..d6b53a5ef1ea 100644 --- a/tests/base/test_evaluation.py +++ b/tests/base/test_evaluation.py @@ -11,7 +11,7 @@ get_duckling_dimensions, known_duckling_dimensions, find_component, remove_duckling_extractors, drop_intents_below_freq, run_cv_evaluation, substitute_labels, IntentEvaluationResult, - evaluate_intents) + evaluate_intents, evaluate_entities) from rasa_nlu.evaluate import does_token_cross_borders from rasa_nlu.evaluate import align_entity_predictions from rasa_nlu.evaluate import determine_intersection @@ -20,6 +20,7 @@ from rasa_nlu.tokenizers import Token from rasa_nlu import utils import json +import os from rasa_nlu import training_data, config from tests import utilities @@ -257,10 +258,13 @@ def test_run_cv_evaluation(): assert len(entity_results.test['ner_crf']["F1-score"]) == n_folds 
-def test_evaluation_report(tmpdir_factory): +def test_intent_evaluation_report(tmpdir_factory): path = tmpdir_factory.mktemp("evaluation").strpath - report_filename = path + "report.json" + report_folder = os.path.join(path, "reports") + report_filename = os.path.join(report_folder, "intent_report.json") + + utils.create_dir(report_folder) intent_results = [ IntentEvaluationResult("", "restaurant_search", @@ -269,7 +273,7 @@ def test_evaluation_report(tmpdir_factory): "hello", 0.98765)] result = evaluate_intents(intent_results, - report_filename, + report_folder, successes_filename=None, errors_filename=None, confmat_filename=None, @@ -292,6 +296,32 @@ def test_evaluation_report(tmpdir_factory): assert result["predictions"][0] == prediction +def test_entity_evaluation_report(tmpdir_factory): + + path = tmpdir_factory.mktemp("evaluation").strpath + report_folder = os.path.join(path, "reports") + + mock_extractors = ["A", "B"] + report_filename_a = os.path.join(report_folder, "A_report.json") + report_filename_b = os.path.join(report_folder, "B_report.json") + + utils.create_dir(report_folder) + + result = evaluate_entities([EN_targets], + [EN_predicted], + [EN_tokens], + mock_extractors, + report_folder) + + report_a = json.loads(utils.read_file(report_filename_a)) + report_b = json.loads(utils.read_file(report_filename_b)) + + assert len(report_a) == 8 + assert report_a["datetime"]["support"] == 1.0 + assert report_b["macro avg"]["recall"] == 0.2 + assert result["A"]["accuracy"] == 0.75 + + def test_empty_intent_removal(): intent_results = [ IntentEvaluationResult("", "restaurant_search", @@ -308,7 +338,7 @@ def test_empty_intent_removal(): assert intent_results[0].message == "hello" -def test_evaluate_entities(): +def test_evaluate_entities_cv(): mock_extractors = ["A", "B"] result = align_entity_predictions(EN_targets, EN_predicted, EN_tokens, mock_extractors) diff --git a/tests/base/test_featurizers.py b/tests/base/test_featurizers.py index 855311957ac1..931b0cbfb5c5 100644 --- a/tests/base/test_featurizers.py +++ b/tests/base/test_featurizers.py @@ -44,14 +44,14 @@ def test_ngram_featurizer(spacy_nlp): greet = {"intent": "greet", "text_features": [0.5]} goodbye = {"intent": "goodbye", "text_features": [0.5]} labeled_sentences = [ - Message("heyheyheyhey", greet), - Message("howdyheyhowdy", greet), - Message("heyhey howdyheyhowdy", greet), - Message("howdyheyhowdy heyhey", greet), - Message("astalavistasista", goodbye), - Message("astalavistasista sistala", goodbye), - Message("sistala astalavistasista", goodbye), - ] * repetition_factor + Message("heyheyheyhey", greet), + Message("howdyheyhowdy", greet), + Message("heyhey howdyheyhowdy", greet), + Message("howdyheyhowdy heyhey", greet), + Message("astalavistasista", goodbye), + Message("astalavistasista sistala", goodbye), + Message("sistala astalavistasista", goodbye), + ] * repetition_factor for m in labeled_sentences: m.set("spacy_doc", spacy_nlp(m.text)) @@ -148,7 +148,7 @@ def test_spacy_featurizer_casing(spacy_nlp): assert np.allclose(vecs, vecs_capitalized, atol=1e-5), \ "Vectors are unequal for texts '{}' and '{}'".format( - e.text, e.text.capitalize()) + e.text, e.text.capitalize()) @pytest.mark.parametrize("sentence, expected", [ @@ -262,9 +262,9 @@ def test_count_vector_featurizer_using_tokens(tokens, expected): @pytest.mark.parametrize("sentence, expected", [ - ("ababab", [3, 3, 3, 2]), - ("ab ab ab", [2, 2, 3, 3, 3, 2]), - ("abc", [1, 1, 1, 1, 1]) + ("ababab", [3, 3, 3, 2]), + ("ab ab ab", [2, 2, 3, 3, 3, 2]), + ("abc", 
diff --git a/setup.py b/setup.py
index 936f013e2d1a..8b7f4a4dc9d8 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@

 tests_requires = [
     "pytest~=3.3",
-    "pytest-pep8~=1.0",
+    "pytest-pycodestyle~=1.4",
     "pytest-cov~=2.5",
     "pytest-twisted<1.6",
     "treq~=17.8",
@@ -50,7 +50,9 @@
     ],
     'tensorflow': ["sklearn-crfsuite~=0.3.6",
                    "scipy~=1.1",
-                   "tensorflow~=1.12"
+                   "tensorflow~=1.12",
+                   "keras-applications==1.0.6",
+                   "keras-preprocessing==1.0.5"
                    ],
     'mitie': ["mitie"],
 }
diff --git a/tests/base/test_components.py b/tests/base/test_components.py
index 33cdb2a9f495..ffe8dc304629 100644
--- a/tests/base/test_components.py
+++ b/tests/base/test_components.py
@@ -80,10 +80,10 @@ def test_example_component(component_builder, tmpdir_factory):
         {"name": "tests.example_component.MyComponent"}]})

     interpreter = utilities.interpreter_for(
-            component_builder,
-            data="./data/examples/rasa/demo-rasa.json",
-            path=tmpdir_factory.mktemp("projects").strpath,
-            config=conf)
+        component_builder,
+        data="./data/examples/rasa/demo-rasa.json",
+        path=tmpdir_factory.mktemp("projects").strpath,
+        config=conf)

     r = interpreter.parse("test")
     assert r is not None
diff --git a/tests/base/test_evaluation.py b/tests/base/test_evaluation.py
index cef8e14787b5..d6b53a5ef1ea 100644
--- a/tests/base/test_evaluation.py
+++ b/tests/base/test_evaluation.py
@@ -11,7 +11,7 @@
     get_duckling_dimensions, known_duckling_dimensions,
     find_component, remove_duckling_extractors, drop_intents_below_freq,
     run_cv_evaluation, substitute_labels, IntentEvaluationResult,
-    evaluate_intents)
+    evaluate_intents, evaluate_entities)
 from rasa_nlu.evaluate import does_token_cross_borders
 from rasa_nlu.evaluate import align_entity_predictions
 from rasa_nlu.evaluate import determine_intersection
@@ -20,6 +20,7 @@
 from rasa_nlu.tokenizers import Token
 from rasa_nlu import utils
 import json
+import os
 from rasa_nlu import training_data, config
 from tests import utilities

@@ -257,10 +258,13 @@ def test_run_cv_evaluation():
     assert len(entity_results.test['ner_crf']["F1-score"]) == n_folds

-def test_evaluation_report(tmpdir_factory):
+def test_intent_evaluation_report(tmpdir_factory):
     path = tmpdir_factory.mktemp("evaluation").strpath
-    report_filename = path + "report.json"
+    report_folder = os.path.join(path, "reports")
+    report_filename = os.path.join(report_folder, "intent_report.json")
+
+    utils.create_dir(report_folder)

     intent_results = [
         IntentEvaluationResult("", "restaurant_search",
                                "I am hungry", 0.12345),
         IntentEvaluationResult("greet", "greet",
@@ -269,7 +273,7 @@
                                "hello", 0.98765)]

     result = evaluate_intents(intent_results,
-                              report_filename,
+                              report_folder,
                               successes_filename=None,
                               errors_filename=None,
                               confmat_filename=None,
@@ -292,6 +296,32 @@
     assert result["predictions"][0] == prediction


+def test_entity_evaluation_report(tmpdir_factory):
+
+    path = tmpdir_factory.mktemp("evaluation").strpath
+    report_folder = os.path.join(path, "reports")
+
+    mock_extractors = ["A", "B"]
+    report_filename_a = os.path.join(report_folder, "A_report.json")
+    report_filename_b = os.path.join(report_folder, "B_report.json")
+
+    utils.create_dir(report_folder)
+
+    result = evaluate_entities([EN_targets],
+                               [EN_predicted],
+                               [EN_tokens],
+                               mock_extractors,
+                               report_folder)
+
+    report_a = json.loads(utils.read_file(report_filename_a))
+    report_b = json.loads(utils.read_file(report_filename_b))
+
+    assert len(report_a) == 8
+    assert report_a["datetime"]["support"] == 1.0
+    assert report_b["macro avg"]["recall"] == 0.2
+    assert result["A"]["accuracy"] == 0.75
+
+
 def test_empty_intent_removal():
     intent_results = [
         IntentEvaluationResult("", "restaurant_search",
@@ -308,7 +338,7 @@
     assert intent_results[0].message == "hello"


-def test_evaluate_entities():
+def test_evaluate_entities_cv():
     mock_extractors = ["A", "B"]
     result = align_entity_predictions(EN_targets, EN_predicted,
                                       EN_tokens, mock_extractors)
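As the updated tests show, `evaluate_intents` and `evaluate_entities` now take a report folder instead of a single report file and write `intent_report.json` plus one `<extractor>_report.json` per entity extractor into it. A sketch of reading the reports back after an evaluation run; the folder name and the `ner_crf` extractor file are illustrative:

import json
import os

report_folder = "reports"  # the folder handed to the evaluate functions
with open(os.path.join(report_folder, "intent_report.json")) as f:
    intent_report = json.load(f)
with open(os.path.join(report_folder, "ner_crf_report.json")) as f:
    entity_report = json.load(f)
# both reports follow scikit-learn's classification_report dict layout
print(intent_report["macro avg"]["f1-score"])
print(entity_report["macro avg"]["recall"])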
"food"}, + payload={"q": "food", "project": "test_project_spacy_sklearn"} ), ]) @pytest.inlineCallbacks @@ -128,14 +128,14 @@ def test_post_parse_specific_model(app): @pytest.mark.parametrize("response_test", [ ResponseTest( - "http://dummy-uri/parse", - {"error": "No project found with name 'default'."}, - payload={"q": "food"} + "http://dummy-uri/parse", + {"error": "No project found with name 'default'."}, + payload={"q": "food"} ), ResponseTest( - "http://dummy-uri/parse", - {"error": "No project found with name 'umpalumpa'."}, - payload={"q": "food", "project": "umpalumpa"} + "http://dummy-uri/parse", + {"error": "No project found with name 'umpalumpa'."}, + payload={"q": "food", "project": "umpalumpa"} ), ]) @pytest.inlineCallbacks diff --git a/tests/base/test_server.py b/tests/base/test_server.py index 0d2fc187647a..5e516cc427e5 100644 --- a/tests/base/test_server.py +++ b/tests/base/test_server.py @@ -65,25 +65,25 @@ def test_version(app): @pytest.mark.parametrize("response_test", [ ResponseTest( - "http://dummy-uri/parse?q=hello", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, 'text': 'hello'} + "http://dummy-uri/parse?q=hello", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, 'text': 'hello'} ), ResponseTest( - "http://dummy-uri/parse?query=hello", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, 'text': 'hello'} + "http://dummy-uri/parse?query=hello", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, 'text': 'hello'} ), ResponseTest( - "http://dummy-uri/parse?q=hello ńöñàśçií", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, - 'text': 'hello ńöñàśçií'} + "http://dummy-uri/parse?q=hello ńöñàśçií", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, + 'text': 'hello ńöñàśçií'} ), ResponseTest( - "http://dummy-uri/parse?q=", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 0.0, 'name': None}, 'text': ''} + "http://dummy-uri/parse?q=", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 0.0, 'name': None}, 'text': ''} ), ]) @pytest.inlineCallbacks @@ -99,25 +99,25 @@ def test_get_parse(app, response_test): @pytest.mark.parametrize("response_test", [ ResponseTest( - "http://dummy-uri/parse", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, - 'text': 'hello'}, - payload={"q": "hello"} + "http://dummy-uri/parse", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, + 'text': 'hello'}, + payload={"q": "hello"} ), ResponseTest( - "http://dummy-uri/parse", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, - 'text': 'hello'}, - payload={"query": "hello"} + "http://dummy-uri/parse", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, + 'text': 'hello'}, + payload={"query": "hello"} ), ResponseTest( - "http://dummy-uri/parse", - {'project': 'default', 'entities': [], 'model': 'fallback', - 'intent': {'confidence': 1.0, 'name': 'greet'}, - 'text': 'hello ńöñàśçií'}, - payload={"q": "hello ńöñàśçií"} + 
"http://dummy-uri/parse", + {'project': 'default', 'entities': [], 'model': 'fallback', + 'intent': {'confidence': 1.0, 'name': 'greet'}, + 'text': 'hello ńöñàśçií'}, + payload={"q": "hello ńöñàśçií"} ), ]) @pytest.inlineCallbacks diff --git a/tests/base/test_training_data.py b/tests/base/test_training_data.py index d93cc6332069..00f33dbb5f0b 100644 --- a/tests/base/test_training_data.py +++ b/tests/base/test_training_data.py @@ -73,7 +73,7 @@ def test_dialogflow_data(): def test_lookup_table_json(): lookup_fname = 'data/test/lookup_tables/plates.txt' td_lookup = training_data.load_data( - 'data/test/lookup_tables/lookup_table.json') + 'data/test/lookup_tables/lookup_table.json') assert td_lookup.lookup_tables[0]['name'] == 'plates' assert td_lookup.lookup_tables[0]['elements'] == lookup_fname assert td_lookup.lookup_tables[1]['name'] == 'drinks' @@ -84,7 +84,7 @@ def test_lookup_table_json(): def test_lookup_table_md(): lookup_fname = 'data/test/lookup_tables/plates.txt' td_lookup = training_data.load_data( - 'data/test/lookup_tables/lookup_table.md') + 'data/test/lookup_tables/lookup_table.md') assert td_lookup.lookup_tables[0]['name'] == 'plates' assert td_lookup.lookup_tables[0]['elements'] == lookup_fname assert td_lookup.lookup_tables[1]['name'] == 'drinks' @@ -108,8 +108,8 @@ def test_demo_data(filename): 'vegg': 'vegetarian', 'veggie': 'vegetarian'} - assert td.regex_features == [{"name": "greet", "pattern": "hey[^\s]*"}, - {"name": "zipcode", "pattern": "[0-9]{5}"}] + assert td.regex_features == [{"name": "greet", "pattern": r"hey[^\s]*"}, + {"name": "zipcode", "pattern": r"[0-9]{5}"}] @pytest.mark.parametrize("filename", ['data/examples/rasa/demo-rasa.md']) @@ -144,12 +144,12 @@ def test_data_merging(files): def test_markdown_single_sections(): td_regex_only = training_data.load_data( - 'data/test/markdown_single_sections/regex_only.md') + 'data/test/markdown_single_sections/regex_only.md') assert (td_regex_only.regex_features == - [{"name": "greet", "pattern": "hey[^\s]*"}]) + [{"name": "greet", "pattern": r"hey[^\s]*"}]) td_syn_only = training_data.load_data( - 'data/test/markdown_single_sections/synonyms_only.md') + 'data/test/markdown_single_sections/synonyms_only.md') assert td_syn_only.entity_synonyms == {'Chines': 'chinese', 'Chinese': 'chinese'} diff --git a/tests/base/test_utils.py b/tests/base/test_utils.py index b48bf13c52fc..bd0e9f41cc79 100644 --- a/tests/base/test_utils.py +++ b/tests/base/test_utils.py @@ -60,12 +60,12 @@ def test_ordered(): @pytest.mark.parametrize( - ("model_dir", "expected"), - [("test_models/test_model_mitie/model_20170628-002704", True), - ("test_models/test_model_mitie_sklearn/model_20170628-002712", True), - ("test_models/test_model_spacy_sklearn/model_20170628-002705", True), - ("test_models/", False), - ("test_models/nonexistent_for_sure_123", False)]) + ("model_dir", "expected"), + [("test_models/test_model_mitie/model_20170628-002704", True), + ("test_models/test_model_mitie_sklearn/model_20170628-002712", True), + ("test_models/test_model_spacy_sklearn/model_20170628-002705", True), + ("test_models/", False), + ("test_models/nonexistent_for_sure_123", False)]) def test_is_model_dir(model_dir, expected): assert is_model_dir(model_dir) == expected @@ -111,20 +111,20 @@ def test_is_url(): def test_endpoint_config(): endpoint = EndpointConfig( - "https://abc.defg/", - params={"A": "B"}, - headers={"X-Powered-By": "Rasa"}, - basic_auth={"username": "user", - "password": "pass"}, - token="mytoken", - token_name="letoken" + 
"https://abc.defg/", + params={"A": "B"}, + headers={"X-Powered-By": "Rasa"}, + basic_auth={"username": "user", + "password": "pass"}, + token="mytoken", + token_name="letoken" ) httpretty.register_uri( - httpretty.POST, - 'https://abc.defg/test', - status=500, - body='') + httpretty.POST, + 'https://abc.defg/test', + status=500, + body='') httpretty.enable() endpoint.request("post", subpath="test", diff --git a/tests/training/test_train.py b/tests/training/test_train.py index db8fa867988b..b67a7b0b3330 100644 --- a/tests/training/test_train.py +++ b/tests/training/test_train.py @@ -69,10 +69,10 @@ def test_all_components_are_in_at_least_one_test_pipeline(): def test_train_model(pipeline_template, component_builder, tmpdir): _config = utilities.base_test_conf(pipeline_template) (trained, _, persisted_path) = train.do_train( - _config, - path=tmpdir.strpath, - data=DEFAULT_DATA_PATH, - component_builder=component_builder) + _config, + path=tmpdir.strpath, + data=DEFAULT_DATA_PATH, + component_builder=component_builder) assert trained.pipeline loaded = Interpreter.load(persisted_path, component_builder) assert loaded.pipeline @@ -90,16 +90,16 @@ def test_random_seed(component_builder, tmpdir): random_seed=1) # first run (trained_a, _, persisted_path_a) = train.do_train( - _config, - path=tmpdir.strpath + "_a", - data=DEFAULT_DATA_PATH, - component_builder=component_builder) + _config, + path=tmpdir.strpath + "_a", + data=DEFAULT_DATA_PATH, + component_builder=component_builder) # second run (trained_b, _, persisted_path_b) = train.do_train( - _config, - path=tmpdir.strpath + "_b", - data=DEFAULT_DATA_PATH, - component_builder=component_builder) + _config, + path=tmpdir.strpath + "_b", + data=DEFAULT_DATA_PATH, + component_builder=component_builder) loaded_a = Interpreter.load(persisted_path_a, component_builder) loaded_b = Interpreter.load(persisted_path_b, component_builder) result_a = loaded_a.parse("hello")["intent"]["confidence"] @@ -113,10 +113,10 @@ def test_train_model_on_test_pipelines(language, pipeline, component_builder, tmpdir): _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language}) (trained, _, persisted_path) = train.do_train( - _config, - path=tmpdir.strpath, - data=DEFAULT_DATA_PATH, - component_builder=component_builder) + _config, + path=tmpdir.strpath, + data=DEFAULT_DATA_PATH, + component_builder=component_builder) assert trained.pipeline loaded = Interpreter.load(persisted_path, component_builder) assert loaded.pipeline @@ -129,10 +129,10 @@ def test_train_model_on_test_pipelines(language, pipeline, def test_train_model_noents(language, pipeline, component_builder, tmpdir): _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language}) (trained, _, persisted_path) = train.do_train( - _config, - path=tmpdir.strpath, - data="./data/test/demo-rasa-noents.json", - component_builder=component_builder) + _config, + path=tmpdir.strpath, + data="./data/test/demo-rasa-noents.json", + component_builder=component_builder) assert trained.pipeline loaded = Interpreter.load(persisted_path, component_builder) assert loaded.pipeline @@ -145,11 +145,11 @@ def test_train_model_noents(language, pipeline, component_builder, tmpdir): def test_train_model_multithread(language, pipeline, component_builder, tmpdir): _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language}) (trained, _, persisted_path) = train.do_train( - _config, - path=tmpdir.strpath, - data=DEFAULT_DATA_PATH, - component_builder=component_builder, - num_threads=2) + _config, + 
diff --git a/tests/base/test_utils.py b/tests/base/test_utils.py
index b48bf13c52fc..bd0e9f41cc79 100644
--- a/tests/base/test_utils.py
+++ b/tests/base/test_utils.py
@@ -60,12 +60,12 @@ def test_ordered():

 @pytest.mark.parametrize(
-        ("model_dir", "expected"),
-        [("test_models/test_model_mitie/model_20170628-002704", True),
-         ("test_models/test_model_mitie_sklearn/model_20170628-002712", True),
-         ("test_models/test_model_spacy_sklearn/model_20170628-002705", True),
-         ("test_models/", False),
-         ("test_models/nonexistent_for_sure_123", False)])
+    ("model_dir", "expected"),
+    [("test_models/test_model_mitie/model_20170628-002704", True),
+     ("test_models/test_model_mitie_sklearn/model_20170628-002712", True),
+     ("test_models/test_model_spacy_sklearn/model_20170628-002705", True),
+     ("test_models/", False),
+     ("test_models/nonexistent_for_sure_123", False)])
 def test_is_model_dir(model_dir, expected):
     assert is_model_dir(model_dir) == expected
@@ -111,20 +111,20 @@ def test_is_url():
 def test_endpoint_config():
     endpoint = EndpointConfig(
-            "https://abc.defg/",
-            params={"A": "B"},
-            headers={"X-Powered-By": "Rasa"},
-            basic_auth={"username": "user",
-                        "password": "pass"},
-            token="mytoken",
-            token_name="letoken"
+        "https://abc.defg/",
+        params={"A": "B"},
+        headers={"X-Powered-By": "Rasa"},
+        basic_auth={"username": "user",
+                    "password": "pass"},
+        token="mytoken",
+        token_name="letoken"
     )

     httpretty.register_uri(
-            httpretty.POST,
-            'https://abc.defg/test',
-            status=500,
-            body='')
+        httpretty.POST,
+        'https://abc.defg/test',
+        status=500,
+        body='')

     httpretty.enable()
     endpoint.request("post", subpath="test",
diff --git a/tests/training/test_train.py b/tests/training/test_train.py
index db8fa867988b..b67a7b0b3330 100644
--- a/tests/training/test_train.py
+++ b/tests/training/test_train.py
@@ -69,10 +69,10 @@ def test_all_components_are_in_at_least_one_test_pipeline():
 def test_train_model(pipeline_template, component_builder, tmpdir):
     _config = utilities.base_test_conf(pipeline_template)
     (trained, _, persisted_path) = train.do_train(
-            _config,
-            path=tmpdir.strpath,
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath,
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder)
     assert trained.pipeline
     loaded = Interpreter.load(persisted_path, component_builder)
     assert loaded.pipeline
@@ -90,16 +90,16 @@ def test_random_seed(component_builder, tmpdir):
                                        random_seed=1)
     # first run
     (trained_a, _, persisted_path_a) = train.do_train(
-            _config,
-            path=tmpdir.strpath + "_a",
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath + "_a",
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder)
     # second run
     (trained_b, _, persisted_path_b) = train.do_train(
-            _config,
-            path=tmpdir.strpath + "_b",
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath + "_b",
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder)
     loaded_a = Interpreter.load(persisted_path_a, component_builder)
     loaded_b = Interpreter.load(persisted_path_b, component_builder)
     result_a = loaded_a.parse("hello")["intent"]["confidence"]
@@ -113,10 +113,10 @@ def test_train_model_on_test_pipelines(language, pipeline,
                                        component_builder, tmpdir):
     _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
     (trained, _, persisted_path) = train.do_train(
-            _config,
-            path=tmpdir.strpath,
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath,
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder)
     assert trained.pipeline
     loaded = Interpreter.load(persisted_path, component_builder)
     assert loaded.pipeline
@@ -129,10 +129,10 @@ def test_train_model_noents(language, pipeline, component_builder, tmpdir):
     _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
     (trained, _, persisted_path) = train.do_train(
-            _config,
-            path=tmpdir.strpath,
-            data="./data/test/demo-rasa-noents.json",
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath,
+        data="./data/test/demo-rasa-noents.json",
+        component_builder=component_builder)
     assert trained.pipeline
     loaded = Interpreter.load(persisted_path, component_builder)
     assert loaded.pipeline
@@ -145,11 +145,11 @@ def test_train_model_noents(language, pipeline, component_builder, tmpdir):
 def test_train_model_multithread(language, pipeline, component_builder,
                                  tmpdir):
     _config = RasaNLUModelConfig({"pipeline": pipeline, "language": language})
     (trained, _, persisted_path) = train.do_train(
-            _config,
-            path=tmpdir.strpath,
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder,
-            num_threads=2)
+        _config,
+        path=tmpdir.strpath,
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder,
+        num_threads=2)
     assert trained.pipeline
     loaded = Interpreter.load(persisted_path, component_builder)
     assert loaded.pipeline
@@ -162,19 +162,19 @@ def test_train_model_empty_pipeline(component_builder):
     _config = utilities.base_test_conf(pipeline_template=None)
     with pytest.raises(ValueError):
         train.do_train(
-                _config,
-                data=DEFAULT_DATA_PATH,
-                component_builder=component_builder)
+            _config,
+            data=DEFAULT_DATA_PATH,
+            component_builder=component_builder)


 def test_train_named_model(component_builder, tmpdir):
     _config = utilities.base_test_conf("keyword")
     (trained, _, persisted_path) = train.do_train(
-            _config,
-            path=tmpdir.strpath,
-            project="my_keyword_model",
-            data=DEFAULT_DATA_PATH,
-            component_builder=component_builder)
+        _config,
+        path=tmpdir.strpath,
+        project="my_keyword_model",
+        data=DEFAULT_DATA_PATH,
+        component_builder=component_builder)
     assert trained.pipeline
     normalized_path = os.path.dirname(os.path.normpath(persisted_path))
     # should be saved in a dir named after a project
@@ -186,9 +186,9 @@ def test_handles_pipeline_with_non_existing_component(component_builder):
     _config.pipeline.append({"name": "my_made_up_component"})
     with pytest.raises(Exception) as execinfo:
         train.do_train(
-                _config,
-                data=DEFAULT_DATA_PATH,
-                component_builder=component_builder)
+            _config,
+            data=DEFAULT_DATA_PATH,
+            component_builder=component_builder)
     assert "Failed to find component" in str(execinfo.value)
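Finally, all of the reformatted training calls above follow the same pattern, so a condensed sketch of the API as the tests use it may help; the config and data paths are illustrative stand-ins for the test fixtures:

from rasa_nlu import config, train
from rasa_nlu.model import Interpreter

# illustrative paths; the tests build their config via utilities.base_test_conf
nlu_config = config.load("sample_configs/config_spacy.yml")
(trainer, _, persisted_path) = train.do_train(
    nlu_config,
    path="models",
    data="data/examples/rasa/demo-rasa.json")
interpreter = Interpreter.load(persisted_path)
print(interpreter.parse("hello")["intent"])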