Merge branch 'master' into evaluation_data_validation

RasaHQ · Feb 1, 2019 · 6d26188 · 6d26188
2 parents 27c5b37 + 0b6c515
commit 6d26188
Show file tree

Hide file tree

Showing 40 changed files with 480 additions and 419 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -29,7 +29,7 @@ before_script:
   - mkdir $HOME/tmp
   - export TMPDIR=$HOME/tmp
 script: 
-  - py.test --pep8 -m pep8
+  - py.test --codestyle -m codestyle
   - py.test tests/base --cov rasa_nlu -v --cov-append
   - py.test tests/training --cov rasa_nlu -v --cov-append
 after_success:
@@ -71,14 +71,16 @@ jobs:
     - git remote set-url --push origin "git@github.com:$TRAVIS_REPO_SLUG"
     - export ${!TRAVIS*}
     - sphinx-versioning push docs newdocs . -- -b dirhtml  -A html_theme=rasabaster
-  - stage: Test starter packs
+  - stage: test
+    if: branch =~ /^.*\.x$/ # regex match: only new NLU version builds (branches ending in ".x") test the starter packs
     name: "NLU starter pack"
     python: 3.6
     script:
     - git clone https://github.com/RasaHQ/starter-pack-rasa-nlu.git
     - cd starter-pack-rasa-nlu
     - python -m pytest tests/test_nlu.py
-  - stage: Test starter packs
+  - stage: test
+    if: branch = "*.x" # only new NLU version builds test the starter packs
     name: "Stack starter pack (NLU only)"
     python: 3.6
     script:
@@ -106,6 +108,7 @@ jobs:
     - git commit --allow-empty -m "trigger nlu docs update"
     - git push origin master
   - stage: deploy
+    name: "PyPI test"
     python: 3.6
     install: skip
     script: skip

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -15,14 +15,32 @@ Changed
 - validate training data only if used for training
 - applied spacy guidelines on how to disable pipeline components
 
+
+- replace pep8 with pycodestyle
+
 Removed
 -------
 - **removed python 2.7 support**
 
 Fixed
 -----
 
-.. _v0-14-0:
+[0.14.2] - 2019-01-29
+^^^^^^^^^^^^^^^^^^^^^
+
+Added
+-----
+
+- ``rasa_nlu.evaluate`` now exports reports into a folder and also
+  includes the entity extractor reports
+
+Changed
+-------
+- updated requirements to match Core and SDK
+- pinned keras dependencies
+- starter packs are now tested in parallel with the unittests,
+  and only on branches ending in ``.x`` (i.e. new version releases)
+
 
 [0.14.1] - 2019-01-23
 ^^^^^^^^^^^^^^^^^^^^^
@@ -31,6 +49,8 @@ Fixed
 -----
 - scikit-learn is a global requirement
 
+.. _v0-14-0:
+
 [0.14.0] - 2019-01-23
 ^^^^^^^^^^^^^^^^^^^^^
 
@@ -57,6 +77,8 @@ Changed
 - updated TensorFlow version to 1.12.0
 - updated scikit-learn version to 0.20.2
 - updated cloudpickle version to 0.6.1
+- updated requirements to match Core and SDK
+- pinned keras dependencies
 
 Removed
 -------

diff --git a/alt_requirements/requirements_bare.txt b/alt_requirements/requirements_bare.txt
@@ -2,15 +2,16 @@ gevent==1.2.2
 klein==17.10.0
 hyperlink==17.3.1
 typing==3.6.2
-future==0.16.0
+future==0.17.1
+six==1.11.0
 jsonschema==2.6.0
 matplotlib==2.1.0
 requests==2.20.0
 tqdm==4.19.5
 numpy==1.14.5
 simplejson==3.13.2
 cloudpickle==0.6.1
-packaging==17.1
+packaging==18.0
 ruamel.yaml==0.15.78
-coloredlogs==9.0
+coloredlogs==10.0
 scikit-learn==0.20.2
diff --git a/alt_requirements/requirements_dev.txt b/alt_requirements/requirements_dev.txt
@@ -4,7 +4,7 @@
 
 # test
 python-coveralls==2.9.1
-pytest-pep8==1.0.6
+pytest-pycodestyle==1.4.0
 pytest-cov==2.5.1
 pytest-twisted==1.6
 pytest==3.3.2

diff --git a/alt_requirements/requirements_tensorflow_sklearn.txt b/alt_requirements/requirements_tensorflow_sklearn.txt
@@ -4,3 +4,5 @@
 tensorflow==1.12.0
 scipy==1.1.0
 sklearn-crfsuite==0.3.6
+keras-applications==1.0.6
+keras-preprocessing==1.0.5
diff --git a/docs/evaluation.rst b/docs/evaluation.rst
@@ -80,9 +80,9 @@ Intent Classification
 The evaluation script will produce a report, confusion matrix
 and confidence histogram for your model.
 
-The report logs precision, recall, and f1 measure for
-each intent, as well as provide an overall average.  You can save this
-report as a JSON file using the `--report` flag.
+The report logs precision, recall and f1 measure for
+each intent and entity, as well as providing an overall average.
+You can save these reports as JSON files using the ``--report`` flag.
 
 The confusion matrix shows you which
 intents are mistaken for others; any samples which have been

diff --git a/rasa_nlu/classifiers/embedding_intent_classifier.py b/rasa_nlu/classifiers/embedding_intent_classifier.py
@@ -112,13 +112,13 @@ def __init__(self,
                  component_config: Optional[Dict[Text, Any]] = None,
                  inv_intent_dict: Optional[Dict[int, Text]] = None,
                  encoded_all_intents: Optional[np.ndarray] = None,
-                 session: Optional[tf.Session] = None,
-                 graph: Optional[tf.Graph] = None,
-                 message_placeholder: Optional[tf.Tensor] = None,
-                 intent_placeholder: Optional[tf.Tensor] = None,
-                 similarity_op: Optional[tf.Tensor] = None,
-                 word_embed: Optional[tf.Tensor] = None,
-                 intent_embed: Optional[tf.Tensor] = None
+                 session: Optional['tf.Session'] = None,
+                 graph: Optional['tf.Graph'] = None,
+                 message_placeholder: Optional['tf.Tensor'] = None,
+                 intent_placeholder: Optional['tf.Tensor'] = None,
+                 similarity_op: Optional['tf.Tensor'] = None,
+                 word_embed: Optional['tf.Tensor'] = None,
+                 intent_embed: Optional['tf.Tensor'] = None
                  ) -> None:
         """Declare instance variables with default values"""
 
@@ -198,9 +198,9 @@ def required_packages(cls) -> List[Text]:
     def _check_tensorflow():
         if tf is None:
             raise ImportError(
-                    'Failed to import `tensorflow`. '
-                    'Please install `tensorflow`. '
-                    'For example with `pip install tensorflow`.')
+                'Failed to import `tensorflow`. '
+                'Please install `tensorflow`. '
+                'For example with `pip install tensorflow`.')
 
     # training data helpers:
     @staticmethod
@@ -232,7 +232,7 @@ def _create_encoded_intents(self,
 
         if self.intent_tokenization_flag:
             intent_token_dict = self._create_intent_token_dict(
-                    list(intent_dict.keys()), self.intent_split_symbol)
+                list(intent_dict.keys()), self.intent_split_symbol)
 
             encoded_all_intents = np.zeros((len(intent_dict),
                                             len(intent_token_dict)))
@@ -277,8 +277,8 @@ def _prepare_data_for_training(
 
         # tf helpers:
 
-    def _create_tf_embed_nn(self, x_in: tf.Tensor, is_training: tf.Tensor,
-                            layer_sizes: List[int], name: Text) -> tf.Tensor:
+    def _create_tf_embed_nn(self, x_in: 'tf.Tensor', is_training: 'tf.Tensor',
+                            layer_sizes: List[int], name: Text) -> 'tf.Tensor':
         """Create nn with hidden layers and name"""
 
         reg = tf.contrib.layers.l2_regularizer(self.C2)
@@ -298,10 +298,10 @@ def _create_tf_embed_nn(self, x_in: tf.Tensor, is_training: tf.Tensor,
         return x
 
     def _create_tf_embed(self,
-                         a_in: tf.Tensor,
-                         b_in: tf.Tensor,
-                         is_training: tf.Tensor
-                         ) -> Tuple[tf.Tensor, tf.Tensor]:
+                         a_in: 'tf.Tensor',
+                         b_in: 'tf.Tensor',
+                         is_training: 'tf.Tensor'
+                         ) -> Tuple['tf.Tensor', 'tf.Tensor']:
         """Create tf graph for training"""
 
         emb_a = self._create_tf_embed_nn(a_in, is_training,
@@ -313,8 +313,8 @@ def _create_tf_embed(self,
         return emb_a, emb_b
 
     def _tf_sim(self,
-                a: tf.Tensor,
-                b: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
+                a: 'tf.Tensor',
+                b: 'tf.Tensor') -> Tuple['tf.Tensor', 'tf.Tensor']:
         """Define similarity
 
         in two cases:
@@ -338,7 +338,7 @@ def _tf_sim(self,
                              "should be 'cosine' or 'inner'"
                              "".format(self.similarity_type))
 
-    def _tf_loss(self, sim: tf.Tensor, sim_emb: tf.Tensor) -> tf.Tensor:
+    def _tf_loss(self, sim: 'tf.Tensor', sim_emb: 'tf.Tensor') -> 'tf.Tensor':
         """Define loss"""
 
         # loss for maximizing similarity with correct action
@@ -379,8 +379,8 @@ def _create_batch_b(self, batch_pos_b: np.ndarray,
         for b in range(batch_pos_b.shape[0]):
             # create negative indexes out of possible ones
             # except for correct index of b
-            negative_indexes = [i for i in range(
-                    self.encoded_all_intents.shape[0])
+            negative_indexes = [i for i in
+                                range(self.encoded_all_intents.shape[0])
                                 if i != intent_ids[b]]
             negs = np.random.choice(negative_indexes, size=self.num_neg)
 
@@ -410,9 +410,9 @@ def _train_tf(self,
                   X: np.ndarray,
                   Y: np.ndarray,
                   intents_for_X: np.ndarray,
-                  loss: tf.Tensor,
-                  is_training: tf.Tensor,
-                  train_op: tf.Tensor
+                  loss: 'tf.Tensor',
+                  is_training: 'tf.Tensor',
+                  train_op: 'tf.Tensor'
                   ) -> None:
         """Train tf graph"""
 
@@ -443,10 +443,10 @@ def _train_tf(self,
                 batch_b = self._create_batch_b(batch_pos_b, intents_for_b)
 
                 sess_out = self.session.run(
-                        {'loss': loss, 'train_op': train_op},
-                        feed_dict={self.a_in: batch_a,
-                                   self.b_in: batch_b,
-                                   is_training: True}
+                    {'loss': loss, 'train_op': train_op},
+                    feed_dict={self.a_in: batch_a,
+                               self.b_in: batch_b,
+                               is_training: True}
                 )
                 ep_loss += sess_out.get('loss') / batches_per_epoch
 
@@ -477,7 +477,7 @@ def _train_tf(self,
     def _output_training_stat(self,
                               X: np.ndarray,
                               intents_for_X: np.ndarray,
-                              is_training: tf.Tensor) -> np.ndarray:
+                              is_training: 'tf.Tensor') -> np.ndarray:
         """Output training statistics"""
 
         n = self.evaluate_on_num_examples
@@ -509,10 +509,10 @@ def train(self,
 
         self.inv_intent_dict = {v: k for k, v in intent_dict.items()}
         self.encoded_all_intents = self._create_encoded_intents(
-                intent_dict)
+            intent_dict)
 
         X, Y, intents_for_X = self._prepare_data_for_training(
-                training_data, intent_dict)
+            training_data, intent_dict)
 
         # check if number of negatives is less than number of intents
         logger.debug("Check if num_neg {} is smaller than "
@@ -707,16 +707,16 @@ def load(cls,
                 encoded_all_intents = pickle.load(f)
 
             return cls(
-                    component_config=meta,
-                    inv_intent_dict=inv_intent_dict,
-                    encoded_all_intents=encoded_all_intents,
-                    session=sess,
-                    graph=graph,
-                    message_placeholder=a_in,
-                    intent_placeholder=b_in,
-                    similarity_op=sim_op,
-                    word_embed=word_embed,
-                    intent_embed=intent_embed
+                component_config=meta,
+                inv_intent_dict=inv_intent_dict,
+                encoded_all_intents=encoded_all_intents,
+                session=sess,
+                graph=graph,
+                message_placeholder=a_in,
+                intent_placeholder=b_in,
+                similarity_op=sim_op,
+                word_embed=word_embed,
+                intent_embed=intent_embed
             )
 
         else:

diff --git a/rasa_nlu/components.py b/rasa_nlu/components.py
@@ -37,7 +37,7 @@ def validate_requirements(component_names: List[Text]) -> None:
     for component_name in component_names:
         component_class = registry.get_component_class(component_name)
         failed_imports.update(find_unavailable_packages(
-                component_class.required_packages()))
+            component_class.required_packages()))
     if failed_imports:  # pragma: no cover
         # if available, use the development file to figure out the correct
         # version numbers for each requirement
@@ -170,7 +170,7 @@ def __init__(self,
         component_config["name"] = self.name
 
         self.component_config = config.override_defaults(
-                self.defaults, component_config)
+            self.defaults, component_config)
 
         self.partial_processing_pipeline = None
         self.partial_processing_context = None
@@ -355,9 +355,9 @@ def __get_cached_component(self,
 
         component_class = registry.get_component_class(component_name)
         cache_key = component_class.cache_key(model_metadata)
-        if (cache_key is not None
-                and self.use_cache
-                and cache_key in self.component_cache):
+        if (cache_key is not None and
+                self.use_cache and
+                cache_key in self.component_cache):
             return self.component_cache[cache_key], cache_key
         else:
             return None, cache_key
@@ -393,10 +393,10 @@ def load_component(self,
 
         try:
             cached_component, cache_key = self.__get_cached_component(
-                    component_name, model_metadata)
+                component_name, model_metadata)
             component = registry.load_component_by_name(
-                    component_name, model_dir, model_metadata,
-                    cached_component, **context)
+                component_name, model_dir, model_metadata,
+                cached_component, **context)
             if not cached_component:
                 # If the component wasn't in the cache,
                 # let us add it if possible
@@ -416,7 +416,7 @@ def create_component(self,
 
         try:
             component, cache_key = self.__get_cached_component(
-                    component_name, Metadata(cfg.as_dict(), None))
+                component_name, Metadata(cfg.as_dict(), None))
             if component is None:
                 component = registry.create_component_by_name(component_name,
                                                               cfg)

diff --git a/rasa_nlu/config.py b/rasa_nlu/config.py
@@ -115,7 +115,7 @@ def __init__(self, configuration_values=None):
                 self.__dict__['pipeline'] = pipeline
             else:
                 known_templates = ", ".join(
-                        registry.registered_pipeline_templates.keys())
+                    registry.registered_pipeline_templates.keys())
 
                 raise InvalidConfigError("No pipeline specified and unknown "
                                          "pipeline template '{}' passed. Known "

diff --git a/rasa_nlu/convert.py b/rasa_nlu/convert.py
@@ -6,7 +6,7 @@
 
 def create_argument_parser():
     parser = argparse.ArgumentParser(
-            description='Convert training data formats into one another')
+        description='Convert training data formats into one another')
 
     parser.add_argument('-d', '--data_file',
                         required=True,