From 85ffabcb299064241c064ab525a198600b6a4a80 Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Wed, 18 Mar 2020 13:58:15 -0400 Subject: [PATCH 1/8] make full retrieval intent name available in tracker --- changelog/4826.improvement.rst | 4 ++ rasa/nlu/selectors/response_selector.py | 76 ++++++++++++++++++++++++- tests/nlu/selectors/test_selectors.py | 9 ++- 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 changelog/4826.improvement.rst diff --git a/changelog/4826.improvement.rst b/changelog/4826.improvement.rst new file mode 100644 index 000000000000..c7a3b11b400f --- /dev/null +++ b/changelog/4826.improvement.rst @@ -0,0 +1,4 @@ +Add full retrieval intent name to message data +``ResponseSelector`` will now add the full retrieval intent name +e.g. ``faq/which_version`` to the prediction, making it accessible +from the tracker. \ No newline at end of file diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index a7be78a303f3..c1d1787cfdcc 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -2,13 +2,17 @@ import numpy as np import tensorflow as tf +from pathlib import Path from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type +import rasa.utils.io as io_utils +from rasa.utils import train_utils from rasa.nlu.config import InvalidConfigError from rasa.nlu.training_data import TrainingData, Message from rasa.nlu.components import Component from rasa.nlu.featurizers.featurizer import Featurizer +from rasa.nlu.model import Metadata from rasa.nlu.classifiers.diet_classifier import ( DIETClassifier, DIET, @@ -66,9 +70,12 @@ from rasa.nlu.constants import ( RESPONSE, RESPONSE_SELECTOR_PROPERTY_NAME, + RESPONSE_KEY_ATTRIBUTE, + INTENT, DEFAULT_OPEN_UTTERANCE_TYPE, TEXT, ) + from rasa.utils.tensorflow.model_data import RasaModelData from rasa.utils.tensorflow.models import RasaModel @@ -203,6 +210,7 @@ def __init__( index_label_id_mapping: Optional[Dict[int, Text]] = None, index_tag_id_mapping: Optional[Dict[int, Text]] = None, model: Optional[RasaModel] = None, + retrieval_intent_mapping: Optional[Dict[Text, Text]] = None, ) -> None: component_config = component_config or {} @@ -211,6 +219,7 @@ def __init__( component_config[INTENT_CLASSIFICATION] = True component_config[ENTITY_RECOGNITION] = False component_config[BILOU_FLAG] = None + self.retrieval_intent_mapping = retrieval_intent_mapping or {} super().__init__( component_config, index_label_id_mapping, index_tag_id_mapping, model @@ -231,6 +240,20 @@ def _check_config_parameters(self) -> None: super()._check_config_parameters() self._load_selector_params(self.component_config) + @staticmethod + def _create_retrieval_intent_mapping( + training_data: TrainingData, + ) -> Dict[Text, Text]: + """Create response_key dictionary""" + + retrieval_intent_mapping = {} + for example in training_data.intent_examples: + retrieval_intent_mapping[ + example.get(RESPONSE) + ] = f"{example.get(INTENT)}/{example.get(RESPONSE_KEY_ATTRIBUTE)}" + + return retrieval_intent_mapping + @staticmethod def _set_message_property( message: Message, prediction_dict: Dict[Text, Any], selector_key: Text @@ -262,6 +285,9 @@ def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData: label_id_index_mapping = self._label_id_index_mapping( training_data, attribute=RESPONSE ) + self.retrieval_intent_mapping = self._create_retrieval_intent_mapping( + training_data + ) if not label_id_index_mapping: # no labels are present to train @@ -288,6 +314,8 @@ def process(self, message: Message, **kwargs: Any) -> None: out = self._predict(message) label, label_ranking = self._predict_label(out) + retrieval_intent_name = self.retrieval_intent_mapping.get(label.get("name")) + # add suffix to label here selector_key = ( self.retrieval_intent @@ -299,10 +327,56 @@ def process(self, message: Message, **kwargs: Any) -> None: f"Adding following selector key to message property: {selector_key}" ) - prediction_dict = {"response": label, "ranking": label_ranking} + prediction_dict = { + "response": label, + "ranking": label_ranking, + "full_retrieval_intent": retrieval_intent_name, + } self._set_message_property(message, prediction_dict, selector_key) + def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: + """Persist this model into the passed directory. + + Return the metadata necessary to load the model again. + """ + super().persist(file_name, model_dir) + + model_dir = Path(model_dir) + + io_utils.json_pickle( + model_dir / f"{file_name}.retrieval_intent_mapping.pkl", + self.retrieval_intent_mapping, + ) + return {"file": file_name} + + @classmethod + def load( + cls, + meta: Dict[Text, Any], + model_dir: Text = None, + model_metadata: Metadata = None, + cached_component: Optional["ResponseSelector"] = None, + retrieval_intent_mapping: Optional[Dict[Text, Text]] = None, + **kwargs: Any, + ) -> "ResponseSelector": + """Loads the trained model from the provided directory.""" + + model = super().load( + meta, model_dir, model_metadata, cached_component, **kwargs + ) + + file_name = meta.get("file") + model_dir = Path(model_dir) + + retrieval_intent_mapping = io_utils.json_unpickle( + model_dir / f"{file_name}.retrieval_intent_mapping.pkl" + ) + + model.retrieval_intent_mapping = retrieval_intent_mapping + + return model + class DIET2DIET(DIET): def _check_data(self) -> None: diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py index fbce352a6b8b..7862ef3a03e7 100644 --- a/tests/nlu/selectors/test_selectors.py +++ b/tests/nlu/selectors/test_selectors.py @@ -4,6 +4,7 @@ from rasa.nlu.training_data import load_data from rasa.nlu.train import Trainer, Interpreter from rasa.utils.tensorflow.constants import EPOCHS +from rasa.nlu.constants import RESPONSE_SELECTOR_PROPERTY_NAME @pytest.mark.parametrize( @@ -33,6 +34,12 @@ def test_train_selector(pipeline, component_builder, tmpdir): assert trainer.pipeline loaded = Interpreter.load(persisted_path, component_builder) + parsed = loaded.parse("hello") assert loaded.pipeline - assert loaded.parse("hello") is not None + assert parsed is not None + assert ( + parsed.get(RESPONSE_SELECTOR_PROPERTY_NAME) + .get("default") + .get("full_intent_name") + ) is not None From 1e4f52fb783e0ca28b27821f801428851100bc71 Mon Sep 17 00:00:00 2001 From: Melinda Loubser <32034278+melindaloubser1@users.noreply.github.com> Date: Wed, 18 Mar 2020 13:59:45 -0400 Subject: [PATCH 2/8] Update response_selector.py --- rasa/nlu/selectors/response_selector.py | 1 - 1 file changed, 1 deletion(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index c1d1787cfdcc..e5c67d4d559a 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -315,7 +315,6 @@ def process(self, message: Message, **kwargs: Any) -> None: out = self._predict(message) label, label_ranking = self._predict_label(out) retrieval_intent_name = self.retrieval_intent_mapping.get(label.get("name")) - # add suffix to label here selector_key = ( self.retrieval_intent From d5887c3d710ad4b2adc123bf2e53eb5a805292b5 Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Wed, 18 Mar 2020 14:04:35 -0400 Subject: [PATCH 3/8] remove unused import and argument --- rasa/nlu/selectors/response_selector.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index c1d1787cfdcc..6f8fa25c85f6 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -7,7 +7,6 @@ from typing import Any, Dict, Optional, Text, Tuple, Union, List, Type import rasa.utils.io as io_utils -from rasa.utils import train_utils from rasa.nlu.config import InvalidConfigError from rasa.nlu.training_data import TrainingData, Message from rasa.nlu.components import Component @@ -357,7 +356,6 @@ def load( model_dir: Text = None, model_metadata: Metadata = None, cached_component: Optional["ResponseSelector"] = None, - retrieval_intent_mapping: Optional[Dict[Text, Text]] = None, **kwargs: Any, ) -> "ResponseSelector": """Loads the trained model from the provided directory.""" From 7efcc19818501bc152443b14008cf189ffc66cb3 Mon Sep 17 00:00:00 2001 From: Melinda Loubser <32034278+melindaloubser1@users.noreply.github.com> Date: Wed, 18 Mar 2020 14:30:45 -0400 Subject: [PATCH 4/8] Update tests/nlu/selectors/test_selectors.py Co-Authored-By: Tanja --- tests/nlu/selectors/test_selectors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/nlu/selectors/test_selectors.py b/tests/nlu/selectors/test_selectors.py index 7862ef3a03e7..98f69e99d2b9 100644 --- a/tests/nlu/selectors/test_selectors.py +++ b/tests/nlu/selectors/test_selectors.py @@ -41,5 +41,5 @@ def test_train_selector(pipeline, component_builder, tmpdir): assert ( parsed.get(RESPONSE_SELECTOR_PROPERTY_NAME) .get("default") - .get("full_intent_name") + .get("full_retrieval_intent") ) is not None From c72ba04de9ea13463ed3d756641a456c0ced6c43 Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Thu, 19 Mar 2020 11:49:13 -0400 Subject: [PATCH 5/8] ensure load and persist behave as they should --- rasa/nlu/selectors/response_selector.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index b3c3564e568f..e3c37d11e839 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -338,7 +338,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: Return the metadata necessary to load the model again. """ - super().persist(file_name, model_dir) + if self.model is None: + return {"file": None} model_dir = Path(model_dir) @@ -346,7 +347,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: model_dir / f"{file_name}.retrieval_intent_mapping.pkl", self.retrieval_intent_mapping, ) - return {"file": file_name} + + return super().persist(file_name, model_dir) @classmethod def load( @@ -362,6 +364,8 @@ def load( model = super().load( meta, model_dir, model_metadata, cached_component, **kwargs ) + if model == cls(component_config=meta): + return model file_name = meta.get("file") model_dir = Path(model_dir) From f39e1f9cf4daff59438e45979af8a9378e326bd5 Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Thu, 19 Mar 2020 12:05:30 -0400 Subject: [PATCH 6/8] disable return type check --- rasa/nlu/selectors/response_selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index e3c37d11e839..1a11b831f492 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -376,7 +376,7 @@ def load( model.retrieval_intent_mapping = retrieval_intent_mapping - return model + return model # pytype: disable=bad-return-type class DIET2DIET(DIET): From 540369b232cf341dad51005344150b5f0616ce33 Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Thu, 19 Mar 2020 13:08:03 -0400 Subject: [PATCH 7/8] disable bad return type in if statement too --- rasa/nlu/selectors/response_selector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 1a11b831f492..4faa76ea1586 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -365,7 +365,8 @@ def load( meta, model_dir, model_metadata, cached_component, **kwargs ) if model == cls(component_config=meta): - return model + model.retrieval_intent_mapping = {} + return model # pytype: disable=bad-return-type file_name = meta.get("file") model_dir = Path(model_dir) From 87e550a6ff56e8732fab7b2740b764326c36514d Mon Sep 17 00:00:00 2001 From: melindaloubser1 Date: Thu, 19 Mar 2020 15:33:42 -0400 Subject: [PATCH 8/8] pass right model_dir to persist --- rasa/nlu/selectors/response_selector.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rasa/nlu/selectors/response_selector.py b/rasa/nlu/selectors/response_selector.py index 4faa76ea1586..cec49ea80e6d 100644 --- a/rasa/nlu/selectors/response_selector.py +++ b/rasa/nlu/selectors/response_selector.py @@ -341,6 +341,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: if self.model is None: return {"file": None} + super().persist(file_name, model_dir) + model_dir = Path(model_dir) io_utils.json_pickle( @@ -348,7 +350,7 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]: self.retrieval_intent_mapping, ) - return super().persist(file_name, model_dir) + return {"file": file_name} @classmethod def load(