From ebf556ee1381da3a587d8b22c33b2dd967d8a12b Mon Sep 17 00:00:00 2001
From: Daksh
Date: Fri, 6 Sep 2019 15:23:33 +0200
Subject: [PATCH] add check for empty text of a particular attribute

---
 .../featurizers/count_vectors_featurizer.py | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/rasa/nlu/featurizers/count_vectors_featurizer.py b/rasa/nlu/featurizers/count_vectors_featurizer.py
index 850da9ea4002..751ead03db49 100644
--- a/rasa/nlu/featurizers/count_vectors_featurizer.py
+++ b/rasa/nlu/featurizers/count_vectors_featurizer.py
@@ -433,6 +433,10 @@ def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]):
                 "Unable to train a shared CountVectorizer. Leaving an untrained CountVectorizer"
             )
 
+    @staticmethod
+    def _attribute_texts_is_non_empty(attribute_texts):
+        return any(text for text in attribute_texts)
+
     def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]]):
         """Construct the vectorizers and train them with an independent vocab"""
 
@@ -449,13 +453,18 @@ def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]])
         )
 
         for attribute in MESSAGE_ATTRIBUTES:
-
-            try:
-                self.vectorizers[attribute].fit(attribute_texts[attribute])
-            except ValueError:
-                logger.warning(
-                    "Unable to train CountVectorizer for message attribute {}. "
-                    "Leaving an untrained CountVectorizer for it".format(attribute)
+            if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
+                try:
+                    self.vectorizers[attribute].fit(attribute_texts[attribute])
+                except ValueError:
+                    logger.warning(
+                        "Unable to train CountVectorizer for message attribute {}. "
+                        "Leaving an untrained CountVectorizer for it".format(attribute)
+                    )
+            else:
+                logger.debug(
+                    "No text provided for {} attribute in any messages of training data. Skipping "
+                    "training a CountVectorizer for it.".format(attribute)
                 )
 
     def _get_featurized_attribute(