diff --git a/rasa/nlu/featurizers/count_vectors_featurizer.py b/rasa/nlu/featurizers/count_vectors_featurizer.py
index 850da9ea4002..751ead03db49 100644
--- a/rasa/nlu/featurizers/count_vectors_featurizer.py
+++ b/rasa/nlu/featurizers/count_vectors_featurizer.py
@@ -433,6 +433,10 @@ def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]):
                 "Unable to train a shared CountVectorizer. Leaving an untrained CountVectorizer"
             )
 
+    @staticmethod
+    def _attribute_texts_is_non_empty(attribute_texts):
+        return any(text for text in attribute_texts)
+
     def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]]):
         """Construct the vectorizers and train them with an independent vocab"""
 
@@ -449,13 +453,18 @@ def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]])
         )
 
         for attribute in MESSAGE_ATTRIBUTES:
-
-            try:
-                self.vectorizers[attribute].fit(attribute_texts[attribute])
-            except ValueError:
-                logger.warning(
-                    "Unable to train CountVectorizer for message attribute {}. "
-                    "Leaving an untrained CountVectorizer for it".format(attribute)
+            if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
+                try:
+                    self.vectorizers[attribute].fit(attribute_texts[attribute])
+                except ValueError:
+                    logger.warning(
+                        "Unable to train CountVectorizer for message attribute {}. "
+                        "Leaving an untrained CountVectorizer for it".format(attribute)
+                    )
+            else:
+                logger.debug(
+                    "No text provided for {} attribute in any messages of training data. Skipping "
+                    "training a CountVectorizer for it.".format(attribute)
                 )
 
     def _get_featurized_attribute(