From ebf556ee1381da3a587d8b22c33b2dd967d8a12b Mon Sep 17 00:00:00 2001
From: Daksh <d.varshneya@rasa.com>
Date: Fri, 6 Sep 2019 15:23:33 +0200
Subject: [PATCH] add check for empty text of a particular attribute

---
 .../featurizers/count_vectors_featurizer.py   | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/rasa/nlu/featurizers/count_vectors_featurizer.py b/rasa/nlu/featurizers/count_vectors_featurizer.py
index 850da9ea4002..751ead03db49 100644
--- a/rasa/nlu/featurizers/count_vectors_featurizer.py
+++ b/rasa/nlu/featurizers/count_vectors_featurizer.py
@@ -433,6 +433,10 @@ def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]):
                 "Unable to train a shared CountVectorizer. Leaving an untrained CountVectorizer"
             )
 
+    @staticmethod
+    def _attribute_texts_is_non_empty(attribute_texts):  # texts of a single attribute
+        return any(text for text in attribute_texts)  # True if any text is non-empty
+
     def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]]):
         """Construct the vectorizers and train them with an independent vocab"""
 
@@ -449,13 +453,18 @@ def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]])
         )
 
         for attribute in MESSAGE_ATTRIBUTES:
-
-            try:
-                self.vectorizers[attribute].fit(attribute_texts[attribute])
-            except ValueError:
-                logger.warning(
-                    "Unable to train CountVectorizer for message attribute {}. "
-                    "Leaving an untrained CountVectorizer for it".format(attribute)
+            if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
+                try:
+                    self.vectorizers[attribute].fit(attribute_texts[attribute])
+                except ValueError:
+                    logger.warning(
+                        "Unable to train CountVectorizer for message attribute {}. "
+                        "Leaving an untrained CountVectorizer for it".format(attribute)
+                    )
+            else:
+                logger.debug(
+                    "No text provided for {} attribute in any messages of training data. Skipping "
+                    "training a CountVectorizer for it.".format(attribute)
                 )
 
     def _get_featurized_attribute(