Skip to content

Commit

Permalink
add check for empty text of a particular attribute
Browse files (browse the repository at this point in the history)
  • Loading branch information
dakshvar22 committed Sep 6, 2019
1 parent 103e707 commit ebf556e
Showing 1 changed file with 16 additions and 7 deletions.
23 changes: 16 additions & 7 deletions rasa/nlu/featurizers/count_vectors_featurizer.py
Expand Up @@ -433,6 +433,10 @@ def _train_with_shared_vocab(self, attribute_texts: Dict[Text, List[Text]]):
"Unable to train a shared CountVectorizer. Leaving an untrained CountVectorizer"
)

@staticmethod
def _attribute_texts_is_non_empty(attribute_texts):
    """Tell whether at least one entry of ``attribute_texts`` is truthy.

    Iterates over ``attribute_texts`` and returns ``True`` as soon as a
    truthy entry (e.g. a non-empty string) is found. Returns ``False``
    when the iterable is empty or every entry is falsy (``""``, ``None``).
    """
    for candidate in attribute_texts:
        if candidate:
            # One usable entry is enough; no need to scan the rest.
            return True
    return False

def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]]):
"""Construct the vectorizers and train them with an independent vocab"""

Expand All @@ -449,13 +453,18 @@ def _train_with_independent_vocab(self, attribute_texts: Dict[Text, List[Text]])
)

for attribute in MESSAGE_ATTRIBUTES:

try:
self.vectorizers[attribute].fit(attribute_texts[attribute])
except ValueError:
logger.warning(
"Unable to train CountVectorizer for message attribute {}. "
"Leaving an untrained CountVectorizer for it".format(attribute)
if self._attribute_texts_is_non_empty(attribute_texts[attribute]):
try:
self.vectorizers[attribute].fit(attribute_texts[attribute])
except ValueError:
logger.warning(
"Unable to train CountVectorizer for message attribute {}. "
"Leaving an untrained CountVectorizer for it".format(attribute)
)
else:
logger.debug(
"No text provided for {} attribute in any messages of training data. Skipping "
"training a CountVectorizer for it.".format(attribute)
)

def _get_featurized_attribute(
Expand Down

0 comments on commit ebf556e

Please sign in to comment.