diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 493e44a86ccd..574b8bd7473d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Added Changed ------- +- skip test data validation for evaluation - applied spacy guidelines on how to disable pipeline components Removed diff --git a/rasa_nlu/evaluate.py b/rasa_nlu/evaluate.py index 756a98acd98c..70e6c5bde994 100644 --- a/rasa_nlu/evaluate.py +++ b/rasa_nlu/evaluate.py @@ -714,7 +714,8 @@ def run_evaluation(data_path, model, else: interpreter = Interpreter.load(model, component_builder) test_data = training_data.load_data(data_path, - interpreter.model_metadata.language) + interpreter.model_metadata.language, + validate=False) extractors = get_entity_extractors(interpreter) entity_predictions, tokens = get_entity_predictions(interpreter, test_data) diff --git a/rasa_nlu/training_data/loading.py b/rasa_nlu/training_data/loading.py index e6f2b5a1fd93..00165318e352 100644 --- a/rasa_nlu/training_data/loading.py +++ b/rasa_nlu/training_data/loading.py @@ -13,6 +13,7 @@ if typing.TYPE_CHECKING: from rasa_nlu.training_data import TrainingData + from rasa_nlu.training_data.formats.readerwriter import TrainingDataReader logger = logging.getLogger(__name__) @@ -40,7 +41,8 @@ def load_data(resource_name: Text, - language: Optional[Text] = 'en') -> 'TrainingData': + language: Optional[Text] = 'en', + validate: bool = True) -> 'TrainingData': """Load training data from disk. Merges them if loaded from disk and multiple files are found.""" @@ -56,7 +58,8 @@ def load_data(resource_name: Text, else: training_data = data_sets[0].merge(*data_sets[1:]) - training_data.validate() + if validate: + training_data.validate() return training_data @@ -80,7 +83,7 @@ def load_data_from_endpoint(data_endpoint: EndpointConfig, "from URL:\n{}".format(e)) -def _reader_factory(fformat): +def _reader_factory(fformat: Text) -> Optional['TrainingDataReader']: """Generates the appropriate reader class based on the file format.""" from rasa_nlu.training_data.formats import ( MarkdownReader, WitReader, LuisReader, @@ -100,7 +103,8 @@ def _reader_factory(fformat): return reader -def _load(filename, language='en'): +def _load(filename: Text, language: Optional[Text] = 'en' + ) -> Optional['TrainingData']: """Loads a single training data file from disk.""" fformat = _guess_format(filename)