From 7b505141e7ce3357b33071a722560330f694c918 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Mon, 28 Jan 2019 11:15:53 +0100 Subject: [PATCH 1/3] skip training data validation for evaluation --- rasa_nlu/evaluate.py | 3 ++- rasa_nlu/training_data/loading.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/rasa_nlu/evaluate.py b/rasa_nlu/evaluate.py index 756a98acd98c..70e6c5bde994 100644 --- a/rasa_nlu/evaluate.py +++ b/rasa_nlu/evaluate.py @@ -714,7 +714,8 @@ def run_evaluation(data_path, model, else: interpreter = Interpreter.load(model, component_builder) test_data = training_data.load_data(data_path, - interpreter.model_metadata.language) + interpreter.model_metadata.language, + validate=False) extractors = get_entity_extractors(interpreter) entity_predictions, tokens = get_entity_predictions(interpreter, test_data) diff --git a/rasa_nlu/training_data/loading.py b/rasa_nlu/training_data/loading.py index e6f2b5a1fd93..00165318e352 100644 --- a/rasa_nlu/training_data/loading.py +++ b/rasa_nlu/training_data/loading.py @@ -13,6 +13,7 @@ if typing.TYPE_CHECKING: from rasa_nlu.training_data import TrainingData + from rasa_nlu.training_data.formats.readerwriter import TrainingDataReader logger = logging.getLogger(__name__) @@ -40,7 +41,8 @@ def load_data(resource_name: Text, - language: Optional[Text] = 'en') -> 'TrainingData': + language: Optional[Text] = 'en', + validate: bool = True) -> 'TrainingData': """Load training data from disk. Merges them if loaded from disk and multiple files are found.""" @@ -56,7 +58,8 @@ def load_data(resource_name: Text, else: training_data = data_sets[0].merge(*data_sets[1:]) - training_data.validate() + if validate: + training_data.validate() return training_data @@ -80,7 +83,7 @@ def load_data_from_endpoint(data_endpoint: EndpointConfig, "from URL:\n{}".format(e)) -def _reader_factory(fformat): +def _reader_factory(fformat: Text) -> Optional['TrainingDataReader']: """Generates the appropriate reader class based on the file format.""" from rasa_nlu.training_data.formats import ( MarkdownReader, WitReader, LuisReader, @@ -100,7 +103,8 @@ def _reader_factory(fformat): return reader -def _load(filename, language='en'): +def _load(filename: Text, language: Optional[Text] = 'en' + ) -> Optional['TrainingData']: """Loads a single training data file from disk.""" fformat = _guess_format(filename) From b89d9d49bb65c7088840287dc1398ed96033eb47 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Mon, 28 Jan 2019 11:17:08 +0100 Subject: [PATCH 2/3] update changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ad012dae4dfe..24ae9e797e98 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,6 +12,7 @@ Added Changed ------- +- skip training data validation for evaluation Removed ------- From a162f6df635df18c1f6d3904e28fe95766dcf8ae Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 29 Jan 2019 11:15:28 +0100 Subject: [PATCH 3/3] fix changelog message --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 24ae9e797e98..8275a4e75895 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -12,7 +12,7 @@ Added Changed ------- -- skip training data validation for evaluation +- skip test data validation for evaluation Removed -------