
Commit 1da7290

Merge af3f0bc into 644f247

arthurTemporim committed Jul 5, 2019
2 parents 644f247 + af3f0bc
Showing 8 changed files with 70 additions and 49 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -33,6 +33,7 @@ Fixed
-----
- all temporal model files are now deleted after stopping the Rasa server
- ``rasa shell nlu`` now outputs unicode characters instead of ``\uxxxx`` codes
- ``rasa train`` now also includes NLU files in other formats than the Rasa format


[1.1.4] - 2019-06-18
2 changes: 1 addition & 1 deletion rasa/core/domain.py
@@ -6,7 +6,6 @@
from typing import Any, Dict, List, Optional, Text, Tuple, Union, Set

import rasa.utils.io
from rasa import data
from rasa.cli.utils import bcolors
from rasa.constants import DOMAIN_SCHEMA_FILE
from rasa.core import utils
@@ -131,6 +130,7 @@ def from_directory(
cls, path: Text, skill_imports: Optional[SkillSelector] = None
) -> "Domain":
"""Loads and merges multiple domain files recursively from a directory tree."""
from rasa import data

domain = Domain.empty()
skill_imports = skill_imports or SkillSelector.all_skills()
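Side note on the change above: the `from rasa import data` import moves from module level into the body of `Domain.from_directory`. The diff does not state why, but the usual reason for this pattern is to break a load-time circular import (plausible here, since `rasa/data.py` now imports from `rasa.nlu.training_data`); deferring the import means it is resolved the first time the method runs rather than when the module loads. A minimal sketch of the pattern with hypothetical module names, not Rasa code:

    # config.py (hypothetical)
    import registry  # safe: registry has no top-level import of config

    DEFAULT_NAME = "bot"

    def describe() -> str:
        return "{}: {} components".format(DEFAULT_NAME, registry.count())

    # registry.py (hypothetical)
    _components = ["tokenizer", "classifier"]

    def count() -> int:
        return len(_components)

    def default_label() -> str:
        from config import DEFAULT_NAME  # deferred: resolved at call time, so no cycle at import time
        return DEFAULT_NAME.upper()

The same deferred-import pattern appears again below in rasa/nlu/test.py, where `from rasa.train import train_nlu` moves into `compare_nlu`.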
6 changes: 3 additions & 3 deletions rasa/core/training/interactive.py
@@ -58,7 +58,7 @@
from rasa.utils.endpoints import EndpointConfig

# noinspection PyProtectedMember
from rasa.nlu.training_data.loading import _guess_format, load_data
from rasa.nlu.training_data import loading
from rasa.nlu.training_data.message import Message

# WARNING: This command line UI is using an external library
@@ -776,7 +776,7 @@ async def _write_nlu_to_file(

# noinspection PyBroadException
try:
previous_examples = load_data(export_nlu_path)
previous_examples = loading.load_data(export_nlu_path)
except Exception as e:
logger.exception("An exception occurred while trying to load the NLU data.")

@@ -797,7 +797,7 @@ async def _write_nlu_to_file(

# need to guess the format of the file before opening it to avoid a read
# in a write
if _guess_format(export_nlu_path) in {"md", "unk"}:
if loading.guess_format(export_nlu_path) in {"md", "unk"}:
fformat = "md"
else:
fformat = "json"
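With the loader helpers now accessed through the module (see rasa/nlu/training_data/loading.py below, where `_guess_format` becomes the public `guess_format`), interactive learning no longer imports a private name. A rough sketch of the read-then-decide flow that `_write_nlu_to_file` follows, with a placeholder export path:

    from rasa.nlu.training_data import loading

    export_nlu_path = "nlu_export.md"  # placeholder; interactive learning prompts for this path

    # Reload whatever was exported previously so new examples can be merged into it.
    previous_examples = loading.load_data(export_nlu_path)

    # Guess the on-disk format before writing, so Markdown exports stay Markdown
    # and everything else falls back to JSON.
    fformat = "md" if loading.guess_format(export_nlu_path) in {"md", "unk"} else "json"
    print(fformat)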
38 changes: 12 additions & 26 deletions rasa/data.py
@@ -4,10 +4,9 @@
import tempfile
import uuid
import typing
from typing import Tuple, List, Text, Set, Union, Optional
import re

import rasa.utils.io as io_utils
from typing import Tuple, List, Text, Set, Union, Optional
from rasa.nlu.training_data import loading

logger = logging.getLogger(__name__)

@@ -23,8 +22,8 @@ def get_core_directory(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to temporary directory containing all found Core training files.
@@ -41,8 +40,8 @@ def get_nlu_directory(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to temporary directory containing all found NLU training files.
@@ -59,8 +58,8 @@ def get_core_nlu_directories(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to directory containing the Core files and path to directory
@@ -83,8 +82,8 @@ def get_core_nlu_files(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Tuple of paths to story and NLU files.
@@ -156,21 +155,8 @@ def _is_valid_filetype(path: Text) -> bool:


def _is_nlu_file(file_path: Text) -> bool:
with open(file_path, encoding="utf-8") as f:
if file_path.endswith(".json"):
content = io_utils.read_json_file(file_path)
is_nlu_file = (
isinstance(content, dict) and content.get("rasa_nlu_data") is not None
)
else:
is_nlu_file = any(_contains_nlu_pattern(l) for l in f)
return is_nlu_file


def _contains_nlu_pattern(text: Text) -> bool:
nlu_pattern = r"\s*##\s*(intent|regex|synonym|lookup):"

return re.match(nlu_pattern, text) is not None
"""Checks whether a file is an NLU file."""
return loading.guess_format(file_path) != loading.UNK


def _is_story_file(file_path: Text) -> bool:
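`_is_nlu_file` no longer greps lines for Markdown section markers; it delegates to `loading.guess_format` and treats any recognised format as NLU data, which is what lets Dialogflow, LUIS and WIT exports be picked up alongside Rasa's own Markdown/JSON (see the reworked tests at the bottom of this diff). A sketch of the resulting behaviour, assuming a hypothetical project directory that mixes formats:

    from rasa import data

    # "data/" here is a hypothetical training-data folder containing Markdown
    # stories, Markdown NLU data and, say, a LUIS JSON export side by side.
    story_files, nlu_files = data.get_core_nlu_files(["data/"])

    # Story files land on the Core side; every file whose format
    # loading.guess_format can identify lands on the NLU side.
    print(sorted(story_files))
    print(sorted(nlu_files))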
3 changes: 2 additions & 1 deletion rasa/nlu/test.py
@@ -19,7 +19,6 @@

from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE
from rasa.model import get_model
from rasa.train import train_nlu
from rasa.utils.io import create_path
from rasa.nlu import config, training_data, utils
from rasa.nlu.utils import write_to_file
@@ -939,6 +938,8 @@ def compare_nlu(
Returns: training examples per run
"""

from rasa.train import train_nlu

training_examples_per_run = []

for run in range(runs):
15 changes: 11 additions & 4 deletions rasa/nlu/training_data/loading.py
@@ -28,8 +28,8 @@
WIT = "wit"
LUIS = "luis"
RASA = "rasa_nlu"
UNK = "unk"
MARKDOWN = "md"
UNK = "unk"
DIALOGFLOW_RELEVANT = {DIALOGFLOW_ENTITIES, DIALOGFLOW_INTENT}

_markdown_section_markers = ["## {}:".format(s) for s in markdown.available_sections]
@@ -110,7 +110,7 @@ def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
def _load(filename: Text, language: Optional[Text] = "en") -> Optional["TrainingData"]:
"""Loads a single training data file from disk."""

fformat = _guess_format(filename)
fformat = guess_format(filename)
if fformat == UNK:
raise ValueError("Unknown data format for file {}".format(filename))

@@ -123,8 +123,15 @@ def _load(filename: Text, language: Optional[Text] = "en") -> Optional["TrainingData"]:
return None


def _guess_format(filename: Text) -> Text:
"""Applies heuristics to guess the data format of a file."""
def guess_format(filename: Text) -> Text:
"""Applies heuristics to guess the data format of a file.
Args:
filename: file whose type should be guessed
Returns:
Guessed file format.
"""
guess = UNK
content = rasa.utils.io.read_file(filename)
try:
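`guess_format` is now public and documented, returning one of the format constants defined at the top of the module (`WIT`, `LUIS`, `RASA`, `MARKDOWN`, the Dialogflow constants, or `UNK` when nothing matches). A quick sketch against the repository's example data, reusing paths from the tests below; the values in the comments are the expected constants, not output captured from this commit:

    from rasa.nlu.training_data import loading

    for path in [
        "data/examples/rasa/demo-rasa.json",         # expected: "rasa_nlu"
        "data/examples/rasa/demo-rasa.md",           # expected: "md"
        "data/examples/luis/demo-restaurants.json",  # expected: "luis"
        "data/examples/wit/demo-flights.json",       # expected: "wit"
    ]:
        print(path, "->", loading.guess_format(path))

    # Anything the heuristics cannot classify comes back as loading.UNK ("unk").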
4 changes: 2 additions & 2 deletions rasa/nlu/training_data/util.py
@@ -30,14 +30,14 @@ def check_duplicate_synonym(entity_synonyms, text, syn, context_str=""):


def get_file_format(resource_name: Text) -> Text:
from rasa.nlu.training_data.loading import _guess_format
from rasa.nlu.training_data import loading

if resource_name is None or not os.path.exists(resource_name):
raise AttributeError("Resource '{}' does not exist.".format(resource_name))

files = utils.list_files(resource_name)

file_formats = list(map(lambda f: _guess_format(f), files))
file_formats = list(map(lambda f: loading.guess_format(f), files))

if not file_formats:
return "json"
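`get_file_format` likewise switches to the public `loading.guess_format`. From the visible part of the function it guesses a format for every file under the resource and falls back to "json" when nothing is found; the remaining branches are not shown in this diff, so the sketch below only assumes the function returns a writable format string:

    from rasa.nlu.training_data.util import get_file_format

    # Ask which format NLU data under a resource should be written back in.
    # "data/examples/rasa" contains both demo-rasa.md and demo-rasa.json.
    fmt = get_file_format("data/examples/rasa")
    print(fmt)  # expected to be "md" or "json"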
50 changes: 38 additions & 12 deletions tests/core/test_data.py
@@ -106,16 +106,47 @@ def test_same_file_names_get_resolved(tmpdir):


@pytest.mark.parametrize(
"line",
"test_input,expected",
[
"##intent:aintent",
"##synonym: synonym",
"##regex:a_regex",
" ##lookup:additional",
(
"dialogflow",
{
"data/examples/dialogflow/entities/cuisine.json",
"data/examples/dialogflow/intents/affirm.json",
"data/examples/dialogflow/entities/location_entries_es.json",
"data/examples/dialogflow/intents/affirm_usersays_en.json",
"data/examples/dialogflow/intents/hi_usersays_es.json",
"data/examples/dialogflow/entities/cuisine_entries_es.json",
"data/examples/dialogflow/intents/inform_usersays_en.json",
"data/examples/dialogflow/intents/hi.json",
"data/examples/dialogflow/intents/goodbye_usersays_en.json",
"data/examples/dialogflow/agent.json",
"data/examples/dialogflow/intents/hi_usersays_en.json",
"data/examples/dialogflow/entities/location.json",
"data/examples/dialogflow/intents/affirm_usersays_es.json",
"data/examples/dialogflow/entities/cuisine_entries_en.json",
"data/examples/dialogflow/package.json",
"data/examples/dialogflow/intents/Default Fallback Intent.json",
"data/examples/dialogflow/intents/goodbye_usersays_es.json",
"data/examples/dialogflow/intents/goodbye.json",
"data/examples/dialogflow/entities/location_entries_en.json",
"data/examples/dialogflow/intents/inform.json",
"data/examples/dialogflow/intents/inform_usersays_es.json",
},
),
("luis", {"data/examples/luis/demo-restaurants.json"}),
(
"rasa",
{"data/examples/rasa/demo-rasa.json", "data/examples/rasa/demo-rasa.md"},
),
("wit", {"data/examples/wit/demo-flights.json"}),
],
)
def test_contains_nlu_pattern(line):
assert data._contains_nlu_pattern(line)
def test_find_nlu_files_with_different_formats(test_input, expected):
examples_dir = "data/examples"
data_dir = os.path.join(examples_dir, test_input)
core_files, nlu_files = data.get_core_nlu_files([data_dir])
assert nlu_files == expected


def test_is_nlu_file_with_json():
@@ -142,8 +173,3 @@ def test_is_not_nlu_file_with_json():
f.write('{"test": "a"}')

assert not data._is_nlu_file(file)


@pytest.mark.parametrize("line", ["- example", "## story intent 1 + two##slots* entry"])
def test_not_contains_nlu_pattern(line):
assert not data._contains_nlu_pattern(line)
