
Commit 1da7290

Merge af3f0bc into 644f247

arthurTemporim committed Jul 5, 2019
2 parents 644f247 + af3f0bc
Showing 8 changed files with 70 additions and 49 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -33,6 +33,7 @@ Fixed
-----
- all temporal model files are now deleted after stopping the Rasa server
- ``rasa shell nlu`` now outputs unicode characters instead of ``\uxxxx`` codes
- ``rasa train`` now also includes NLU files in other formats than the Rasa format


[1.1.4] - 2019-06-18
2 changes: 1 addition & 1 deletion rasa/core/domain.py
@@ -6,7 +6,6 @@
from typing import Any, Dict, List, Optional, Text, Tuple, Union, Set

import rasa.utils.io
from rasa import data
from rasa.cli.utils import bcolors
from rasa.constants import DOMAIN_SCHEMA_FILE
from rasa.core import utils
@@ -131,6 +130,7 @@ def from_directory(
cls, path: Text, skill_imports: Optional[SkillSelector] = None
) -> "Domain":
"""Loads and merges multiple domain files recursively from a directory tree."""
from rasa import data

domain = Domain.empty()
skill_imports = skill_imports or SkillSelector.all_skills()
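Side note on the change above: the `from rasa import data` import moves from module level into the body of `Domain.from_directory`. The diff does not state why, but the usual reason for this pattern is to break a load-time circular import (plausible here, since `rasa/data.py` now imports from `rasa.nlu.training_data`); deferring the import means it is resolved the first time the method runs rather than when the module loads. A minimal sketch of the pattern with hypothetical module names, not Rasa code:

    # config.py (hypothetical)
    import registry  # safe: registry has no top-level import of config

    DEFAULT_NAME = "bot"

    def describe() -> str:
        return "{}: {} components".format(DEFAULT_NAME, registry.count())

    # registry.py (hypothetical)
    _components = ["tokenizer", "classifier"]

    def count() -> int:
        return len(_components)

    def default_label() -> str:
        from config import DEFAULT_NAME  # deferred: resolved at call time, so no cycle at import time
        return DEFAULT_NAME.upper()

The same deferred-import pattern appears again below in rasa/nlu/test.py, where `from rasa.train import train_nlu` moves into `compare_nlu`.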
6 changes: 3 additions & 3 deletions rasa/core/training/interactive.py
@@ -58,7 +58,7 @@
from rasa.utils.endpoints import EndpointConfig

# noinspection PyProtectedMember
from rasa.nlu.training_data.loading import _guess_format, load_data
from rasa.nlu.training_data import loading
from rasa.nlu.training_data.message import Message

# WARNING: This command line UI is using an external library
@@ -776,7 +776,7 @@ async def _write_nlu_to_file(

# noinspection PyBroadException
try:
previous_examples = load_data(export_nlu_path)
previous_examples = loading.load_data(export_nlu_path)
except Exception as e:
logger.exception("An exception occurred while trying to load the NLU data.")

@@ -797,7 +797,7 @@ async def _write_nlu_to_file(

# need to guess the format of the file before opening it to avoid a read
# in a write
if _guess_format(export_nlu_path) in {"md", "unk"}:
if loading.guess_format(export_nlu_path) in {"md", "unk"}:
fformat = "md"
else:
fformat = "json"
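With the loader helpers now accessed through the module (see rasa/nlu/training_data/loading.py below, where `_guess_format` becomes the public `guess_format`), interactive learning no longer imports a private name. A rough sketch of the read-then-decide flow that `_write_nlu_to_file` follows, with a placeholder export path:

    from rasa.nlu.training_data import loading

    export_nlu_path = "nlu_export.md"  # placeholder; interactive learning prompts for this path

    # Reload whatever was exported previously so new examples can be merged into it.
    previous_examples = loading.load_data(export_nlu_path)

    # Guess the on-disk format before writing, so Markdown exports stay Markdown
    # and everything else falls back to JSON.
    fformat = "md" if loading.guess_format(export_nlu_path) in {"md", "unk"} else "json"
    print(fformat)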
38 changes: 12 additions & 26 deletions rasa/data.py
@@ -4,10 +4,9 @@
import tempfile
import uuid
import typing
from typing import Tuple, List, Text, Set, Union, Optional
import re

import rasa.utils.io as io_utils
from typing import Tuple, List, Text, Set, Union, Optional
from rasa.nlu.training_data import loading

logger = logging.getLogger(__name__)

@@ -23,8 +22,8 @@ def get_core_directory(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to temporary directory containing all found Core training files.
@@ -41,8 +40,8 @@ def get_nlu_directory(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to temporary directory containing all found NLU training files.
@@ -59,8 +58,8 @@ def get_core_nlu_directories(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Path to directory containing the Core files and path to directory
@@ -83,8 +82,8 @@ def get_core_nlu_files(
Args:
paths: List of paths to training files or folders containing them.
skill_imports: `SkillSelector` instance which determines which files should
be loaded.
skill_imports: `SkillSelector` instance which determines which files
should be loaded.
Returns:
Tuple of paths to story and NLU files.
@@ -156,21 +155,8 @@ def _is_valid_filetype(path: Text) -> bool:


def _is_nlu_file(file_path: Text) -> bool:
with open(file_path, encoding="utf-8") as f:
if file_path.endswith(".json"):
content = io_utils.read_json_file(file_path)
is_nlu_file = (
isinstance(content, dict) and content.get("rasa_nlu_data") is not None
)
else:
is_nlu_file = any(_contains_nlu_pattern(l) for l in f)
return is_nlu_file


def _contains_nlu_pattern(text: Text) -> bool:
nlu_pattern = r"\s*##\s*(intent|regex|synonym|lookup):"

return re.match(nlu_pattern, text) is not None
"""Checks whether a file is an NLU file."""
return loading.guess_format(file_path) != loading.UNK


def _is_story_file(file_path: Text) -> bool:
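`_is_nlu_file` no longer greps lines for Markdown section markers; it delegates to `loading.guess_format` and treats any recognised format as NLU data, which is what lets Dialogflow, LUIS and WIT exports be picked up alongside Rasa's own Markdown/JSON (see the reworked tests at the bottom of this diff). A sketch of the resulting behaviour, assuming a hypothetical project directory that mixes formats:

    from rasa import data

    # "data/" here is a hypothetical training-data folder containing Markdown
    # stories, Markdown NLU data and, say, a LUIS JSON export side by side.
    story_files, nlu_files = data.get_core_nlu_files(["data/"])

    # Story files land on the Core side; every file whose format
    # loading.guess_format can identify lands on the NLU side.
    print(sorted(story_files))
    print(sorted(nlu_files))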
3 changes: 2 additions & 1 deletion rasa/nlu/test.py
@@ -19,7 +19,6 @@

from rasa.constants import TEST_DATA_FILE, TRAIN_DATA_FILE
from rasa.model import get_model
from rasa.train import train_nlu
from rasa.utils.io import create_path
from rasa.nlu import config, training_data, utils
from rasa.nlu.utils import write_to_file
@@ -939,6 +938,8 @@ def compare_nlu(
Returns: training examples per run
"""

from rasa.train import train_nlu

training_examples_per_run = []

for run in range(runs):
15 changes: 11 additions & 4 deletions rasa/nlu/training_data/loading.py
@@ -28,8 +28,8 @@
WIT = "wit"
LUIS = "luis"
RASA = "rasa_nlu"
UNK = "unk"
MARKDOWN = "md"
UNK = "unk"
DIALOGFLOW_RELEVANT = {DIALOGFLOW_ENTITIES, DIALOGFLOW_INTENT}

_markdown_section_markers = ["## {}:".format(s) for s in markdown.available_sections]
@@ -110,7 +110,7 @@ def _reader_factory(fformat: Text) -> Optional["TrainingDataReader"]:
def _load(filename: Text, language: Optional[Text] = "en") -> Optional["TrainingData"]:
"""Loads a single training data file from disk."""

fformat = _guess_format(filename)
fformat = guess_format(filename)
if fformat == UNK:
raise ValueError("Unknown data format for file {}".format(filename))

@@ -123,8 +123,15 @@ def _load(filename: Text, language: Optional[Text] = "en") -> Optional["TrainingData"]:
return None


def _guess_format(filename: Text) -> Text:
"""Applies heuristics to guess the data format of a file."""
def guess_format(filename: Text) -> Text:
"""Applies heuristics to guess the data format of a file.
Args:
filename: file whose type should be guessed
Returns:
Guessed file format.
"""
guess = UNK
content = rasa.utils.io.read_file(filename)
try:
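`guess_format` is now public and documented, returning one of the format constants defined at the top of the module (`WIT`, `LUIS`, `RASA`, `MARKDOWN`, the Dialogflow constants, or `UNK` when nothing matches). A quick sketch against the repository's example data, reusing paths from the tests below; the values in the comments are the expected constants, not output captured from this commit:

    from rasa.nlu.training_data import loading

    for path in [
        "data/examples/rasa/demo-rasa.json",         # expected: "rasa_nlu"
        "data/examples/rasa/demo-rasa.md",           # expected: "md"
        "data/examples/luis/demo-restaurants.json",  # expected: "luis"
        "data/examples/wit/demo-flights.json",       # expected: "wit"
    ]:
        print(path, "->", loading.guess_format(path))

    # Anything the heuristics cannot classify comes back as loading.UNK ("unk").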
4 changes: 2 additions & 2 deletions rasa/nlu/training_data/util.py
@@ -30,14 +30,14 @@ def check_duplicate_synonym(entity_synonyms, text, syn, context_str=""):


def get_file_format(resource_name: Text) -> Text:
from rasa.nlu.training_data.loading import _guess_format
from rasa.nlu.training_data import loading

if resource_name is None or not os.path.exists(resource_name):
raise AttributeError("Resource '{}' does not exist.".format(resource_name))

files = utils.list_files(resource_name)

file_formats = list(map(lambda f: _guess_format(f), files))
file_formats = list(map(lambda f: loading.guess_format(f), files))

if not file_formats:
return "json"
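`get_file_format` likewise switches to the public `loading.guess_format`. From the visible part of the function it guesses a format for every file under the resource and falls back to "json" when nothing is found; the remaining branches are not shown in this diff, so the sketch below only assumes the function returns a writable format string:

    from rasa.nlu.training_data.util import get_file_format

    # Ask which format NLU data under a resource should be written back in.
    # "data/examples/rasa" contains both demo-rasa.md and demo-rasa.json.
    fmt = get_file_format("data/examples/rasa")
    print(fmt)  # expected to be "md" or "json"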
50 changes: 38 additions & 12 deletions tests/core/test_data.py
@@ -106,16 +106,47 @@ def test_same_file_names_get_resolved(tmpdir):


@pytest.mark.parametrize(
"line",
"test_input,expected",
[
"##intent:aintent",
"##synonym: synonym",
"##regex:a_regex",
" ##lookup:additional",
(
"dialogflow",
{
"data/examples/dialogflow/entities/cuisine.json",
"data/examples/dialogflow/intents/affirm.json",
"data/examples/dialogflow/entities/location_entries_es.json",
"data/examples/dialogflow/intents/affirm_usersays_en.json",
"data/examples/dialogflow/intents/hi_usersays_es.json",
"data/examples/dialogflow/entities/cuisine_entries_es.json",
"data/examples/dialogflow/intents/inform_usersays_en.json",
"data/examples/dialogflow/intents/hi.json",
"data/examples/dialogflow/intents/goodbye_usersays_en.json",
"data/examples/dialogflow/agent.json",
"data/examples/dialogflow/intents/hi_usersays_en.json",
"data/examples/dialogflow/entities/location.json",
"data/examples/dialogflow/intents/affirm_usersays_es.json",
"data/examples/dialogflow/entities/cuisine_entries_en.json",
"data/examples/dialogflow/package.json",
"data/examples/dialogflow/intents/Default Fallback Intent.json",
"data/examples/dialogflow/intents/goodbye_usersays_es.json",
"data/examples/dialogflow/intents/goodbye.json",
"data/examples/dialogflow/entities/location_entries_en.json",
"data/examples/dialogflow/intents/inform.json",
"data/examples/dialogflow/intents/inform_usersays_es.json",
},
),
("luis", {"data/examples/luis/demo-restaurants.json"}),
(
"rasa",
{"data/examples/rasa/demo-rasa.json", "data/examples/rasa/demo-rasa.md"},
),
("wit", {"data/examples/wit/demo-flights.json"}),
],
)
def test_contains_nlu_pattern(line):
assert data._contains_nlu_pattern(line)
def test_find_nlu_files_with_different_formats(test_input, expected):
examples_dir = "data/examples"
data_dir = os.path.join(examples_dir, test_input)
core_files, nlu_files = data.get_core_nlu_files([data_dir])
assert nlu_files == expected


def test_is_nlu_file_with_json():
@@ -142,8 +173,3 @@ def test_is_not_nlu_file_with_json():
f.write('{"test": "a"}')

assert not data._is_nlu_file(file)


@pytest.mark.parametrize("line", ["- example", "## story intent 1 + two##slots* entry"])
def test_not_contains_nlu_pattern(line):
assert not data._contains_nlu_pattern(line)
