Skip to content

Commit

Permalink
Merge pull request #4818 from RasaHQ/e2e-literal-payloads
Browse files Browse the repository at this point in the history
E2e literal payloads
  • Loading branch information
wochinge committed Nov 26, 2019
2 parents 7fb4f60 + d9d7bf0 commit 0452099
Show file tree
Hide file tree
Showing 13 changed files with 268 additions and 43 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -23,6 +23,8 @@ Removed

Fixed
-----
- End-to-end stories now support literal payloads that specify entities, e.g.
``greet: /greet{"name": "John"}``
- Slots will be correctly interpolated if there are lists in custom response templates.

[1.5.0] - 2019-11-26
Expand Down
37 changes: 37 additions & 0 deletions examples/formbot/tests/end-to-end-stories.md
@@ -0,0 +1,37 @@
## Happy path
* greet: hi
- utter_greet
* request_restaurant: im looking for a restaurant
- restaurant_form
- form{"name": "restaurant_form"}
- form{"name": null}
- utter_slots_values
* thankyou: thanks
- utter_noworries

## Happy path with form prefix
* greet: hi
- utter_greet
* request_restaurant: im looking for a restaurant
- restaurant_form
- form{"name": "restaurant_form"}
* form: inform: [afghan](cuisine) food
- form: restaurant_form
- form{"name": null}
- utter_slots_values
* thankyou: thanks
- utter_noworries

## unhappy path
* greet: hi
- utter_greet
* request_restaurant: im looking for a restaurant
- restaurant_form
- form{"name": "restaurant_form"}
* chitchat: can you share your boss with me?
- utter_chitchat
- restaurant_form
- form{"name": null}
- utter_slots_values
* thankyou: thanks
- utter_noworries
2 changes: 1 addition & 1 deletion rasa/core/events/__init__.py
Expand Up @@ -308,7 +308,7 @@ def _from_story_string(cls, parameters: Dict[Text, Any]) -> Optional[List[Event]
except KeyError as e:
raise ValueError(f"Failed to parse bot uttered event. {e}")

def as_story_string(self, e2e=False):
def as_story_string(self, e2e: bool = False) -> Text:
if self.intent:
if self.entities:
ent_string = json.dumps(
Expand Down
10 changes: 10 additions & 0 deletions rasa/core/interpreter.py
Expand Up @@ -160,6 +160,16 @@ async def parse(
) -> Dict[Text, Any]:
"""Parse a text message."""

return self.synchronous_parse(text, message_id, tracker)

def synchronous_parse(
self,
text: Text,
message_id: Optional[Text] = None,
tracker: DialogueStateTracker = None,
) -> Dict[Text, Any]:
"""Parse a text message."""

intent, confidence, entities = self.extract_intent_and_entities(text)

if self._starts_with_intent_prefix(text):
Expand Down
2 changes: 1 addition & 1 deletion rasa/core/trackers.py
Expand Up @@ -425,7 +425,7 @@ def update(self, event: Event, domain: Optional[Domain] = None) -> None:
for e in domain.slots_for_entities(event.parse_data["entities"]):
self.update(e)

def export_stories(self, e2e=False) -> Text:
def export_stories(self, e2e: bool = False) -> Text:
"""Dump the tracker as a story in the Rasa Core story format.
Returns the dumped tracker as a string."""
Expand Down
50 changes: 34 additions & 16 deletions rasa/core/training/dsl.py
Expand Up @@ -31,6 +31,10 @@


class EndToEndReader(MarkdownReader):
def __init__(self) -> None:
super().__init__()
self._regex_interpreter = RegexInterpreter()

def _parse_item(self, line: Text) -> Optional["Message"]:
"""Parses an md list item line based on the current section type.
Expand All @@ -40,22 +44,36 @@ def _parse_item(self, line: Text) -> Optional["Message"]:
DOCS_BASE_URL
)

item_regex = re.compile(r"\s*(.+?):\s*(.*)")
# Match four groups:
# 1) Potential "form" annotation
# 2) The correct intent
# 3) Optional entities
# 4) The message text
form_group = fr"({FORM_PREFIX}\s*)*"
item_regex = re.compile(r"\s*" + form_group + r"([^{}]+?)({.*})*:\s*(.*)")
match = re.match(item_regex, line)
if match:
intent = match.group(1)
self.current_title = intent
message = match.group(2)
example = self._parse_training_example(message)
example.data["true_intent"] = intent
return example

raise ValueError(
"Encountered invalid end-to-end format for message "
"`{}`. Please visit the documentation page on "
"end-to-end evaluation at {}/user-guide/evaluating-models/"
"end-to-end-evaluation/".format(line, DOCS_BASE_URL)
)

if not match:
raise ValueError(
"Encountered invalid end-to-end format for message "
"`{}`. Please visit the documentation page on "
"end-to-end evaluation at {}/user-guide/evaluating-models/"
"end-to-end-evaluation/".format(line, DOCS_BASE_URL)
)

intent = match.group(2)
self.current_title = intent
message = match.group(4)
example = self._parse_training_example(message)

# If the message starts with the `INTENT_MESSAGE_PREFIX`, potential entities
# are annotated in the JSON format (e.g. `/greet{"name": "Rasa"}`)
if message.startswith(INTENT_MESSAGE_PREFIX):
parsed = self._regex_interpreter.synchronous_parse(message)
example.data["entities"] = parsed["entities"]

example.data["true_intent"] = intent
return example


class StoryStepBuilder:
Expand Down Expand Up @@ -412,7 +430,7 @@ async def add_user_messages(self, messages, line_num):
)
self.current_step_builder.add_user_messages(parsed_messages)

async def add_e2e_messages(self, e2e_messages, line_num):
async def add_e2e_messages(self, e2e_messages: List[Text], line_num: int) -> None:
if not self.current_step_builder:
raise StoryParseError(
"End-to-end message '{}' at invalid "
Expand Down
14 changes: 9 additions & 5 deletions rasa/core/training/structures.py
Expand Up @@ -136,14 +136,18 @@ def add_event(self, event):
self.events.append(event)

@staticmethod
def _checkpoint_string(story_step_element):
def _checkpoint_string(story_step_element: UserUttered) -> Text:
return "> {}\n".format(story_step_element.as_story_string())

@staticmethod
def _user_string(story_step_element, e2e, prefix=""):
def _user_string(
story_step_element: UserUttered, e2e: bool, prefix: Text = ""
) -> Text:
return "* {}{}\n".format(prefix, story_step_element.as_story_string(e2e))

def _store_user_strings(self, story_step_element, e2e, prefix=""):
def _store_user_strings(
self, story_step_element: UserUttered, e2e: bool, prefix: Text = ""
) -> None:
self.story_string_helper.no_form_prefix_string += self._user_string(
story_step_element, e2e
)
Expand All @@ -167,7 +171,7 @@ def _reset_stored_strings(self):
self.story_string_helper.form_prefix_string = ""
self.story_string_helper.no_form_prefix_string = ""

def as_story_string(self, flat=False, e2e=False):
def as_story_string(self, flat: bool = False, e2e: bool = False) -> Text:
# if the result should be flattened, we
# will exclude the caption and any checkpoints.

Expand Down Expand Up @@ -376,7 +380,7 @@ def as_dialogue(self, sender_id, domain):
events.append(ActionExecuted(ACTION_LISTEN_NAME))
return Dialogue(sender_id, events)

def as_story_string(self, flat=False, e2e=False):
def as_story_string(self, flat: bool = False, e2e: bool = False) -> Text:
story_content = ""

# initialize helper for first story step
Expand Down
19 changes: 14 additions & 5 deletions rasa/nlu/training_data/formats/markdown.py
Expand Up @@ -4,6 +4,8 @@
from collections import OrderedDict
from typing import Any, Text, Optional, Tuple, List, Dict

from rasa.core.constants import INTENT_MESSAGE_PREFIX

from rasa.nlu.training_data.formats.readerwriter import (
TrainingDataReader,
TrainingDataWriter,
Expand Down Expand Up @@ -311,13 +313,20 @@ def _generate_message_md(self, message: Dict[Text, Any]) -> Text:

md = ""
text = message.get("text", "")
entities = sorted(message.get("entities", []), key=lambda k: k["start"])

pos = 0
for entity in entities:
md += text[pos : entity["start"]]
md += self._generate_entity_md(text, entity)
pos = entity["end"]

# If a message was prefixed with `INTENT_MESSAGE_PREFIX` (this can only happen
# in end-to-end stories), then potential entities were provided in the JSON
# format (e.g. `/greet{"name": "Rasa"}`) and we don't have to add the NLU
# entity annotation.
if not text.startswith(INTENT_MESSAGE_PREFIX):
entities = sorted(message.get("entities", []), key=lambda k: k["start"])

for entity in entities:
md += text[pos : entity["start"]]
md += self._generate_entity_md(text, entity)
pos = entity["end"]

md += text[pos:]

Expand Down
19 changes: 16 additions & 3 deletions tests/core/conftest.py
Expand Up @@ -242,10 +242,23 @@ def trained_model(loop, project) -> Text:


@pytest.fixture
async def restaurantbot(trained_async, tmpdir_factory) -> Text:
async def restaurantbot(trained_async) -> Text:
restaurant_domain = os.path.join(RESTAURANTBOT_PATH, "domain.yml")
restaurant_config = os.path.join(RESTAURANTBOT_PATH, "config.yml")
restaurant_data = os.path.join(RESTAURANTBOT_PATH, "data/")

agent = await trained_async(restaurant_domain, restaurant_config, restaurant_data)
return agent
return await trained_async(restaurant_domain, restaurant_config, restaurant_data)


@pytest.fixture
async def form_bot(trained_async) -> Agent:
zipped_model = await trained_async(
domain="examples/formbot/domain.yml",
config="examples/formbot/config.yml",
training_files=[
"examples/formbot/data/stories.md",
"examples/formbot/data/nlu.md",
],
)

return Agent.load_local_model(zipped_model)
124 changes: 123 additions & 1 deletion tests/core/test_dsl.py
Expand Up @@ -2,12 +2,14 @@

import json
from collections import Counter
from typing import Text, Dict

import numpy as np
import pytest

from rasa.core import training
from rasa.core.interpreter import RegexInterpreter
from rasa.core.training.dsl import StoryFileReader
from rasa.core.training.dsl import StoryFileReader, EndToEndReader
from rasa.core.domain import Domain
from rasa.core.trackers import DialogueStateTracker
from rasa.core.events import (
Expand Down Expand Up @@ -364,3 +366,123 @@ async def test_read_stories_with_multiline_comments(tmpdir, default_domain):
assert len(story_steps[2].events) == 7
assert story_steps[3].block_name == "say goodbye"
assert len(story_steps[3].events) == 2


@pytest.mark.parametrize(
"line, expected",
[
(" greet: hi", {"intent": "greet", "true_intent": "greet", "text": "hi"}),
(
" greet: /greet",
{
"intent": "greet",
"true_intent": "greet",
"text": "/greet",
"entities": [],
},
),
(
'greet: /greet{"test": "test"}',
{
"intent": "greet",
"entities": [
{"entity": "test", "start": 6, "end": 22, "value": "test"}
],
"true_intent": "greet",
"text": '/greet{"test": "test"}',
},
),
(
'greet{"test": "test"}: /greet{"test": "test"}',
{
"intent": "greet",
"entities": [
{"entity": "test", "start": 6, "end": 22, "value": "test"}
],
"true_intent": "greet",
"text": '/greet{"test": "test"}',
},
),
(
"mood_great: [great](feeling)",
{
"intent": "mood_great",
"entities": [
{"start": 0, "end": 5, "value": "great", "entity": "feeling"}
],
"true_intent": "mood_great",
"text": "great",
},
),
(
'form: greet{"test": "test"}: /greet{"test": "test"}',
{
"intent": "greet",
"entities": [
{"end": 22, "entity": "test", "start": 6, "value": "test"}
],
"true_intent": "greet",
"text": '/greet{"test": "test"}',
},
),
],
)
def test_e2e_parsing(line: Text, expected: Dict):
reader = EndToEndReader()
actual = reader._parse_item(line)

assert actual.as_dict() == expected


@pytest.mark.parametrize(
"parse_data, expected_story_string",
[
(
{
"text": "/simple",
"parse_data": {
"intent": {"confidence": 1.0, "name": "simple"},
"entities": [
{"start": 0, "end": 5, "value": "great", "entity": "feeling"}
],
},
},
"simple: /simple",
),
(
{
"text": "great",
"parse_data": {
"intent": {"confidence": 1.0, "name": "simple"},
"entities": [
{"start": 0, "end": 5, "value": "great", "entity": "feeling"}
],
},
},
"simple: [great](feeling)",
),
(
{
"text": "great",
"parse_data": {
"intent": {"confidence": 1.0, "name": "simple"},
"entities": [],
},
},
"simple: great",
),
],
)
def test_user_uttered_to_e2e(parse_data: Dict, expected_story_string: Text):
event = UserUttered.from_story_string("user", parse_data)[0]

assert isinstance(event, UserUttered)
assert event.as_story_string(e2e=True) == expected_story_string


@pytest.mark.parametrize("line", [" greet{: hi"])
def test_invalid_end_to_end_format(line: Text):
reader = EndToEndReader()

with pytest.raises(ValueError):
_ = reader._parse_item(line)

0 comments on commit 0452099

Please sign in to comment.