Skip to content

Commit

Permalink
improved build speed by reusing trained models
Browse files Browse the repository at this point in the history
  • Loading branch information
tmbo committed Feb 13, 2020
1 parent 7d7178c commit 7e1e1b7
Show file tree
Hide file tree
Showing 13 changed files with 134 additions and 178 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/continous-integration.yml
@@ -1,4 +1,4 @@
name: Continous Integration
name: Continuous Integration

on: [push]

Expand All @@ -18,7 +18,7 @@ env:

jobs:
api:
name: API specification
name: Test API specification
runs-on: ubuntu-latest

steps:
Expand Down Expand Up @@ -177,6 +177,7 @@ jobs:
GITHUB_TAG: ${{ github.ref }}
GITHUB_REPO_SLUG: ${{ github.repository }}
run: |
sudo apt-get update
sudo apt-get -y install pandoc
pip install -U github3.py pypandoc
python3 scripts/publish_gh_release_notes.py
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/documentation.yml
Expand Up @@ -43,7 +43,7 @@ jobs:
curl -sSL -o $RASABASTER "https://storage.googleapis.com/docs-theme/${RASABASTER}?q=$(date +%s%N)"
pip install $RASABASTER
pip install -r requirements.txt
pip install --no-cache-dir -r requirements-docs.txt
pip install -r requirements-docs.txt
pip install git+https://${GITHUB_TOKEN}:x-oauth-basic@github.com/RasaHQ/sphinxcontrib-versioning.git@version_list
pip install -e .
pip list
Expand Down
3 changes: 2 additions & 1 deletion docs/conf.py
Expand Up @@ -336,7 +336,8 @@
scv_whitelist_tags = (
re.compile(r"^[2-9]+\.\d+\.\d+$"),
re.compile(r"^1\.[1-9][0-9]+\.\d+$"),
re.compile(r"^1\.[6789]\.\d+$"),
re.compile(r"^1\.[789]\.\d+$"),
re.compile(r"^1\.6\.2$"),
re.compile(r"^1\.5\.3$"),
re.compile(r"^1\.4\.6$"),
re.compile(r"^1\.3\.10$"),
Expand Down
2 changes: 1 addition & 1 deletion scripts/publish_gh_release_notes.py
@@ -1,6 +1,6 @@
"""
Script used to publish GitHub release notes extracted from CHANGELOG.rst.
This script is executed by Github after a new release was successfully built.
This script is executed by GitHub after a new release was successfully built.
Uses the following environment variables:
* GITHUB_TAG: the name of the tag of the current commit.
Expand Down
27 changes: 25 additions & 2 deletions tests/cli/conftest.py
@@ -1,3 +1,8 @@
import shutil

from subprocess import check_call

from _pytest.tmpdir import TempdirFactory
from typing import Callable
import pytest
import os
Expand All @@ -22,10 +27,28 @@ def do_run(*args, stdin):
return do_run


@pytest.fixture(scope="session")
def init_default_project(tmpdir_factory: TempdirFactory) -> str:
    """Create a default project once per test session.

    Runs ``rasa init --no-prompt`` inside a freshly created temporary
    directory and returns that directory's path as a string.
    """
    project_dir = tmpdir_factory.mktemp("agent")
    project_path = project_dir.strpath

    # Set before launching the CLI: ``check_call`` is given no explicit
    # ``env``, so the child process inherits this process's environment.
    os.environ["LOG_LEVEL"] = "ERROR"

    init_command = ["rasa", "init", "--no-prompt"]
    check_call(init_command, cwd=project_path)
    return project_path


@pytest.fixture
def run_in_default_project(testdir: Testdir) -> Callable[..., RunResult]:
def run_in_default_project(
testdir: Testdir, init_default_project: str
) -> Callable[..., RunResult]:
os.environ["LOG_LEVEL"] = "ERROR"
testdir.run("rasa", "init", "--no-prompt")

# makes sure we do not always retrain an initial model for every "new" project
for file_name in os.listdir(init_default_project):
full_file_name = os.path.join(init_default_project, file_name)
if os.path.isfile(full_file_name):
shutil.copy(full_file_name, str(testdir.tmpdir))
else:
shutil.copytree(full_file_name, str(testdir.tmpdir / file_name))

def do_run(*args):
args = ["rasa"] + list(args)
Expand Down
56 changes: 39 additions & 17 deletions tests/conftest.py
@@ -1,9 +1,10 @@
import logging
from contextlib import contextmanager
from typing import Text, List
import asyncio
from sanic.request import Request
from sanic.testing import SanicTestClient

from typing import Text, List, Tuple, Iterator

import pytest
from _pytest.logging import LogCaptureFixture
from _pytest.tmpdir import TempdirFactory
from sanic import Sanic

Expand All @@ -12,9 +13,11 @@
from rasa.core.agent import Agent, load_agent
from rasa.core.channels import channel
from rasa.core.channels.channel import RestInput
from rasa.core.domain import SessionConfig
from rasa.core.policies import Policy
from rasa.core.policies.memoization import AugmentedMemoizationPolicy
from rasa.core.run import _create_app_without_api
from rasa.core.tracker_store import InMemoryTrackerStore
from rasa.model import get_model
from rasa.train import train_async
from rasa.utils.common import TempDirectoryPath
Expand All @@ -33,13 +36,22 @@
# from a separately installable pytest-cli plugin.
pytest_plugins = ["pytester"]

# https://github.com/pytest-dev/pytest-asyncio/issues/68
# this event_loop is used by pytest-asyncio, and redefining it
# is currently the only way of changing the scope of this fixture
@pytest.fixture(scope="session")
def event_loop(request) -> Iterator[asyncio.AbstractEventLoop]:
    """Provide one asyncio event loop shared by the whole test session.

    Declared with ``pytest.fixture`` rather than the deprecated
    ``pytest.yield_fixture`` alias; plain fixtures support ``yield``-style
    teardown.

    Args:
        request: pytest's built-in fixture request object. It is not used
            here and is deliberately left unannotated: it is not a sanic
            ``Request``.

    Yields:
        A newly created event loop, which is closed when the session ends.
    """
    loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    # Teardown: runs once, after the last test that used the loop.
    loop.close()

@pytest.fixture
async def default_agent(tmpdir_factory: TempdirFactory) -> Agent:

@pytest.fixture(scope="session")
async def _trained_default_agent(tmpdir_factory: TempdirFactory) -> Tuple[Agent, str]:
model_path = tmpdir_factory.mktemp("model").strpath

agent = Agent(
"data/test_domains/default.yml",
"data/test_domains/default_with_slots.yml",
policies=[AugmentedMemoizationPolicy(max_history=3)],
)

Expand All @@ -49,6 +61,18 @@ async def default_agent(tmpdir_factory: TempdirFactory) -> Agent:
return agent


def reset_conversation_state(agent: Agent) -> Agent:
    """Reset the conversation-related state of ``agent`` in place.

    Replaces the agent's tracker store with a new in-memory store built
    from the agent's domain, and sets the domain's session config back to
    the default. The same ``agent`` object is returned.
    """
    domain = agent.domain
    # A fresh store so that tests sharing this agent don't affect each other.
    agent.tracker_store = InMemoryTrackerStore(domain)
    domain.session_config = SessionConfig.default()
    return agent


@pytest.fixture
async def default_agent(_trained_default_agent: Agent) -> Agent:
    """Return the shared trained agent after resetting its conversation state."""
    fresh_agent = reset_conversation_state(_trained_default_agent)
    return fresh_agent


@pytest.fixture(scope="session")
async def trained_moodbot_path() -> Text:
return await train_async(
Expand All @@ -66,17 +90,17 @@ async def unpacked_trained_moodbot_path(
return get_model(trained_moodbot_path)


@pytest.fixture
@pytest.fixture(scope="session")
async def stack_agent(trained_rasa_model: Text) -> Agent:
return await load_agent(model_path=trained_rasa_model)


@pytest.fixture
@pytest.fixture(scope="session")
async def core_agent(trained_core_model: Text) -> Agent:
return await load_agent(model_path=trained_core_model)


@pytest.fixture
@pytest.fixture(scope="session")
async def nlu_agent(trained_nlu_model: Text) -> Agent:
return await load_agent(model_path=trained_nlu_model)

Expand Down Expand Up @@ -122,28 +146,26 @@ async def _train(*args, output_path=None, **kwargs):
return _train


@pytest.fixture()
@pytest.fixture(scope="session")
async def trained_rasa_model(
trained_async,
default_domain_path: Text,
default_config: List[Policy],
default_nlu_data: Text,
default_stories_file: Text,
) -> Text:
trained_stack_model_path = await trained_async(
domain="data/test_domains/default.yml",
domain=default_domain_path,
config=DEFAULT_STACK_CONFIG,
training_files=[default_nlu_data, default_stories_file],
)

return trained_stack_model_path


@pytest.fixture()
@pytest.fixture(scope="session")
async def trained_core_model(
trained_async,
default_domain_path: Text,
default_config: List[Policy],
default_nlu_data: Text,
default_stories_file: Text,
) -> Text:
Expand All @@ -156,7 +178,7 @@ async def trained_core_model(
return trained_core_model_path


@pytest.fixture()
@pytest.fixture(scope="session")
async def trained_nlu_model(
trained_async,
default_domain_path: Text,
Expand Down Expand Up @@ -208,7 +230,7 @@ async def rasa_server_without_api() -> Sanic:
return app


def get_test_client(server):
def get_test_client(server: Sanic) -> SanicTestClient:
test_client = server.test_client
test_client.port = None
return test_client
111 changes: 10 additions & 101 deletions tests/core/conftest.py
@@ -1,9 +1,11 @@
import asyncio
import os

from sanic.request import Request
import uuid
from datetime import datetime

from typing import Text
from typing import Text, Iterator

import pytest
from _pytest.tmpdir import TempdirFactory
Expand Down Expand Up @@ -96,7 +98,7 @@ def __init__(
# this event_loop is used by pytest-asyncio, and redefining it
# is currently the only way of changing the scope of this fixture
@pytest.yield_fixture(scope="session")
def event_loop(request):
def event_loop(request: Request) -> Iterator[asyncio.AbstractEventLoop]:
loop = asyncio.get_event_loop_policy().new_event_loop()
yield loop
loop.close()
Expand Down Expand Up @@ -136,56 +138,20 @@ def default_domain():
return Domain.load(DEFAULT_DOMAIN_PATH_WITH_SLOTS)


@pytest.fixture(scope="session")
async def _default_agent(default_domain: Domain) -> Agent:
agent = Agent(
default_domain,
policies=[MemoizationPolicy()],
interpreter=RegexInterpreter(),
tracker_store=InMemoryTrackerStore(default_domain),
)
training_data = await agent.load_data(DEFAULT_STORIES_FILE)
agent.train(training_data)
return agent


@pytest.fixture()
async def default_agent(_default_agent: Agent) -> Agent:
# Clean tracker store after each test so tests don't affect each other
_default_agent.tracker_store = InMemoryTrackerStore(_default_agent.domain)
_default_agent.domain.session_config = SessionConfig.default()
return _default_agent


@pytest.fixture(scope="session")
def default_agent_path(_default_agent: Agent, tmpdir_factory: TempdirFactory):
path = tmpdir_factory.mktemp("agent").strpath
_default_agent.persist(path)
return path


@pytest.fixture
def default_channel() -> OutputChannel:
    """Provide a newly constructed ``CollectingOutputChannel``."""
    output_channel = CollectingOutputChannel()
    return output_channel


@pytest.fixture
async def default_processor(default_domain, default_nlg):
agent = Agent(
default_domain,
SimplePolicyEnsemble([AugmentedMemoizationPolicy()]),
interpreter=RegexInterpreter(),
)

training_data = await agent.load_data(DEFAULT_STORIES_FILE)
agent.train(training_data)
tracker_store = InMemoryTrackerStore(default_domain)
async def default_processor(default_agent: Agent) -> MessageProcessor:
tracker_store = InMemoryTrackerStore(default_agent.domain)
return MessageProcessor(
agent.interpreter,
agent.policy_ensemble,
default_domain,
default_agent.interpreter,
default_agent.policy_ensemble,
default_agent.domain,
tracker_store,
default_nlg,
TemplatedNaturalLanguageGenerator(default_agent.domain.templates),
)


Expand Down Expand Up @@ -243,39 +209,6 @@ def moodbot_metadata(unpacked_trained_moodbot_path):
)


@pytest.fixture()
async def trained_stack_model(
trained_async,
default_domain_path,
default_stack_config,
default_nlu_data,
default_stories_file,
):

trained_stack_model_path = await trained_async(
domain=default_domain_path,
config=default_stack_config,
training_files=[default_nlu_data, default_stories_file],
)

return trained_stack_model_path


@pytest.fixture
async def prepared_agent(tmpdir_factory) -> Agent:
model_path = tmpdir_factory.mktemp("model").strpath

agent = Agent(
"data/test_domains/default.yml",
policies=[AugmentedMemoizationPolicy(max_history=3)],
)

training_data = await agent.load_data(DEFAULT_STORIES_FILE)
agent.train(training_data)
agent.persist(model_path)
return agent


@pytest.fixture
def default_nlg(default_domain):
    """Build a templated NLG from the templates of ``default_domain``."""
    templates = default_domain.templates
    return TemplatedNaturalLanguageGenerator(templates)
Expand All @@ -297,30 +230,6 @@ def project() -> Text:
return directory


def train_model(loop, project: Text, filename: Text = "test.tar.gz"):
from rasa.constants import (
DEFAULT_CONFIG_PATH,
DEFAULT_DATA_PATH,
DEFAULT_DOMAIN_PATH,
DEFAULT_MODELS_PATH,
)
import rasa.train

output = os.path.join(project, DEFAULT_MODELS_PATH, filename)
domain = os.path.join(project, DEFAULT_DOMAIN_PATH)
config = os.path.join(project, DEFAULT_CONFIG_PATH)
training_files = os.path.join(project, DEFAULT_DATA_PATH)

rasa.train(domain, config, training_files, output, loop=loop)

return output


@pytest.fixture(scope="session")
def trained_model(loop, project) -> Text:
return train_model(loop, project)


@pytest.fixture
async def restaurantbot(trained_async) -> Text:
restaurant_domain = os.path.join(RESTAURANTBOT_PATH, "domain.yml")
Expand Down

0 comments on commit 7e1e1b7

Please sign in to comment.