Skip to content

Commit

Permalink
Merge fa28213 into a6980b9
Browse files Browse the repository at this point in the history
  • Loading branch information
ricwo committed Sep 2, 2018
2 parents a6980b9 + fa28213 commit 0b7e4da
Show file tree
Hide file tree
Showing 14 changed files with 556 additions and 72 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -11,9 +11,14 @@ This project adheres to `Semantic Versioning`_ starting with version 0.7.0.

Added
-----
- ``EndpointConfig`` class that handles authenticated requests (ported from Rasa Core)
- ``DataRouter()`` class supports a ``model_server`` ``EndpointConfig``, which it regularly queries to fetch NLU models
- this can be used with ``rasa_nlu.server`` with the ``--endpoints`` option (the key for the model server config is ``model``)
- docs on model fetching from a URL

Changed
-------
- loading training data from a URL requires an instance of ``EndpointConfig``

Removed
-------
Expand Down
2 changes: 2 additions & 0 deletions alt_requirements/requirements_dev.txt
Expand Up @@ -12,6 +12,8 @@ pytest==3.3.2
treq==17.8.0
moto==1.2.0
mock==2.0.0
responses==0.9.0
httpretty==0.9.5
# other
google-cloud-storage==1.7.0
azure-storage-blob==1.0.0
49 changes: 49 additions & 0 deletions docs/migrations.rst
Expand Up @@ -4,6 +4,55 @@ Migration Guide
This page contains information about changes between major versions and
how you can migrate from one version to another.

0.13.x to 0.13.3
----------------

- ``rasa_nlu.server`` needs to be supplied with a ``yml`` file defining the
model endpoint to retrieve training data. The file location has to be passed
with the ``--endpoints`` argument, e.g.
``python -m rasa_nlu.server --path projects --endpoints endpoints.yml``.
``endpoints.yml`` needs to contain the ``model`` key
with a ``url`` and an optional ``token``. Here's an example:

.. code-block:: yaml

    model:
      url: http://my_model_server.com/models/default/nlu/tags/latest
      token: my_model_server_token

- ``rasa_nlu.train`` also has to be run with the ``--endpoints`` argument
if you want to pull training data from a URL. This replaces the previous
``--url`` syntax.

.. code-block:: yaml

    data:
      url: http://my_data_server.com/projects/default/data
      token: my_data_server_token

.. note::

    Your endpoint file may contain entries for both ``model`` and ``data``.
    ``rasa_nlu.server`` and ``rasa_nlu.train`` will pick the relevant entry.

- If you use the ``DataRouter`` class or ``rasa_nlu.train``'s
``do_train()`` function directly, you can create instances of
``EndpointConfig`` without creating a ``yml`` file. Example:

.. code-block:: python

    from rasa_nlu.utils import EndpointConfig
    from rasa_nlu.data_router import DataRouter
    model_endpoint = EndpointConfig(
        url="http://my_model_server.com/models/default/nlu/tags/latest",
        token="my_model_server_token"
    )

    interpreter = DataRouter("projects",
                             model_server=model_endpoint)


0.12.x to 0.13.0
----------------

Expand Down
76 changes: 50 additions & 26 deletions rasa_nlu/data_router.py
Expand Up @@ -3,30 +3,26 @@
from __future__ import print_function
from __future__ import unicode_literals

import glob
import datetime
import io
import logging
import tempfile

import datetime
import os
from builtins import object
from concurrent.futures import ProcessPoolExecutor as ProcessPool
from future.utils import PY3
from rasa_nlu.training_data import Message
from typing import Text, Dict, Any, Optional, List

from rasa_nlu import utils, config
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.evaluate import get_evaluation_metrics, clean_intent_labels
from rasa_nlu.model import InvalidProjectError
from rasa_nlu.project import Project
from rasa_nlu.project import Project, load_from_server
from rasa_nlu.train import do_train_in_worker, TrainingException
from rasa_nlu.training_data import Message
from rasa_nlu.training_data.loading import load_data
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.logger import jsonFileLogObserver, Logger
from typing import Text, Dict, Any, Optional, List

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -92,13 +88,17 @@ def __init__(self,
response_log=None,
emulation_mode=None,
remote_storage=None,
component_builder=None):
component_builder=None,
model_server=None,
wait_time_between_pulls=None):
self._training_processes = max(max_training_processes, 1)
self._current_training_processes = 0
self.responses = self._create_query_logger(response_log)
self.project_dir = config.make_path_absolute(project_dir)
self.emulator = self._create_emulator(emulation_mode)
self.remote_storage = remote_storage
self.model_server = model_server
self.wait_time_between_pulls = wait_time_between_pulls

if component_builder:
self.component_builder = component_builder
Expand Down Expand Up @@ -151,23 +151,47 @@ def _collect_projects(self, project_dir):
projects.extend(self._list_projects_in_cloud())
return projects

def _create_project_store(self, project_dir):
def _create_project_store(self,
project_dir):
projects = self._collect_projects(project_dir)

project_store = {}

for project in projects:
project_store[project] = Project(self.component_builder,
project,
self.project_dir,
self.remote_storage)
if self.model_server is not None:
project_store[project] = load_from_server(
self.component_builder,
project,
self.project_dir,
self.remote_storage,
self.model_server,
self.wait_time_between_pulls
)
else:
project_store[project] = Project(
self.component_builder,
project,
self.project_dir,
self.remote_storage
)

if not project_store:
default_model = RasaNLUModelConfig.DEFAULT_PROJECT_NAME
project_store[default_model] = Project(
project=RasaNLUModelConfig.DEFAULT_PROJECT_NAME,
project_dir=self.project_dir,
remote_storage=self.remote_storage)
default_project = RasaNLUModelConfig.DEFAULT_PROJECT_NAME
if self.model_server is not None:
project_store[default_project] = load_from_server(
self.component_builder,
default_project,
self.project_dir,
self.remote_storage,
self.model_server,
self.wait_time_between_pulls
)
else:

project_store[default_project] = Project(
project=default_project,
project_dir=self.project_dir,
remote_storage=self.remote_storage)
return project_store

def _pre_load(self, projects):
Expand Down Expand Up @@ -351,19 +375,19 @@ def training_errback(failure):
"component. This blocks the server during "
"training.")
model_path = do_train_in_worker(
train_config,
data_file,
path=self.project_dir,
project=project,
fixed_model_name=model_name,
storage=self.remote_storage)
train_config,
data_file,
path=self.project_dir,
project=project,
fixed_model_name=model_name,
storage=self.remote_storage)
model_dir = os.path.basename(os.path.normpath(model_path))
training_callback(model_dir)
return model_dir
except TrainingException as e:
logger.warning(e)
target_project = self.project_store.get(
e.failed_target_project)
e.failed_target_project)
if target_project:
target_project.status = 0
raise e
Expand Down
2 changes: 1 addition & 1 deletion rasa_nlu/model.py
Expand Up @@ -43,7 +43,7 @@ def __str__(self):


class UnsupportedModelError(Exception):
"""Raised when a model is to old to be loaded.
"""Raised when a model is too old to be loaded.
Attributes:
message -- explanation of why the model is invalid
Expand Down

0 comments on commit 0b7e4da

Please sign in to comment.