Skip to content

Commit

Permalink
Merge bea36ee into 3416744
Browse files Browse the repository at this point in the history
  • Loading branch information
ricwo committed Sep 11, 2018
2 parents 3416744 + bea36ee commit ce40a3d
Show file tree
Hide file tree
Showing 14 changed files with 578 additions and 83 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Expand Up @@ -11,9 +11,14 @@ This project adheres to `Semantic Versioning`_ starting with version 0.7.0.

Added
-----
- ``EndpointConfig`` class that handles authenticated requests (ported from Rasa Core)
- ``DataRouter()`` class supports a ``model_server`` ``EndpointConfig``, which it regularly queries to fetch NLU models
- this can be used with ``rasa_nlu.server`` via the ``--endpoints`` option (the key for the model server config is ``model``)
- docs on model fetching from a URL

Changed
-------
- loading training data from a URL requires an instance of ``EndpointConfig``

- Changed evaluate behaviour to plot two histogram bars per bin.
Plotting confidence of right predictions in a wine-ish colour
Expand Down
2 changes: 2 additions & 0 deletions alt_requirements/requirements_dev.txt
Expand Up @@ -12,6 +12,8 @@ pytest==3.3.2
treq==17.8.0
moto==1.2.0
mock==2.0.0
responses==0.9.0
httpretty==0.9.5
# other
google-cloud-storage==1.7.0
azure-storage-blob==1.0.0
54 changes: 54 additions & 0 deletions docs/migrations.rst
Expand Up @@ -4,6 +4,60 @@ Migration Guide
This page contains information about changes between major versions and
how you can migrate from one version to another.

0.13.x to 0.13.3
----------------
- ``rasa_nlu.server`` has to be supplied with a ``yml`` file defining the
model endpoint from which to retrieve trained models. The file location has
to be passed with the ``--endpoints`` argument, e.g.
``python -m rasa_nlu.server --path projects --endpoints endpoints.yml``
``endpoints.yml`` needs to contain the ``model`` key
with a ``url`` and an optional ``token``. Here's an example:

.. code-block:: yaml

    model:
      url: http://my_model_server.com/models/default/nlu/tags/latest
      token: my_model_server_token

.. note::

If you configure ``rasa_nlu.server`` to pull models from a remote server,
the default project name will be used. It is defined by
``RasaNLUModelConfig.DEFAULT_PROJECT_NAME``.


- ``rasa_nlu.train`` can also be run with the ``--endpoints`` argument
if you want to pull training data from a URL. Alternatively, the
current ``--url`` syntax is still supported.

.. code-block:: yaml

    data:
      url: http://my_data_server.com/projects/default/data
      token: my_data_server_token

.. note::

Your endpoint file may contain entries for both ``model`` and ``data``.
``rasa_nlu.server`` and ``rasa_nlu.train`` will pick the relevant entry.

- If you directly access the ``DataRouter`` class or ``rasa_nlu.train``'s
``do_train()`` method, you can directly create instances of
``EndpointConfig`` without creating a ``yml`` file. Example:

.. code-block:: python

    from rasa_nlu.utils import EndpointConfig
    from rasa_nlu.data_router import DataRouter

    model_endpoint = EndpointConfig(
        url="http://my_model_server.com/models/default/nlu/tags/latest",
        token="my_model_server_token"
    )
    interpreter = DataRouter("projects", model_server=model_endpoint)

0.12.x to 0.13.0
----------------

Expand Down
81 changes: 48 additions & 33 deletions rasa_nlu/data_router.py
Expand Up @@ -3,30 +3,27 @@
from __future__ import print_function
from __future__ import unicode_literals

import glob
import datetime
import io
import logging
import tempfile

import datetime
import os
from builtins import object
from concurrent.futures import ProcessPoolExecutor as ProcessPool
from future.utils import PY3
from rasa_nlu.training_data import Message
from typing import Text, Dict, Any, Optional, List

from builtins import object
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.logger import jsonFileLogObserver, Logger

from rasa_nlu import utils, config
from rasa_nlu.components import ComponentBuilder
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.evaluate import get_evaluation_metrics, clean_intent_labels
from rasa_nlu.model import InvalidProjectError
from rasa_nlu.project import Project
from rasa_nlu.project import Project, load_from_server
from rasa_nlu.train import do_train_in_worker, TrainingException
from rasa_nlu.training_data import Message
from rasa_nlu.training_data.loading import load_data
from twisted.internet import reactor
from twisted.internet.defer import Deferred
from twisted.logger import jsonFileLogObserver, Logger
from typing import Text, Dict, Any, Optional, List

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -92,13 +89,17 @@ def __init__(self,
response_log=None,
emulation_mode=None,
remote_storage=None,
component_builder=None):
component_builder=None,
model_server=None,
wait_time_between_pulls=None):
self._training_processes = max(max_training_processes, 1)
self._current_training_processes = 0
self.responses = self._create_query_logger(response_log)
self.project_dir = config.make_path_absolute(project_dir)
self.emulator = self._create_emulator(emulation_mode)
self.remote_storage = remote_storage
self.model_server = model_server
self.wait_time_between_pulls = wait_time_between_pulls

if component_builder:
self.component_builder = component_builder
Expand Down Expand Up @@ -151,23 +152,37 @@ def _collect_projects(self, project_dir):
projects.extend(self._list_projects_in_cloud())
return projects

def _create_project_store(self, project_dir):
def _create_project_store(self,
project_dir):
default_project = RasaNLUModelConfig.DEFAULT_PROJECT_NAME

projects = self._collect_projects(project_dir)

project_store = {}

for project in projects:
project_store[project] = Project(self.component_builder,
project,
self.project_dir,
self.remote_storage)

if not project_store:
default_model = RasaNLUModelConfig.DEFAULT_PROJECT_NAME
project_store[default_model] = Project(
project=RasaNLUModelConfig.DEFAULT_PROJECT_NAME,
project_dir=self.project_dir,
remote_storage=self.remote_storage)
if self.model_server is not None:
project_store[default_project] = load_from_server(
self.component_builder,
default_project,
self.project_dir,
self.remote_storage,
self.model_server,
self.wait_time_between_pulls
)
else:
for project in projects:
project_store[project] = Project(self.component_builder,
project,
self.project_dir,
self.remote_storage)

if not project_store:
project_store[default_project] = Project(
project=default_project,
project_dir=self.project_dir,
remote_storage=self.remote_storage
)

return project_store

def _pre_load(self, projects):
Expand Down Expand Up @@ -351,19 +366,19 @@ def training_errback(failure):
"component. This blocks the server during "
"training.")
model_path = do_train_in_worker(
train_config,
data_file,
path=self.project_dir,
project=project,
fixed_model_name=model_name,
storage=self.remote_storage)
train_config,
data_file,
path=self.project_dir,
project=project,
fixed_model_name=model_name,
storage=self.remote_storage)
model_dir = os.path.basename(os.path.normpath(model_path))
training_callback(model_dir)
return model_dir
except TrainingException as e:
logger.warning(e)
target_project = self.project_store.get(
e.failed_target_project)
e.failed_target_project)
if target_project:
target_project.status = 0
raise e
Expand Down
2 changes: 1 addition & 1 deletion rasa_nlu/model.py
Expand Up @@ -43,7 +43,7 @@ def __str__(self):


class UnsupportedModelError(Exception):
"""Raised when a model is to old to be loaded.
"""Raised when a model is too old to be loaded.
Attributes:
message -- explanation of why the model is invalid
Expand Down

0 comments on commit ce40a3d

Please sign in to comment.