diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 92378e48a579..46c262e99b8d 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -15,7 +15,10 @@ Added Changed ------- - +- substitute LSTM with Transformer in ``EmbeddingPolicy`` +- ``EmbeddingPolicy`` can now use ``MaxHistoryTrackerFeaturizer`` +- non zero ``evaluate_on_num_examples`` in ``EmbeddingPolicy`` is the size of + hold out validation set that is excluded from training data Removed ------- @@ -23,7 +26,9 @@ Removed Fixed ----- - +- ``MappingPolicy`` standard featurizer is set to ``None`` +- ``Flood control exceeded`` error in Telegram connector which happened because the + webhook was set twice [1.2.2] - 2019-08-07 ^^^^^^^^^^^^^^^^^^^^ @@ -66,8 +71,8 @@ Changed Fixed ----- - ``rasa test core`` can handle compressed model files -- Rasa can handle story files containing multi line comments -- Template will retain `{` if escaped with `{`. e.g. `{{"foo": {bar}}}` will result in `{"foo": "replaced value"}` +- rasa can handle story files containing multi line comments +- template will retain `{` if escaped with `{`. e.g. `{{"foo": {bar}}}` will result in `{"foo": "replaced value"}` [1.1.8] - 2019-07-25 ^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/core/policies.rst b/docs/core/policies.rst index 89d71db64bef..a178cbac5897 100644 --- a/docs/core/policies.rst +++ b/docs/core/policies.rst @@ -167,47 +167,27 @@ set the ``random_seed`` attribute of the ``KerasPolicy`` to any integer. Embedding Policy ^^^^^^^^^^^^^^^^ -The Recurrent Embedding Dialogue Policy (REDP) -described in our paper: ``_ +Transformer Embedding Dialogue Policy (TEDP) + +Transformer version of the Recurrent Embedding Dialogue Policy (REDP) +used in our paper: ``_ This policy has a pre-defined architecture, which comprises the following steps: - - apply dense layers to create embeddings for user intents, - entities and system actions including previous actions and slots; - - use the embeddings of previous user inputs as a user memory - and embeddings of previous system actions as a system memory; - - concatenate user input, previous system action and slots - embeddings for current time into an input vector to rnn; - - using user and previous system action embeddings from the input - vector, calculate attention probabilities over the user and - system memories (for system memory, this policy uses - `NTM mechanism `_ with attention - by location); - - sum the user embedding and user attention vector and feed it - and the embeddings of the slots as an input to an LSTM cell; - - apply a dense layer to the output of the LSTM to get a raw - recurrent embedding of a dialogue; - - sum this raw recurrent embedding of a dialogue with system - attention vector to create dialogue level embedding, this step - allows the algorithm to repeat previous system action by copying - its embedding vector directly to the current time output; - - weight previous LSTM states with system attention probabilities - to get the previous action embedding, the policy is likely payed - attention to; - - if the similarity between this previous action embedding and - current time dialogue embedding is high, overwrite current LSTM - state with the one from the time when this action happened; - - for each LSTM time step, calculate the similarity between the + - concatenate user input (user intent and entities), + previous system action, slots and active form + for each time step into an input vector + to pre-transformer embedding layer; + - feed it to transformer; + - apply a dense layer to the output of the transformer 
+ to get embeddings of a dialogue for each time step; + - apply a dense layer to create embeddings for system actions for each time step; + - calculate the similarity between the dialogue embedding and embedded system actions. This step is based on the `StarSpace `_ idea. -.. note:: - - This policy only works with - ``FullDialogueTrackerFeaturizer(state_featurizer)``. - It is recommended to use ``state_featurizer=LabelTokenizerSingleStateFeaturizer(...)`` (see :ref:`featurization` for details). @@ -221,52 +201,32 @@ It is recommended to use Pass an appropriate number of ``epochs`` to the ``EmbeddingPolicy``, otherwise the policy will be trained only for ``1`` - epoch. Since this is an embedding based policy, it requires a large - number of epochs, which depends on the complexity of the - training data and whether attention is used or not. - - The main feature of this policy is an **attention** mechanism over - previous user input and system actions. - **Attention is turned on by default**; in order to turn it off, - configure the following parameters: - - - ``attn_before_rnn`` if ``true`` the algorithm will use - attention mechanism over previous user input, default ``true``; - - ``attn_after_rnn`` if ``true`` the algorithm will use - attention mechanism over previous system actions and will be - able to copy previously executed action together with LSTM's - hidden state from its history, default ``true``; - - ``sparse_attention`` if ``true`` ``sparsemax`` will be used - instead of ``softmax`` for attention probabilities, default - ``false``; - - ``attn_shift_range`` the range of allowed location-based - attention shifts for system memory (``attn_after_rnn``), see - ``_ for details; + epoch. - .. note:: - - Attention requires larger values of ``epochs`` and takes longer - to train. But it can learn more complicated and nonlinear behaviour. + The main feature of this policy is **transformer**. 
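As a minimal sketch (assuming the Rasa 1.x ``Agent`` API and illustrative parameter values, none of which are prescribed by this change), the policy can be constructed in code with the hyper-parameters described below passed as keyword arguments; passing ``max_history`` switches it to the ``MaxHistoryTrackerFeaturizer``:

.. code-block:: python

    from rasa.core.agent import Agent
    from rasa.core.policies.embedding_policy import EmbeddingPolicy

    # Hypothetical configuration; any omitted parameter keeps its default value.
    policy = EmbeddingPolicy(
        max_history=5,               # use MaxHistoryTrackerFeaturizer instead of full dialogues
        transformer_size=128,        # number of units in the transformer
        num_transformer_layers=1,
        pos_encoding="timing",       # or "emb" together with max_seq_length
        num_heads=4,
        batch_strategy="balanced",   # or "sequence"
        loss_type="softmax",         # or "margin"
        evaluate_on_num_examples=0,  # non-zero creates a hold out validation set
        epochs=100,
    )

    agent = Agent("domain.yml", policies=[policy])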
The algorithm also has hyper-parameters to control: - neural network's architecture: - - ``hidden_layers_sizes_a`` sets a list of hidden layers - sizes before embedding layer for user inputs, the number - of hidden layers is equal to the length of the list; - ``hidden_layers_sizes_b`` sets a list of hidden layers sizes before embedding layer for system actions, the number of hidden layers is equal to the length of the list; - - ``rnn_size`` sets the number of units in the LSTM cell; + - ``transformer_size`` sets the number of units in the transformer; + - ``num_transformer_layers`` sets the number of transformer layers; + - ``pos_encoding`` sets the type of positional encoding in transformer, + it should be either ``timing`` or ``emb``; + - ``max_seq_length`` sets the maximum sequence length + if embedding positional encodings are used; + - ``num_heads`` sets the number of heads in multi-head attention; - training: - - ``layer_norm`` if ``true`` layer normalization for lstm - cell is turned on, default ``true``; - ``batch_size`` sets the number of training examples in one forward/backward pass, the higher the batch size, the more memory space you'll need; + - ``batch_strategy`` sets the type of batching strategy, + it should be either ``sequence`` or ``balanced``; - ``epochs`` sets the number of times the algorithm will see training data, where one ``epoch`` equals one forward pass and one backward pass of all the training examples; @@ -276,38 +236,52 @@ It is recommended to use - embedding: - ``embed_dim`` sets the dimension of embedding space; - - ``mu_pos`` controls how similar the algorithm should try - to make embedding vectors for correct intent labels; - - ``mu_neg`` controls maximum negative similarity for - incorrect intents; - - ``similarity_type`` sets the type of the similarity, - it should be either ``cosine`` or ``inner``; - ``num_neg`` sets the number of incorrect intent labels, the algorithm will minimize their similarity to the user input during training; + - ``similarity_type`` sets the type of the similarity, + it should be either ``auto``, ``cosine`` or ``inner``, + if ``auto``, it will be set depending on ``loss_type``, + ``inner`` for ``softmax``, ``cosine`` for ``margin``; + - ``loss_type`` sets the type of the loss function, + it should be either ``softmax`` or ``margin``; + - ``mu_pos`` controls how similar the algorithm should try + to make embedding vectors for correct intent labels, + used only if ``loss_type`` is set to ``margin``; + - ``mu_neg`` controls maximum negative similarity for + incorrect intents, + used only if ``loss_type`` is set to ``margin``; - ``use_max_sim_neg`` if ``true`` the algorithm only - minimizes maximum similarity over incorrect intent labels; + minimizes maximum similarity over incorrect intent labels, + used only if ``loss_type`` is set to ``margin``; + - ``scale_loss`` if ``true`` the algorithm will downscale the loss + for examples where the correct label is predicted with high confidence, + used only if ``loss_type`` is set to ``softmax``; - regularization: - ``C2`` sets the scale of L2 regularization - ``C_emb`` sets the scale of how important is to minimize the maximum similarity between embeddings of different - intent labels; - - ``droprate_a`` sets the dropout rate between hidden + intent labels, used only if ``loss_type`` is set to ``margin``; + - ``droprate_a`` sets the dropout rate between layers before embedding layer for user inputs; - - ``droprate_b`` sets the dropout rate between hidden layers + - ``droprate_b`` sets the dropout 
rate between layers before embedding layer for system actions; - - ``droprate_rnn`` sets the recurrent dropout rate on - the LSTM hidden state ``_; - train accuracy calculation: - ``evaluate_every_num_epochs`` sets how often to calculate train accuracy, small values may hurt performance; - ``evaluate_on_num_examples`` how many examples to use for - calculation of train accuracy, large values may hurt - performance. + the hold out validation set to calculate validation accuracy, + large values may hurt performance. + + .. warning:: + + If ``evaluate_on_num_examples`` is non-zero, random examples will be + picked by a stratified split and used as a **hold out** validation set, + so they will be excluded from the training data. .. note:: diff --git a/rasa/core/featurizers.py b/rasa/core/featurizers.py index 76634cac1517..ae1dab2f49d0 100644 --- a/rasa/core/featurizers.py +++ b/rasa/core/featurizers.py @@ -19,23 +19,21 @@ class SingleStateFeaturizer(object): - """Base class for mechanisms to transform the conversations state - into machine learning formats. + """Base class for mechanisms to transform the conversation state into ML formats. Subclasses of SingleStateFeaturizer decide how the bot will transform the conversation state to a format which a classifier can read: - feature vector.""" - - def __init__(self): - """Declares instant variables.""" - self.user_feature_len = None - self.slot_feature_len = None + feature vector. + """ def prepare_from_domain(self, domain: Domain) -> None: - """Helper method to init based on domain""" + """Helper method to init based on domain.""" + pass def encode(self, state: Dict[Text, float]) -> np.ndarray: + """Encode user input.""" + raise NotImplementedError( "SingleStateFeaturizer must have " "the capacity to " @@ -44,6 +42,8 @@ def encode(self, state: Dict[Text, float]) -> np.ndarray: @staticmethod def action_as_one_hot(action: Text, domain: Domain) -> np.ndarray: + """Encode system action as one-hot vector.""" + if action is None: return np.ones(domain.num_actions, dtype=int) * -1 @@ -52,49 +52,50 @@ def action_as_one_hot(action: Text, domain: Domain) -> np.ndarray: return y def create_encoded_all_actions(self, domain: Domain) -> np.ndarray: - """Create matrix with all actions from domain - encoded in rows.""" + """Create matrix with all actions from domain encoded in rows.""" + pass class BinarySingleStateFeaturizer(SingleStateFeaturizer): """Assumes all features are binary. - All features should be either on or off, denoting them with 1 or 0.""" + All features should be either on or off, denoting them with 1 or 0. + """ def __init__(self): """Declares instant variables.""" + super(BinarySingleStateFeaturizer, self).__init__() self.num_features = None self.input_state_map = None def prepare_from_domain(self, domain: Domain) -> None: + """Use Domain to prepare featurizer.""" + self.num_features = domain.num_states self.input_state_map = domain.input_state_map - self.user_feature_len = len(domain.intent_states) + len(domain.entity_states) - self.slot_feature_len = len(domain.slot_states) - def encode(self, state: Dict[Text, float]) -> np.ndarray: """Returns a binary vector indicating which features are active. - Given a dictionary of states (e.g. 'intent_greet', - 'prev_action_listen',...) 
return a binary vector indicating which + features of `self.input_features` are in the bag. NB it's a + regular double precision float array type. - For example with two active features out of five possible features - this would return a vector like `[0 0 1 0 1]` + For example with two active features out of five possible features + this would return a vector like `[0 0 1 0 1]` - If intent features are given with a probability, for example - with two active features and two uncertain intents out - of five possible features this would return a vector - like `[0.3, 0.7, 1.0, 0, 1.0]`. + If intent features are given with a probability, for example + with two active features and two uncertain intents out + of five possible features this would return a vector + like `[0.3, 0.7, 1.0, 0, 1.0]`. - If this is just a padding vector we set all values to `-1`. - padding vectors are specified by a `None` or `[None]` - value for states. + If this is just a padding vector we set all values to `-1`. + padding vectors are specified by a `None` or `[None]` + value for states. """ if not self.num_features: @@ -127,15 +128,16 @@ def encode(self, state: Dict[Text, float]) -> np.ndarray: return used_features def create_encoded_all_actions(self, domain: Domain) -> np.ndarray: - """Create matrix with all actions from domain - encoded in rows as bag of words.""" + """Create matrix with all actions from domain encoded in rows as bag of words""" + return np.eye(domain.num_actions) class LabelTokenizerSingleStateFeaturizer(SingleStateFeaturizer): - """SingleStateFeaturizer that splits user intents and - bot action names into tokens and uses these tokens to - create bag-of-words feature vectors. + """Creates bag-of-words feature vectors. + + User intents and bot action names are split into tokens + and used to create bag-of-words feature vectors. Args: split_symbol: The symbol that separates words in @@ -165,8 +167,10 @@ def __init__( @staticmethod def _create_label_token_dict(labels, split_symbol="_"): """Splits labels into tokens by using provided symbol. + Creates the lookup dictionary for this tokens. - Values in this dict are used for featurization.""" + Values in this dict are used for featurization. 
+ """ distinct_tokens = set( [token for label in labels for token in label.split(split_symbol)] @@ -174,10 +178,10 @@ def _create_label_token_dict(labels, split_symbol="_"): return {token: idx for idx, token in enumerate(sorted(distinct_tokens))} def prepare_from_domain(self, domain: Domain) -> None: - """Creates internal vocabularies for user intents - and bot actions to use for featurization""" + """Creates internal vocabularies for user intents and bot actions.""" + self.user_labels = domain.intent_states + domain.entity_states - self.slot_labels = domain.slot_states + self.slot_labels = domain.slot_states + domain.form_states self.bot_labels = domain.action_names if self.use_shared_vocab: @@ -197,10 +201,9 @@ def prepare_from_domain(self, domain: Domain) -> None: len(self.user_vocab) + len(self.slot_labels) + len(self.bot_vocab) ) - self.user_feature_len = len(self.user_vocab) - self.slot_feature_len = len(self.slot_labels) - def encode(self, state: Dict[Text, float]) -> np.ndarray: + """Returns a binary vector indicating which tokens are present.""" + if not self.num_features: raise Exception( "LabelTokenizerSingleStateFeaturizer " @@ -246,10 +249,10 @@ def encode(self, state: Dict[Text, float]) -> np.ndarray: return used_features def create_encoded_all_actions(self, domain: Domain) -> np.ndarray: - """Create matrix with all actions from domain - encoded in rows as bag of words.""" + """Create matrix with all actions from domain encoded in rows as bag of words""" + encoded_all_actions = np.zeros( - (domain.num_actions, len(self.bot_vocab)), dtype=int + (domain.num_actions, len(self.bot_vocab)), dtype=np.int32 ) for idx, name in enumerate(domain.action_names): for t in name.split(self.split_symbol): @@ -258,7 +261,7 @@ def create_encoded_all_actions(self, domain: Domain) -> np.ndarray: class TrackerFeaturizer(object): - """Base class for actual tracker featurizers""" + """Base class for actual tracker featurizers.""" def __init__( self, @@ -266,7 +269,7 @@ def __init__( use_intent_probabilities: bool = False, ) -> None: - self.state_featurizer = state_featurizer or SingleStateFeaturizer() + self.state_featurizer = state_featurizer self.use_intent_probabilities = use_intent_probabilities def _create_states( @@ -276,9 +279,12 @@ def _create_states( is_binary_training: bool = False, ) -> List[Dict[Text, float]]: """Create states: a list of dictionaries. - If use_intent_probabilities is False (default behaviour), - pick the most probable intent out of all provided ones and - set its probability to 1.0, while all the others to 0.0.""" + + If use_intent_probabilities is False (default behaviour), + pick the most probable intent out of all provided ones and + set its probability to 1.0, while all the others to 0.0. 
+ """ + states = tracker.past_states(domain) # during training we encounter only 1 or 0 @@ -312,12 +318,15 @@ def _create_states( return [dict(state) for state in states] def _pad_states(self, states: List[Any]) -> List[Any]: + """Pads states.""" + return states def _featurize_states( self, trackers_as_states: List[List[Dict[Text, float]]] ) -> Tuple[np.ndarray, List[int]]: - """Create X""" + """Create X.""" + features = [] true_lengths = [] @@ -346,7 +355,7 @@ def _featurize_states( def _featurize_labels( self, trackers_as_actions: List[List[Text]], domain: Domain ) -> np.ndarray: - """Create y""" + """Create y.""" labels = [] for tracker_actions in trackers_as_actions: @@ -361,15 +370,18 @@ def _featurize_labels( labels.append(story_labels) + y = np.array(labels) # if it is MaxHistoryFeaturizer, squeeze out time axis - y = np.array(labels).squeeze() + if y.ndim == 3 and isinstance(self, MaxHistoryTrackerFeaturizer): + y = y[:, 0, :] return y def training_states_and_actions( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[Dict]], List[List[Text]]]: - """Transforms list of trackers to lists of states and actions""" + """Transforms list of trackers to lists of states and actions.""" + raise NotImplementedError( "Featurizer must have the capacity to encode trackers to feature vectors" ) @@ -377,7 +389,8 @@ def training_states_and_actions( def featurize_trackers( self, trackers: List[DialogueStateTracker], domain: Domain ) -> DialogueTrainingData: - """Create training data""" + """Create training data.""" + self.state_featurizer.prepare_from_domain(domain) (trackers_as_states, trackers_as_actions) = self.training_states_and_actions( @@ -393,7 +406,8 @@ def featurize_trackers( def prediction_states( self, trackers: List[DialogueStateTracker], domain: Domain ) -> List[List[Dict[Text, float]]]: - """Transforms list of trackers to lists of states for prediction""" + """Transforms list of trackers to lists of states for prediction.""" + raise NotImplementedError( "Featurizer must have the capacity to create feature vector" ) @@ -402,7 +416,7 @@ def prediction_states( def create_X( self, trackers: List[DialogueStateTracker], domain: Domain ) -> np.ndarray: - """Create X for prediction""" + """Create X for prediction.""" trackers_as_states = self.prediction_states(trackers, domain) X, _ = self._featurize_states(trackers_as_states) @@ -417,6 +431,8 @@ def persist(self, path): @staticmethod def load(path): + """Loads the featurizer from file.""" + featurizer_file = os.path.join(path, "featurizer.json") if os.path.isfile(featurizer_file): return jsonpickle.decode(rasa.utils.io.read_file(featurizer_file)) @@ -429,17 +445,18 @@ def load(path): class FullDialogueTrackerFeaturizer(TrackerFeaturizer): - """Tracker featurizer that takes the trackers - and creates full dialogue training data for - time distributed rnn. - Training data is padded up to the length of the longest - dialogue with -1""" + """Creates full dialogue training data for time distributed architectures. + + Creates training data that uses each time output for prediction. + Training data is padded up to the length of the longest dialogue with -1. 
+ """ def __init__( self, state_featurizer: SingleStateFeaturizer, use_intent_probabilities: bool = False, ) -> None: + super(FullDialogueTrackerFeaturizer, self).__init__( state_featurizer, use_intent_probabilities ) @@ -447,13 +464,15 @@ def __init__( @staticmethod def _calculate_max_len(trackers_as_actions): + """Calculate the length of the longest dialogue.""" + if trackers_as_actions: return max([len(states) for states in trackers_as_actions]) else: return None def _pad_states(self, states: List[Any]) -> List[Any]: - """Pads states up to max_len""" + """Pads states up to max_len.""" if len(states) < self.max_len: states += [None] * (self.max_len - len(states)) @@ -463,6 +482,10 @@ def _pad_states(self, states: List[Any]) -> List[Any]: def training_states_and_actions( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[Dict]], List[List[Text]]]: + """Transforms list of trackers to lists of states and actions. + + Training data is padded up to the length of the longest dialogue with -1. + """ trackers_as_states = [] trackers_as_actions = [] @@ -510,6 +533,7 @@ def training_states_and_actions( def prediction_states( self, trackers: List[DialogueStateTracker], domain: Domain ) -> List[List[Dict[Text, float]]]: + """Transforms list of trackers to lists of states for prediction.""" trackers_as_states = [ self._create_states(tracker, domain) for tracker in trackers @@ -519,11 +543,11 @@ def prediction_states( class MaxHistoryTrackerFeaturizer(TrackerFeaturizer): - """Tracker featurizer that takes the trackers, - slices them into max_history batches and - creates training data for rnn that uses last output - for prediction. - Training data is padded up to the max_history with -1""" + """Slices the tracker history into max_history batches. + + Creates training data that uses last output for prediction. + Training data is padded up to the max_history with -1. + """ MAX_HISTORY_DEFAULT = 5 @@ -534,6 +558,7 @@ def __init__( remove_duplicates: bool = True, use_intent_probabilities: bool = False, ) -> None: + super(MaxHistoryTrackerFeaturizer, self).__init__( state_featurizer, use_intent_probabilities ) @@ -547,7 +572,8 @@ def slice_state_history( """Slices states from the trackers history. If the slice is at the array borders, padding will be added to ensure - the slice length.""" + the slice length. + """ slice_end = len(states) slice_start = max(0, slice_end - slice_length) @@ -558,6 +584,8 @@ def slice_state_history( @staticmethod def _hash_example(states, action): + """Hash states for efficient deduplication.""" + frozen_states = tuple( (s if s is None else frozenset(s.items()) for s in states) ) @@ -567,6 +595,10 @@ def _hash_example(states, action): def training_states_and_actions( self, trackers: List[DialogueStateTracker], domain: Domain ) -> Tuple[List[List[Optional[Dict[Text, float]]]], List[List[Text]]]: + """Transforms list of trackers to lists of states and actions. + + Training data is padded up to the max_history with -1. 
+ """ trackers_as_states = [] trackers_as_actions = [] @@ -621,6 +653,7 @@ def training_states_and_actions( def prediction_states( self, trackers: List[DialogueStateTracker], domain: Domain ) -> List[List[Dict[Text, float]]]: + """Transforms list of trackers to lists of states for prediction.""" trackers_as_states = [ self._create_states(tracker, domain) for tracker in trackers diff --git a/rasa/core/policies/embedding_policy.py b/rasa/core/policies/embedding_policy.py index d1e5e1864cf1..6224513951cc 100644 --- a/rasa/core/policies/embedding_policy.py +++ b/rasa/core/policies/embedding_policy.py @@ -3,65 +3,51 @@ import json import logging import os +import pickle import warnings import numpy as np import typing from tqdm import tqdm -from typing import Any, List, Optional, Text, Dict, Tuple, Union +from typing import Any, List, Optional, Text, Dict, Tuple, Union, Generator, Callable import rasa.utils.io from rasa.core import utils -from rasa.core.actions.action import ACTION_LISTEN_NAME from rasa.core.domain import Domain from rasa.core.featurizers import ( TrackerFeaturizer, FullDialogueTrackerFeaturizer, LabelTokenizerSingleStateFeaturizer, + MaxHistoryTrackerFeaturizer, ) from rasa.core.policies.policy import Policy +from rasa.core.trackers import DialogueStateTracker +from rasa.utils.common import is_logging_disabled +from sklearn.model_selection import train_test_split import tensorflow as tf -from rasa.core.policies.tf_utils import ( - TimeAttentionWrapper, - ChronoBiasLayerNormBasicLSTMCell, +from tensor2tensor.models.transformer import ( + transformer_base, + transformer_prepare_encoder, + transformer_encoder, ) -from rasa.core.trackers import DialogueStateTracker -from rasa.utils.common import is_logging_disabled +from tensor2tensor.layers.common_attention import large_compatible_negative if typing.TYPE_CHECKING: - from rasa.core.policies.tf_utils import TimeAttentionWrapperState - -try: - import cPickle as pickle # pytype: disable=import-error -except ImportError: - import pickle - -tf.contrib._warning = None # avoid warning println on contrib import - remove for tf 2 + from tensor2tensor.utils.hparam import HParams +# avoid warning println on contrib import - remove for tf 2 +tf.contrib._warning = None logger = logging.getLogger(__name__) # namedtuple for all tf session related data -SessionData = namedtuple( - "SessionData", - ( - "X", - "Y", - "slots", - "previous_actions", - "actions_for_Y", - "x_for_no_intent", - "y_for_no_action", - "y_for_action_listen", - "all_Y_d", - ), -) +SessionData = namedtuple("SessionData", ("X", "Y", "labels")) class EmbeddingPolicy(Policy): - """Recurrent Embedding Dialogue Policy (REDP) + """Transformer Embedding Dialogue Policy (TEDP) - The policy that is used in our paper https://arxiv.org/abs/1811.11707 + Transformer version of the REDP used in our paper https://arxiv.org/abs/1811.11707 """ SUPPORTS_ONLINE_TRAINING = True @@ -71,18 +57,26 @@ class EmbeddingPolicy(Policy): # nn architecture # a list of hidden layers sizes before user embed layer # number of hidden layers is equal to the length of this list - "hidden_layers_sizes_a": [], + "hidden_layers_sizes_pre_dial": [], # a list of hidden layers sizes before bot embed layer # number of hidden layers is equal to the length of this list - "hidden_layers_sizes_b": [], - # number of units in rnn cell - "rnn_size": 64, + "hidden_layers_sizes_bot": [], + # number of units in transformer + "transformer_size": 128, + # number of transformer layers + "num_transformer_layers": 1, + # type of 
positional encoding in transformer + "pos_encoding": "timing", # string 'timing' or 'emb' + # max sequence length if pos_encoding='emb' + "max_seq_length": 256, + # number of attention heads in transformer + "num_heads": 4, # training parameters - # flag if to turn on layer normalization for lstm cell - "layer_norm": True, - # initial and final batch sizes - batch size will be - # linearly increased for each epoch + # initial and final batch sizes: + # batch size will be linearly increased for each epoch "batch_size": [8, 32], + # how to create batches + "batch_strategy": "sequence", # string 'sequence' or 'balanced' # number of epochs "epochs": 1, # set random seed to any int to get reproducible results @@ -90,165 +84,115 @@ class EmbeddingPolicy(Policy): # embedding parameters # dimension size of embedding vectors "embed_dim": 20, + # the type of the similarity + "num_neg": 20, + # flag if minimize only maximum similarity over incorrect actions + "similarity_type": "auto", # string 'auto' or 'cosine' or 'inner' + # the type of the loss function + "loss_type": "softmax", # string 'softmax' or 'margin' # how similar the algorithm should try # to make embedding vectors for correct actions "mu_pos": 0.8, # should be 0.0 < ... < 1.0 for 'cosine' # maximum negative similarity for incorrect actions "mu_neg": -0.2, # should be -1.0 < ... < 1.0 for 'cosine' - # the type of the similarity - "similarity_type": "cosine", # string 'cosine' or 'inner' # the number of incorrect actions, the algorithm will minimize # their similarity to the user input during training - "num_neg": 20, - # flag if minimize only maximum similarity over incorrect actions "use_max_sim_neg": True, # flag which loss function to use + # scale loss inverse proportionally to confidence of correct prediction + "scale_loss": True, # regularization # the scale of L2 regularization "C2": 0.001, # the scale of how important is to minimize the maximum similarity # between embeddings of different actions "C_emb": 0.8, - # scale loss with inverse frequency of bot actions - "scale_loss_by_action_counts": True, - # dropout rate for user nn - "droprate_a": 0.0, + # dropout rate for dial nn + "droprate_a": 0.1, # dropout rate for bot nn "droprate_b": 0.0, - # dropout rate for rnn - "droprate_rnn": 0.1, - # attention parameters - # flag to use attention over user input - # as an input to rnn - "attn_before_rnn": True, - # flag to use attention over prev bot actions - # and copy it to output bypassing rnn - "attn_after_rnn": True, - # flag to use `sparsemax` instead of `softmax` for attention - "sparse_attention": False, # flag to use sparsemax for probs - # the range of allowed location-based attention shifts - "attn_shift_range": None, # if None, set to mean dialogue length / 2 # visualization of accuracy - # how often calculate train accuracy + # how often calculate validation accuracy "evaluate_every_num_epochs": 20, # small values may hurt performance - # how many examples to use for calculation of train accuracy - "evaluate_on_num_examples": 100, # large values may hurt performance + # how many examples to use for hold out validation set + "evaluate_on_num_examples": 0, # large values may hurt performance } # end default properties (DOC MARKER - don't remove) - @classmethod - def _standard_featurizer(cls): - return FullDialogueTrackerFeaturizer(LabelTokenizerSingleStateFeaturizer()) + @staticmethod + def _standard_featurizer(max_history: Optional[int] = None) -> "TrackerFeaturizer": + if max_history is None: + return 
FullDialogueTrackerFeaturizer(LabelTokenizerSingleStateFeaturizer()) + else: + return MaxHistoryTrackerFeaturizer( + LabelTokenizerSingleStateFeaturizer(), max_history=max_history + ) def __init__( self, - featurizer: Optional[FullDialogueTrackerFeaturizer] = None, + featurizer: Optional["TrackerFeaturizer"] = None, priority: int = 1, - encoded_all_actions: Optional[np.ndarray] = None, - graph: Optional[tf.Graph] = None, - session: Optional[tf.Session] = None, - intent_placeholder: Optional[tf.Tensor] = None, - action_placeholder: Optional[tf.Tensor] = None, - slots_placeholder: Optional[tf.Tensor] = None, - prev_act_placeholder: Optional[tf.Tensor] = None, - dialogue_len: Optional[tf.Tensor] = None, - x_for_no_intent: Optional[tf.Tensor] = None, - y_for_no_action: Optional[tf.Tensor] = None, - y_for_action_listen: Optional[tf.Tensor] = None, - similarity_op: Optional[tf.Tensor] = None, - alignment_history: Optional[tf.Tensor] = None, - user_embed: Optional[tf.Tensor] = None, - bot_embed: Optional[tf.Tensor] = None, - slot_embed: Optional[tf.Tensor] = None, - dial_embed: Optional[tf.Tensor] = None, - rnn_embed: Optional[tf.Tensor] = None, - attn_embed: Optional[tf.Tensor] = None, - copy_attn_debug: Optional[tf.Tensor] = None, - all_time_masks: Optional[tf.Tensor] = None, + graph: Optional["tf.Graph"] = None, + session: Optional["tf.Session"] = None, + user_placeholder: Optional["tf.Tensor"] = None, + bot_placeholder: Optional["tf.Tensor"] = None, + similarity_all: Optional["tf.Tensor"] = None, + pred_confidence: Optional["tf.Tensor"] = None, + similarity: Optional["tf.Tensor"] = None, + dial_embed: Optional["tf.Tensor"] = None, + bot_embed: Optional["tf.Tensor"] = None, + all_bot_embed: Optional["tf.Tensor"] = None, + attention_weights: Optional["tf.Tensor"] = None, + max_history: Optional[int] = None, **kwargs: Any ) -> None: - if featurizer: - if not isinstance(featurizer, FullDialogueTrackerFeaturizer): - raise TypeError( - "Passed tracker featurizer of type {}, " - "should be FullDialogueTrackerFeaturizer." 
- "".format(type(featurizer).__name__) - ) - super(EmbeddingPolicy, self).__init__(featurizer, priority) + """Declare instant variables with default values""" - # flag if to use the same embeddings for user and bot - try: - self.share_embedding = self.featurizer.state_featurizer.use_shared_vocab - except AttributeError: - self.share_embedding = False + if not featurizer: + featurizer = self._standard_featurizer(max_history) + super(EmbeddingPolicy, self).__init__(featurizer, priority) self._load_params(**kwargs) - # chrono initialization for forget bias - self.characteristic_time = None - # encode all actions with numbers - # persist this array for prediction time - self.encoded_all_actions = encoded_all_actions + self._encoded_all_actions = None # tf related instances self.graph = graph self.session = session - self.a_in = intent_placeholder - self.b_in = action_placeholder - self.c_in = slots_placeholder - self.b_prev_in = prev_act_placeholder - self._dialogue_len = dialogue_len - self._x_for_no_intent_in = x_for_no_intent - self._y_for_no_action_in = y_for_no_action - self._y_for_action_listen_in = y_for_action_listen - self.sim_op = similarity_op - - # store attention probability distribution as - # concatenated tensor of each attention types - self.alignment_history = alignment_history + self.a_in = user_placeholder + self.b_in = bot_placeholder + self.sim_all = similarity_all + self.pred_confidence = pred_confidence + self.sim = similarity # persisted embeddings - self.user_embed = user_embed - self.bot_embed = bot_embed - self.slot_embed = slot_embed self.dial_embed = dial_embed + self.bot_embed = bot_embed + self.all_bot_embed = all_bot_embed - self.rnn_embed = rnn_embed - self.attn_embed = attn_embed - self.copy_attn_debug = copy_attn_debug - - self.all_time_masks = all_time_masks - + self.attention_weights = attention_weights # internal tf instances + self._iterator = None self._train_op = None self._is_training = None - self._loss_scales = None # init helpers def _load_nn_architecture_params(self, config: Dict[Text, Any]) -> None: - self.hidden_layer_sizes = { - "a": config["hidden_layers_sizes_a"], - "b": config["hidden_layers_sizes_b"], + self.hidden_layers_sizes = { + "pre_dial": config["hidden_layers_sizes_pre_dial"], + "bot": config["hidden_layers_sizes_bot"], } - if self.share_embedding: - if self.hidden_layer_sizes["a"] != self.hidden_layer_sizes["b"]: - raise ValueError( - "Due to sharing vocabulary " - "in the featurizer, embedding weights " - "are shared as well. 
" - "So hidden_layers_sizes_a={} should be " - "equal to hidden_layers_sizes_b={}" - "".format( - self.hidden_layer_sizes["a"], self.hidden_layer_sizes["b"] - ) - ) + self.pos_encoding = config["pos_encoding"] + self.max_seq_length = config["max_seq_length"] + self.num_heads = config["num_heads"] - self.rnn_size = config["rnn_size"] - self.layer_norm = config["layer_norm"] + self.transformer_size = config["transformer_size"] + self.num_transformer_layers = config["num_transformer_layers"] self.batch_size = config["batch_size"] + self.batch_strategy = config["batch_strategy"] self.epochs = config["epochs"] @@ -259,27 +203,21 @@ def _load_embedding_params(self, config: Dict[Text, Any]) -> None: self.mu_pos = config["mu_pos"] self.mu_neg = config["mu_neg"] self.similarity_type = config["similarity_type"] + self.loss_type = config["loss_type"] + if self.similarity_type == "auto": + if self.loss_type == "softmax": + self.similarity_type = "inner" + elif self.loss_type == "margin": + self.similarity_type = "cosine" + self.num_neg = config["num_neg"] self.use_max_sim_neg = config["use_max_sim_neg"] + self.scale_loss = config["scale_loss"] def _load_regularization_params(self, config: Dict[Text, Any]) -> None: self.C2 = config["C2"] self.C_emb = config["C_emb"] - self.scale_loss_by_action_counts = config["scale_loss_by_action_counts"] - self.droprate = { - "a": config["droprate_a"], - "b": config["droprate_b"], - "rnn": config["droprate_rnn"], - } - - def _load_attn_params(self, config: Dict[Text, Any]) -> None: - self.sparse_attention = config["sparse_attention"] - self.attn_shift_range = config["attn_shift_range"] - self.attn_after_rnn = config["attn_after_rnn"] - self.attn_before_rnn = config["attn_before_rnn"] - - def is_using_attention(self): - return self.attn_after_rnn or self.attn_before_rnn + self.droprate = {"bot": config["droprate_b"], "dial": config["droprate_a"]} def _load_visual_params(self, config: Dict[Text, Any]) -> None: self.evaluate_every_num_epochs = config["evaluate_every_num_epochs"] @@ -295,116 +233,234 @@ def _load_params(self, **kwargs: Dict[Text, Any]) -> None: self._load_nn_architecture_params(config) self._load_embedding_params(config) self._load_regularization_params(config) - self._load_attn_params(config) self._load_visual_params(config) # data helpers - # noinspection PyPep8Naming - def _create_X_slots_previous_actions( - self, data_X: np.ndarray - ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]: - """Extract feature vectors - - for user input (X), slots and - previously executed actions from training data. 
- """ - - featurizer = self.featurizer.state_featurizer - slot_start = featurizer.user_feature_len - previous_start = slot_start + featurizer.slot_feature_len - - X = data_X[:, :, :slot_start] - slots = data_X[:, :, slot_start:previous_start] - previous_actions = data_X[:, :, previous_start:] - - return X, slots, previous_actions - # noinspection PyPep8Naming @staticmethod - def _actions_for_Y(data_Y: np.ndarray) -> np.ndarray: + def _labels_for_Y(data_Y: "np.ndarray") -> "np.ndarray": """Prepare Y data for training: extract actions indices.""" + return data_Y.argmax(axis=-1) # noinspection PyPep8Naming - def _action_features_for_Y(self, actions_for_Y: np.ndarray) -> np.ndarray: + def _action_features_for_Y(self, labels: "np.ndarray") -> "np.ndarray": """Prepare Y data for training: features for action labels.""" - return np.stack( - [ - np.stack( - [self.encoded_all_actions[action_idx] for action_idx in action_ids] - ) - for action_ids in actions_for_Y - ] - ) + if len(labels.shape) == 2: + return np.stack( + [ + np.stack( + [ + self._encoded_all_actions[action_idx] + for action_idx in action_ids + ] + ) + for action_ids in labels + ] + ) + else: + return np.stack( + [self._encoded_all_actions[action_idx] for action_idx in labels] + ) # noinspection PyPep8Naming - @staticmethod - def _create_zero_vector(X: np.ndarray) -> np.ndarray: - """Create zero vector of shape (1, X.shape[-1]).""" + def _create_session_data( + self, data_X: "np.ndarray", data_Y: Optional["np.ndarray"] = None + ) -> "SessionData": + """Combine all tf session related data into a named tuple""" - return np.zeros((1, X.shape[-1]), X.dtype) + if data_Y is not None: + # training time + labels = self._labels_for_Y(data_Y) + Y = self._action_features_for_Y(labels) + + # idea taken from sklearn's stratify split + if labels.ndim == 2: + # for multi-label y, map each distinct row to a string repr + # using join because str(row) uses an ellipsis if len(row) > 1000 + labels = np.array([" ".join(row.astype("str")) for row in labels]) + else: + # prediction time + labels = None + Y = None - def _create_y_for_action_listen(self, domain: "Domain") -> np.ndarray: - """Extract feature vector for action_listen""" - action_listen_idx = domain.index_for_action(ACTION_LISTEN_NAME) - return self.encoded_all_actions[action_listen_idx : action_listen_idx + 1] + return SessionData(X=data_X, Y=Y, labels=labels) # noinspection PyPep8Naming - def _create_all_Y_d(self, dialogue_len: int) -> np.ndarray: - """Stack encoded_all_intents on top of each other + def _train_val_split( + self, session_data: "SessionData" + ) -> Tuple["SessionData", "SessionData"]: + """Create random hold out validation set using stratified split.""" - to create candidates for training examples and - to calculate training accuracy. 
- """ + label_counts = dict( + zip(*np.unique(session_data.labels, return_counts=True, axis=0)) + ) + counts = np.array([label_counts[label] for label in session_data.labels]) + + multi_X = session_data.X[counts > 1] + multi_Y = session_data.Y[counts > 1] + multi_labels = session_data.labels[counts > 1] + + solo_X = session_data.X[counts == 1] + solo_Y = session_data.Y[counts == 1] + solo_labels = session_data.labels[counts == 1] + + (X_train, X_val, Y_train, Y_val, labels_train, labels_val) = train_test_split( + multi_X, + multi_Y, + multi_labels, + test_size=self.evaluate_on_num_examples, + random_state=self.random_seed, + stratify=multi_labels, + ) + X_train = np.concatenate([X_train, solo_X]) + Y_train = np.concatenate([Y_train, solo_Y]) + labels_train = np.concatenate([labels_train, solo_labels]) + + return ( + SessionData(X=X_train, Y=Y_train, labels=labels_train), + SessionData(X=X_val, Y=Y_val, labels=labels_val), + ) - return np.stack([self.encoded_all_actions] * dialogue_len) + @staticmethod + def _shuffle_session_data(session_data: "SessionData") -> "SessionData": + """Shuffle session data.""" + ids = np.random.permutation(len(session_data.X)) + return SessionData( + X=session_data.X[ids], + Y=session_data.Y[ids], + labels=session_data.labels[ids], + ) + + # tf helpers: # noinspection PyPep8Naming - def _create_tf_session_data( - self, domain: "Domain", data_X: np.ndarray, data_Y: Optional[np.ndarray] = None - ) -> SessionData: - """Combine all tf session related data into a named tuple""" + def _gen_batch( + self, + session_data: "SessionData", + batch_size: int, + batch_strategy: Text = "sequence", + shuffle: bool = False, + ) -> Generator[Tuple["np.ndarray", "np.ndarray"], None, None]: + """Generate batches.""" + + if shuffle: + session_data = self._shuffle_session_data(session_data) + + if batch_strategy == "balanced": + num_examples = len(session_data.X) + unique_labels, counts_labels = np.unique( + session_data.labels, return_counts=True, axis=0 + ) + num_labels = len(unique_labels) + + label_data = [] + for label in unique_labels: + label_data.append( + SessionData( + X=session_data.X[session_data.labels == label], + Y=session_data.Y[session_data.labels == label], + labels=None, # ignore new labels + ) + ) - X, slots, previous_actions = self._create_X_slots_previous_actions(data_X) + data_idx = [0] * num_labels + num_data_cycles = [0] * num_labels + skipped = [False] * num_labels + new_X = [] + new_Y = [] + while min(num_data_cycles) == 0: + if shuffle: + ids = np.random.permutation(num_labels) + else: + ids = range(num_labels) + + for i in ids: + if num_data_cycles[i] > 0 and not skipped[i]: + skipped[i] = True + continue + else: + skipped[i] = False + + num_i = int(counts_labels[i] / num_examples * batch_size) + 1 + + new_X.append(label_data[i].X[data_idx[i] : data_idx[i] + num_i]) + new_Y.append(label_data[i].Y[data_idx[i] : data_idx[i] + num_i]) + + data_idx[i] += num_i + if data_idx[i] >= counts_labels[i]: + num_data_cycles[i] += 1 + data_idx[i] = 0 + + if min(num_data_cycles) > 0: + break + + session_data = SessionData( + X=np.concatenate(new_X), Y=np.concatenate(new_Y), labels=None + ) # ignore new labels + + num_batches = session_data.X.shape[0] // batch_size + int( + session_data.X.shape[0] % batch_size > 0 + ) - if data_Y is not None: - # training time - actions_for_Y = self._actions_for_Y(data_Y) - Y = self._action_features_for_Y(actions_for_Y) - else: - # prediction time - actions_for_Y = None - Y = None + for batch_num in range(num_batches): + batch_x = 
session_data.X[ + batch_num * batch_size : (batch_num + 1) * batch_size + ] + batch_y = session_data.Y[ + batch_num * batch_size : (batch_num + 1) * batch_size + ] - x_for_no_intent = self._create_zero_vector(X) - y_for_no_action = self._create_zero_vector(previous_actions) - y_for_action_listen = self._create_y_for_action_listen(domain) + yield batch_x, batch_y - # is needed to calculate train accuracy - all_Y_d = self._create_all_Y_d(X.shape[1]) + # noinspection PyPep8Naming + def _create_tf_dataset( + self, + session_data: "SessionData", + batch_size: Union["tf.Tensor", int], + batch_strategy: Text = "sequence", + shuffle: bool = False, + ) -> "tf.data.Dataset": + """Create tf dataset.""" + + # set batch and sequence length to None + shape_X = (None, None, session_data.X[0].shape[-1]) + + if session_data.Y[0].ndim == 1: + shape_Y = (None, session_data.Y[0].shape[-1]) + else: + shape_Y = (None, None, session_data.Y[0].shape[-1]) - return SessionData( - X=X, - Y=Y, - slots=slots, - previous_actions=previous_actions, - actions_for_Y=actions_for_Y, - x_for_no_intent=x_for_no_intent, - y_for_no_action=y_for_no_action, - y_for_action_listen=y_for_action_listen, - all_Y_d=all_Y_d, + return tf.data.Dataset.from_generator( + lambda batch_size_: self._gen_batch( + session_data, batch_size_, batch_strategy, shuffle + ), + output_types=(tf.float32, tf.float32), + output_shapes=(shape_X, shape_Y), + args=([batch_size]), ) - # tf helpers: + @staticmethod + def _create_tf_iterator(dataset: "tf.data.Dataset") -> "tf.data.Iterator": + """Create tf iterator.""" + + return tf.data.Iterator.from_structure( + dataset.output_types, + dataset.output_shapes, + output_classes=dataset.output_classes, + ) def _create_tf_nn( self, - x_in: tf.Tensor, - layer_sizes: List, + x_in: "tf.Tensor", + layer_sizes: List[int], droprate: float, layer_name_suffix: Text, - ) -> tf.Tensor: + activation: Optional[Callable] = tf.nn.relu, + use_bias: bool = True, + kernel_initializer: Optional["tf.keras.initializers.Initializer"] = None, + ) -> "tf.Tensor": """Create nn with hidden layers and name suffix.""" reg = tf.contrib.layers.l2_regularizer(self.C2) @@ -413,7 +469,9 @@ def _create_tf_nn( x = tf.layers.dense( inputs=x, units=layer_size, - activation=tf.nn.relu, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, kernel_regularizer=reg, name="hidden_layer_{}_{}".format(layer_name_suffix, i), reuse=tf.AUTO_REUSE, @@ -421,7 +479,21 @@ def _create_tf_nn( x = tf.layers.dropout(x, rate=droprate, training=self._is_training) return x - def _create_embed(self, x: tf.Tensor, layer_name_suffix: Text) -> tf.Tensor: + def _tf_normalize_if_cosine(self, x: "tf.Tensor") -> "tf.Tensor": + """Normalize embedding if similarity type is cosine.""" + + if self.similarity_type == "cosine": + return tf.nn.l2_normalize(x, -1) + elif self.similarity_type == "inner": + return x + else: + raise ValueError( + "Wrong similarity type '{}', " + "should be 'cosine' or 'inner'" + "".format(self.similarity_type) + ) + + def _create_tf_embed(self, x: "tf.Tensor", layer_name_suffix: Text) -> "tf.Tensor": """Create dense embedding layer with a name.""" reg = tf.contrib.layers.l2_regularizer(self.C2) @@ -433,656 +505,441 @@ def _create_embed(self, x: tf.Tensor, layer_name_suffix: Text) -> tf.Tensor: name="embed_layer_{}".format(layer_name_suffix), reuse=tf.AUTO_REUSE, ) - return embed_x - - def _create_tf_user_embed(self, a_in: tf.Tensor) -> tf.Tensor: - """Create embedding user vector.""" + # normalize embedding vectors for 
cosine similarity + return self._tf_normalize_if_cosine(embed_x) - layer_name_suffix = "a_and_b" if self.share_embedding else "a" - - a = self._create_tf_nn( - a_in, - self.hidden_layer_sizes["a"], - self.droprate["a"], - layer_name_suffix=layer_name_suffix, - ) - return self._create_embed(a, layer_name_suffix=layer_name_suffix) - - def _create_tf_bot_embed(self, b_in: tf.Tensor) -> tf.Tensor: + def _create_tf_bot_embed(self, b_in: "tf.Tensor") -> "tf.Tensor": """Create embedding bot vector.""" - layer_name_suffix = "a_and_b" if self.share_embedding else "b" - b = self._create_tf_nn( b_in, - self.hidden_layer_sizes["b"], - self.droprate["b"], - layer_name_suffix=layer_name_suffix, + self.hidden_layers_sizes["bot"], + self.droprate["bot"], + layer_name_suffix="bot", ) - return self._create_embed(b, layer_name_suffix=layer_name_suffix) + return self._create_tf_embed(b, layer_name_suffix="bot") - def _create_tf_no_intent_embed(self, x_for_no_intent_i: tf.Tensor) -> tf.Tensor: - """Create embedding user vector for empty intent.""" + def _create_t2t_hparams(self) -> "HParams": + """Create parameters for t2t transformer.""" - layer_name_suffix = "a_and_b" if self.share_embedding else "a" + hparams = transformer_base() - x_for_no_intent = self._create_tf_nn( - x_for_no_intent_i, - self.hidden_layer_sizes["a"], - droprate=0, - layer_name_suffix=layer_name_suffix, - ) - return tf.stop_gradient( - self._create_embed(x_for_no_intent, layer_name_suffix=layer_name_suffix) - ) + hparams.num_hidden_layers = self.num_transformer_layers + hparams.hidden_size = self.transformer_size + # it seems to be factor of 4 for transformer architectures in t2t + hparams.filter_size = hparams.hidden_size * 4 + hparams.num_heads = self.num_heads + hparams.relu_dropout = self.droprate["dial"] + hparams.pos = self.pos_encoding - def _create_tf_no_action_embed(self, y_for_no_action_in: tf.Tensor) -> tf.Tensor: - """Create embedding bot vector for empty action and action_listen.""" + hparams.max_length = self.max_seq_length - layer_name_suffix = "a_and_b" if self.share_embedding else "b" + hparams.unidirectional_encoder = True - y_for_no_action = self._create_tf_nn( - y_for_no_action_in, - self.hidden_layer_sizes["b"], - droprate=0, - layer_name_suffix=layer_name_suffix, - ) - return tf.stop_gradient( - self._create_embed(y_for_no_action, layer_name_suffix=layer_name_suffix) - ) + hparams.self_attention_type = "dot_product_relative_v2" + hparams.max_relative_position = 5 + hparams.add_relative_to_values = True - def _create_rnn_cell(self) -> tf.contrib.rnn.RNNCell: - """Create one rnn cell.""" + return hparams - # chrono initialization for forget bias - # assuming that characteristic time is max dialogue length - # left border that initializes forget gate close to 0 - bias_0 = -1.0 + # noinspection PyUnresolvedReferences + def _create_t2t_transformer_encoder( + self, + x_in: "tf.Tensor", + mask: "tf.Tensor", + attention_weights: Dict[Text, "tf.Tensor"], + ) -> "tf.Tensor": + """Create t2t transformer encoder.""" + + hparams = self._create_t2t_hparams() + + # When not in training mode, set all forms of dropout to zero. 
+ for key, value in hparams.values().items(): + if key.endswith("dropout") or key == "label_smoothing": + setattr(hparams, key, value * tf.cast(self._is_training, tf.float32)) + + with tf.variable_scope("transformer", reuse=tf.AUTO_REUSE): + x = self._create_tf_nn( + x_in, + [hparams.hidden_size], + hparams.layer_prepostprocess_dropout, + layer_name_suffix="pre_embed", + activation=None, + use_bias=False, + kernel_initializer=tf.random_normal_initializer( + 0.0, hparams.hidden_size ** -0.5 + ), + ) + if hparams.multiply_embedding_mode == "sqrt_depth": + x *= hparams.hidden_size ** 0.5 - # right border that initializes forget gate close to 1 - bias_1 = np.log(self.characteristic_time - 1.0) - fbias = (bias_1 - bias_0) * np.random.random(self.rnn_size) + bias_0 + x *= tf.expand_dims(mask, -1) + ( + x, + self_attention_bias, + encoder_decoder_attention_bias, + ) = transformer_prepare_encoder(x, None, hparams) + + x *= tf.expand_dims(mask, -1) + + x = tf.nn.dropout(x, 1.0 - hparams.layer_prepostprocess_dropout) + + attn_bias_for_padding = None + # Otherwise the encoder will just use encoder_self_attention_bias. + if hparams.unidirectional_encoder: + attn_bias_for_padding = encoder_decoder_attention_bias + + x = transformer_encoder( + x, + self_attention_bias, + hparams, + nonpadding=mask, + save_weights_to=attention_weights, + attn_bias_for_padding=attn_bias_for_padding, + ) - if self.attn_after_rnn: - # since attention is copied to rnn output, - # embedding should be performed inside the cell - embed_layer_size = self.embed_dim - else: - embed_layer_size = None + x *= tf.expand_dims(mask, -1) - keep_prob = 1.0 - ( - self.droprate["rnn"] * tf.cast(self._is_training, tf.float32) - ) + return tf.nn.dropout( + tf.nn.relu(x), 1.0 - hparams.layer_prepostprocess_dropout + ) - return ChronoBiasLayerNormBasicLSTMCell( - num_units=self.rnn_size, - layer_norm=self.layer_norm, - forget_bias=fbias, - input_bias=-fbias, - dropout_keep_prob=keep_prob, - out_layer_size=embed_layer_size, - ) + def _create_tf_dial(self, a_in) -> Tuple["tf.Tensor", "tf.Tensor"]: + """Create dialogue level embedding and mask.""" - @staticmethod - def _num_units(memory: tf.Tensor) -> int: - return memory.shape[-1].value - - def _create_attn_mech( - self, memory: tf.Tensor, real_length: tf.Tensor - ) -> tf.contrib.seq2seq.AttentionMechanism: - - return tf.contrib.seq2seq.BahdanauAttention( - num_units=self._num_units(memory), - memory=memory, - memory_sequence_length=real_length, - normalize=True, - probability_fn=tf.identity, - # we only attend to memory up to a current time step - # it does not affect alignments, but - # is important for interpolation gate - score_mask_value=0, - ) + # mask different length sequences + # if there is at least one `-1` it should be masked + mask = tf.sign(tf.reduce_max(self.a_in, -1) + 1) - def cell_input_fn( - self, - rnn_inputs: tf.Tensor, - attention: tf.Tensor, - num_cell_input_memory_units: int, - ) -> tf.Tensor: - """Combine rnn inputs and attention into cell input. + a = self._create_tf_nn( + a_in, + self.hidden_layers_sizes["pre_dial"], + self.droprate["dial"], + layer_name_suffix="pre_dial", + ) - Args: - rnn_inputs: Tensor, first output from `rnn_and_attn_inputs_fn`. + self.attention_weights = {} + a = self._create_t2t_transformer_encoder(a, mask, self.attention_weights) - attention: Tensor, concatenated all attentions for one time step. 
+ dial_embed = self._create_tf_embed(a, layer_name_suffix="dial") - num_cell_input_memory_units: int, number of the first units in - `attention` that are responsible for - enhancing cell input. + if isinstance(self.featurizer, MaxHistoryTrackerFeaturizer): + # pick last action if max history featurizer is used + dial_embed = dial_embed[:, -1:, :] + mask = mask[:, -1:] - Returns: - A Tensor `cell_inputs` to feed to an rnn cell. - """ + return dial_embed, mask - if num_cell_input_memory_units: - if num_cell_input_memory_units == self.embed_dim: - # since attention can contain additional - # attention mechanisms, only attention - # from previous user input is used as an input - # for rnn cell and only if memory before rnn - # is the same size as embed_utter - return tf.concat( - [ - rnn_inputs[:, : self.embed_dim] - + attention[:, :num_cell_input_memory_units], - rnn_inputs[:, self.embed_dim :], - ], - -1, - ) - else: - # in current implementation it cannot fall here, - # but this Exception exists in case - # attention before rnn is changed - raise ValueError( - "Number of memory units {} is not " - "equal to number of utter units {}. " - "Please modify cell input function " - "accordingly." - "".format(num_cell_input_memory_units, self.embed_dim) - ) - else: - return rnn_inputs + @staticmethod + def _tf_make_flat(x: "tf.Tensor") -> "tf.Tensor": + """Make tensor 2D.""" - def rnn_and_attn_inputs_fn( - self, inputs: tf.Tensor, cell_state: tf.Tensor - ) -> Tuple[tf.Tensor, tf.Tensor]: - """Construct rnn input and attention mechanism input. + return tf.reshape(x, (-1, x.shape[-1])) - Args: - inputs: Tensor, concatenated all embeddings for one time step: - [embed_utter, embed_slots, embed_prev_action]. + @staticmethod + def _tf_sample_neg( + batch_size: "tf.Tensor", all_bs: "tf.Tensor", neg_ids: "tf.Tensor" + ) -> "tf.Tensor": + """Sample negative examples for given indices""" - cell_state: Tensor, state of an rnn cell. + tiled_all_bs = tf.tile(tf.expand_dims(all_bs, 0), (batch_size, 1, 1)) - Returns: - Tuple of Tensors `rnn_inputs, attn_inputs` to feed to - rnn and attention mechanisms. 
- """ + return tf.batch_gather(tiled_all_bs, neg_ids) - # the hidden state c and slots are not included, - # in hope that algorithm would learn correct attention - # regardless of the hidden state c of an lstm and slots - if isinstance(cell_state, tf.contrib.rnn.LSTMStateTuple): - attn_inputs = tf.concat([inputs[:, : self.embed_dim], cell_state.h], -1) - else: - attn_inputs = tf.concat([inputs[:, : self.embed_dim], cell_state], -1) + def _tf_calc_iou_mask( + self, pos_b: "tf.Tensor", all_bs: "tf.Tensor", neg_ids: "tf.Tensor" + ) -> "tf.Tensor": + """Calculate IOU mask for given indices""" - # include slots in inputs but exclude previous action, since - # rnn should get previous action from its hidden state - rnn_inputs = inputs[:, : (self.embed_dim + self.embed_dim)] + pos_b_in_flat = tf.expand_dims(pos_b, -2) + neg_b_in_flat = self._tf_sample_neg(tf.shape(pos_b)[0], all_bs, neg_ids) - return rnn_inputs, attn_inputs + intersection_b_in_flat = tf.minimum(neg_b_in_flat, pos_b_in_flat) + union_b_in_flat = tf.maximum(neg_b_in_flat, pos_b_in_flat) - def _create_attn_cell( - self, - cell: tf.contrib.rnn.RNNCell, - embed_utter: tf.Tensor, - embed_prev_action: tf.Tensor, - real_length: tf.Tensor, - embed_for_no_intent: tf.Tensor, - embed_for_no_action: tf.Tensor, - embed_for_action_listen: tf.Tensor, - ) -> tf.contrib.rnn.RNNCell: - """Wrap cell in attention wrapper with given memory.""" - - if self.attn_before_rnn: - # create attention over previous user input - num_memory_units_before_rnn = self._num_units(embed_utter) - attn_mech = self._create_attn_mech(embed_utter, real_length) - - # create mask for empty user input not to pay attention to it - ignore_mask = tf.reduce_all( - tf.equal(tf.expand_dims(embed_for_no_intent, 0), embed_utter), -1 - ) + iou = tf.reduce_sum(intersection_b_in_flat, -1) / tf.reduce_sum( + union_b_in_flat, -1 + ) + return 1.0 - tf.nn.relu(tf.sign(1.0 - iou)) - # do not use attention by location before rnn - attn_shift_range = 0 - else: - attn_mech = None - ignore_mask = None - num_memory_units_before_rnn = None - attn_shift_range = None - - if self.attn_after_rnn: - # create attention over previous bot actions - attn_mech_after_rnn = self._create_attn_mech(embed_prev_action, real_length) - - # create mask for empty bot action or action_listen - # not to pay attention to them - ignore_mask_listen = tf.logical_or( - tf.reduce_all( - tf.equal(tf.expand_dims(embed_for_no_action, 0), embed_prev_action), - -1, - ), - tf.reduce_all( - tf.equal( - tf.expand_dims(embed_for_action_listen, 0), embed_prev_action - ), - -1, - ), - ) + def _tf_get_negs( + self, all_embed: "tf.Tensor", all_raw: "tf.Tensor", raw_pos: "tf.Tensor" + ) -> Tuple["tf.Tensor", "tf.Tensor"]: + """Get negative examples from given tensor.""" - if attn_mech is not None: - # if there is another attention mechanism, - # create a list of attention mechanisms - attn_mech = [attn_mech, attn_mech_after_rnn] - ignore_mask = [ignore_mask, ignore_mask_listen] - attn_shift_range = [attn_shift_range, self.attn_shift_range] - else: - attn_mech = attn_mech_after_rnn - ignore_mask = ignore_mask_listen - attn_shift_range = self.attn_shift_range + batch_size = tf.shape(raw_pos)[0] + seq_length = tf.shape(raw_pos)[1] + raw_flat = self._tf_make_flat(raw_pos) - # this particular attention mechanism is unusual - # in the sense that its calculated attention vector is directly - # added to cell output, therefore enabling copy mechanism + total_candidates = tf.shape(all_embed)[0] - # `index_of_attn_to_copy` is used by 
`TimeAttentionWrapper`, - # to know which attention to copy - index_of_attn_to_copy = -1 - else: - index_of_attn_to_copy = None - - return TimeAttentionWrapper( - cell=cell, - attention_mechanism=attn_mech, - sequence_len=self._dialogue_len, - attn_shift_range=attn_shift_range, - sparse_attention=self.sparse_attention, - rnn_and_attn_inputs_fn=self.rnn_and_attn_inputs_fn, - ignore_mask=ignore_mask, - cell_input_fn=lambda inputs, attention: ( - self.cell_input_fn(inputs, attention, num_memory_units_before_rnn) - ), - index_of_attn_to_copy=index_of_attn_to_copy, - likelihood_fn=lambda emb_1, emb_2: (self._tf_sim(emb_1, emb_2, None)), - tensor_not_to_copy=embed_for_action_listen, - output_attention=True, - alignment_history=True, + all_indices = tf.tile( + tf.expand_dims(tf.range(0, total_candidates, 1), 0), + (batch_size * seq_length, 1), + ) + shuffled_indices = tf.transpose( + tf.random.shuffle(tf.transpose(all_indices, (1, 0))), (1, 0) ) + neg_ids = shuffled_indices[:, : self.num_neg] - def _create_tf_dial_embed( - self, - embed_utter: tf.Tensor, - embed_slots: tf.Tensor, - embed_prev_action: tf.Tensor, - mask: tf.Tensor, - embed_for_no_intent: tf.Tensor, - embed_for_no_action: tf.Tensor, - embed_for_action_listen: tf.Tensor, - ) -> Tuple[tf.Tensor, Union[tf.Tensor, "TimeAttentionWrapperState"]]: - """Create rnn for dialogue level embedding.""" - - cell_input = tf.concat([embed_utter, embed_slots, embed_prev_action], -1) - - cell = self._create_rnn_cell() - - real_length = tf.cast(tf.reduce_sum(mask, 1), tf.int32) - - if self.is_using_attention(): - cell = self._create_attn_cell( - cell, - embed_utter, - embed_prev_action, - real_length, - embed_for_no_intent, - embed_for_no_action, - embed_for_action_listen, - ) + bad_negs_flat = self._tf_calc_iou_mask(raw_flat, all_raw, neg_ids) + bad_negs = tf.reshape(bad_negs_flat, (batch_size, seq_length, -1)) - return tf.nn.dynamic_rnn( - cell, - cell_input, - dtype=tf.float32, - sequence_length=real_length, - scope="rnn_decoder", + neg_embed_flat = self._tf_sample_neg( + batch_size * seq_length, all_embed, neg_ids + ) + neg_embed = tf.reshape( + neg_embed_flat, (batch_size, seq_length, -1, all_embed.shape[-1]) ) - @staticmethod - def _alignments_history_from(final_state: "TimeAttentionWrapperState") -> tf.Tensor: - """Extract alignments history form final rnn cell state.""" - - alignments_from_state = final_state.alignment_history - if not isinstance(alignments_from_state, tuple): - alignments_from_state = [alignments_from_state] + return neg_embed, bad_negs - alignment_history = [] - for alignments in alignments_from_state: - # reshape to (batch, time, memory_time) - alignment_history.append(tf.transpose(alignments.stack(), [1, 0, 2])) + def _sample_negatives( + self, all_actions: "tf.Tensor" + ) -> Tuple[ + "tf.Tensor", "tf.Tensor", "tf.Tensor", "tf.Tensor", "tf.Tensor", "tf.Tensor" + ]: + """Sample negative examples.""" - return tf.concat(alignment_history, -1) + pos_dial_embed = tf.expand_dims(self.dial_embed, -2) + neg_dial_embed, dial_bad_negs = self._tf_get_negs( + self._tf_make_flat(self.dial_embed), + self._tf_make_flat(self.b_in), + self.b_in, + ) + pos_bot_embed = tf.expand_dims(self.bot_embed, -2) + neg_bot_embed, bot_bad_negs = self._tf_get_negs( + self.all_bot_embed, all_actions, self.b_in + ) + return ( + pos_dial_embed, + pos_bot_embed, + neg_dial_embed, + neg_bot_embed, + dial_bad_negs, + bot_bad_negs, + ) @staticmethod - def _all_time_masks_from(final_state: "TimeAttentionWrapperState") -> tf.Tensor: - """Extract all time masks 
form final rnn cell state.""" - - # reshape to (batch, time, memory_time) and ignore last time - # because time_mask is created for the next time step - return tf.transpose(final_state.all_time_masks.stack(), [1, 0, 2])[:, :-1, :] - - def _sims_rnn_to_max_from(self, cell_output: tf.Tensor) -> List[tf.Tensor]: - """Save intermediate tensors for debug purposes.""" - - if self.attn_after_rnn: - # extract additional debug tensors - num_add = TimeAttentionWrapper.additional_output_size() - self.copy_attn_debug = cell_output[:, :, -num_add:] - - # extract additional similarity to maximize - sim_attn_to_max = cell_output[:, :, -num_add] - sim_state_to_max = cell_output[:, :, -num_add + 1] - return [sim_attn_to_max, sim_state_to_max] - else: - return [] - - def _embed_dialogue_from(self, cell_output: tf.Tensor) -> tf.Tensor: - """Extract or calculate dialogue level embedding from cell_output.""" - - if self.attn_after_rnn: - # embedding layer is inside rnn cell - embed_dialogue = cell_output[:, :, : self.embed_dim] - - # extract additional debug tensors - num_add = TimeAttentionWrapper.additional_output_size() - self.rnn_embed = cell_output[ - :, :, self.embed_dim : (self.embed_dim + self.embed_dim) - ] - self.attn_embed = cell_output[ - :, :, (self.embed_dim + self.embed_dim) : -num_add - ] - else: - # add embedding layer to rnn cell output - embed_dialogue = self._create_embed( - cell_output[:, :, : self.rnn_size], layer_name_suffix="out" - ) - if self.attn_before_rnn: - # extract additional debug tensors - self.attn_embed = cell_output[:, :, self.rnn_size :] + def _tf_raw_sim(a: "tf.Tensor", b: "tf.Tensor", mask: "tf.Tensor") -> "tf.Tensor": + """Calculate similarity between given tensors.""" - return embed_dialogue + return tf.reduce_sum(a * b, -1) * tf.expand_dims(mask, 2) def _tf_sim( self, - embed_dialogue: tf.Tensor, - embed_action: tf.Tensor, - mask: Optional[tf.Tensor], - ) -> Tuple[tf.Tensor, tf.Tensor]: - """Define similarity. - - This method has two roles: - - calculate similarity between - two embedding vectors of the same size - and output binary mask and similarity; - - calculate similarity with several embedded actions for the loss - and output similarities between user input and bot actions - and similarities between bot actions. - - They are kept in the same helper method, - because it is necessary for them to be mathematically identical. 
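The `_tf_calc_iou_mask` helper marks sampled negatives whose binary action features are identical to the positive action (intersection over union equals one), so that they can later be pushed towards negative infinity before the loss, while `_tf_raw_sim` is a masked dot product. A rough NumPy restatement, assuming binary bot-action features (names are illustrative):

import numpy as np

def bad_negative_mask(pos_b, neg_b):
    # pos_b: (d,) binary features of the correct action
    # neg_b: (num_neg, d) binary features of the sampled negatives
    intersection = np.minimum(neg_b, pos_b).sum(-1)
    union = np.maximum(neg_b, pos_b).sum(-1)
    iou = intersection / union
    # `1.0 - relu(sign(1.0 - iou))` above is 1 exactly when iou >= 1,
    # i.e. when the "negative" is in fact the same action as the positive
    return (iou >= 1.0).astype(np.float32)

def raw_sim(a, b, mask):
    # dot product along the embedding axis, zeroed out for padded time steps
    return (a * b).sum(-1) * mask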
- """ - - if self.similarity_type == "cosine": - # normalize embedding vectors for cosine similarity - embed_dialogue = tf.nn.l2_normalize(embed_dialogue, -1) - embed_action = tf.nn.l2_normalize(embed_action, -1) - - if self.similarity_type in {"cosine", "inner"}: - - if len(embed_dialogue.shape) == len(embed_action.shape): - # calculate similarity between - # two embedding vectors of the same size - sim = tf.reduce_sum(embed_dialogue * embed_action, -1, keepdims=True) - bin_sim = tf.where( - sim > (self.mu_pos - self.mu_neg) / 2.0, - tf.ones_like(sim), - tf.zeros_like(sim), - ) - - # output binary mask and similarity - return bin_sim, sim - - else: - # calculate similarity with several - # embedded actions for the loss - sim = tf.reduce_sum( - tf.expand_dims(embed_dialogue, -2) * embed_action, -1 - ) - sim *= tf.expand_dims(mask, 2) - - sim_act = tf.reduce_sum( - embed_action[:, :, :1, :] * embed_action[:, :, 1:, :], -1 - ) - sim_act *= tf.expand_dims(mask, 2) - - # output similarities between user input and bot actions - # and similarities between bot actions - return sim, sim_act + pos_dial_embed: "tf.Tensor", + pos_bot_embed: "tf.Tensor", + neg_dial_embed: "tf.Tensor", + neg_bot_embed: "tf.Tensor", + dial_bad_negs: "tf.Tensor", + bot_bad_negs: "tf.Tensor", + mask: "tf.Tensor", + ) -> Tuple["tf.Tensor", "tf.Tensor", "tf.Tensor", "tf.Tensor", "tf.Tensor"]: + """Define similarity.""" + + # calculate similarity with several + # embedded actions for the loss + neg_inf = large_compatible_negative(pos_dial_embed.dtype) + + sim_pos = self._tf_raw_sim(pos_dial_embed, pos_bot_embed, mask) + sim_neg = ( + self._tf_raw_sim(pos_dial_embed, neg_bot_embed, mask) + + neg_inf * bot_bad_negs + ) + sim_neg_bot_bot = ( + self._tf_raw_sim(pos_bot_embed, neg_bot_embed, mask) + + neg_inf * bot_bad_negs + ) + sim_neg_dial_dial = ( + self._tf_raw_sim(pos_dial_embed, neg_dial_embed, mask) + + neg_inf * dial_bad_negs + ) + sim_neg_bot_dial = ( + self._tf_raw_sim(pos_bot_embed, neg_dial_embed, mask) + + neg_inf * dial_bad_negs + ) - else: - raise ValueError( - "Wrong similarity type {}, " - "should be 'cosine' or 'inner'" - "".format(self.similarity_type) - ) + # output similarities between user input and bot actions + # and similarities between bot actions and similarities between user inputs + return sim_pos, sim_neg, sim_neg_bot_bot, sim_neg_dial_dial, sim_neg_bot_dial - def _regularization_loss(self) -> Union[tf.Tensor, int]: - """Add regularization to the embed layer inside rnn cell.""" + @staticmethod + def _tf_calc_accuracy(sim_pos: "tf.Tensor", sim_neg: "tf.Tensor") -> "tf.Tensor": + """Calculate accuracy""" - if self.attn_after_rnn: - return self.C2 * tf.add_n( - [ - tf.nn.l2_loss(tf_var) - for tf_var in tf.trainable_variables() - if "cell/out_layer/kernel" in tf_var.name - ] - ) - else: - return 0 + max_all_sim = tf.reduce_max(tf.concat([sim_pos, sim_neg], -1), -1) + return tf.reduce_mean( + tf.cast(tf.math.equal(max_all_sim, sim_pos[:, :, 0]), tf.float32) + ) - def _tf_loss( + def _tf_loss_margin( self, - sim: tf.Tensor, - sim_act: tf.Tensor, - sims_rnn_to_max: List[tf.Tensor], - mask: tf.Tensor, - ) -> tf.Tensor: - """Define loss.""" + sim_pos: "tf.Tensor", + sim_neg: "tf.Tensor", + sim_neg_bot_bot: "tf.Tensor", + sim_neg_dial_dial: "tf.Tensor", + sim_neg_bot_dial: "tf.Tensor", + mask: "tf.Tensor", + ) -> "tf.Tensor": + """Define max margin loss.""" # loss for maximizing similarity with correct action - loss = tf.maximum(0.0, self.mu_pos - sim[:, :, 0]) + loss = tf.maximum(0.0, self.mu_pos - 
sim_pos[:, :, 0]) # loss for minimizing similarity with `num_neg` incorrect actions if self.use_max_sim_neg: # minimize only maximum similarity over incorrect actions - max_sim_neg = tf.reduce_max(sim[:, :, 1:], -1) + max_sim_neg = tf.reduce_max(sim_neg, -1) loss += tf.maximum(0.0, self.mu_neg + max_sim_neg) else: # minimize all similarities with incorrect actions - max_margin = tf.maximum(0.0, self.mu_neg + sim[:, :, 1:]) + max_margin = tf.maximum(0.0, self.mu_neg + sim_neg) loss += tf.reduce_sum(max_margin, -1) - if self.scale_loss_by_action_counts: - # scale loss inverse proportionally to number of action counts - loss *= self._loss_scales + # penalize max similarity between pos bot and neg bot embeddings + max_sim_neg_bot = tf.maximum(0.0, tf.reduce_max(sim_neg_bot_bot, -1)) + loss += max_sim_neg_bot * self.C_emb - # penalize max similarity between intent embeddings - loss_act = tf.maximum(0.0, tf.reduce_max(sim_act, -1)) - loss += loss_act * self.C_emb + # penalize max similarity between pos dial and neg dial embeddings + max_sim_neg_dial = tf.maximum(0.0, tf.reduce_max(sim_neg_dial_dial, -1)) + loss += max_sim_neg_dial * self.C_emb - # maximize similarity returned by time attention wrapper - for sim_to_add in sims_rnn_to_max: - loss += tf.maximum(0.0, 1.0 - sim_to_add) + # penalize max similarity between pos bot and neg dial embeddings + max_sim_neg_dial = tf.maximum(0.0, tf.reduce_max(sim_neg_bot_dial, -1)) + loss += max_sim_neg_dial * self.C_emb # mask loss for different length sequences loss *= mask # average the loss over sequence length loss = tf.reduce_sum(loss, -1) / tf.reduce_sum(mask, 1) - # average the loss over the batch - loss = ( - tf.reduce_mean(loss) - # add regularization losses - + self._regularization_loss() - + tf.losses.get_regularization_loss() - ) - return loss + loss = tf.reduce_mean(loss) - # training methods + # add regularization losses + loss += tf.losses.get_regularization_loss() - def train( - self, - training_trackers: List[DialogueStateTracker], - domain: Domain, - **kwargs: Any - ) -> None: - """Train the policy on given training trackers.""" - - logger.debug("Started training embedding policy.") - - # set numpy random seed - np.random.seed(self.random_seed) - - # dealing with training data - training_data = self.featurize_for_training(training_trackers, domain, **kwargs) - # assume that characteristic time is the mean length of the dialogues - self.characteristic_time = np.mean(training_data.true_length) - if self.attn_shift_range is None: - self.attn_shift_range = int(self.characteristic_time / 2) + return loss - # encode all actions with policies' featurizer - self.encoded_all_actions = self.featurizer.state_featurizer.create_encoded_all_actions( - domain + def _tf_loss_softmax( + self, + sim_pos: "tf.Tensor", + sim_neg: "tf.Tensor", + sim_neg_bot_bot: "tf.Tensor", + sim_neg_dial_dial: "tf.Tensor", + sim_neg_bot_dial: "tf.Tensor", + mask: "tf.Tensor", + ) -> "tf.Tensor": + """Define softmax loss.""" + + logits = tf.concat( + [sim_pos, sim_neg, sim_neg_bot_bot, sim_neg_dial_dial, sim_neg_bot_dial], -1 ) - # check if number of negatives is less than number of actions - logger.debug( - "Check if num_neg {} is smaller " - "than number of actions {}, " - "else set num_neg to the number of actions - 1" - "".format(self.num_neg, domain.num_actions) - ) - self.num_neg = min(self.num_neg, domain.num_actions - 1) + # create labels for softmax + pos_labels = tf.ones_like(logits[:, :, :1]) + neg_labels = tf.zeros_like(logits[:, :, 1:]) + labels = 
tf.concat([pos_labels, neg_labels], -1) - # extract actual training data to feed to tf session - session_data = self._create_tf_session_data( - domain, training_data.X, training_data.y - ) + if self.scale_loss: + # mask loss by prediction confidence + pred = tf.nn.softmax(logits) + mask *= tf.pow((1 - pred[:, :, 0]) / 0.5, 4) - self.graph = tf.Graph() + loss = tf.losses.softmax_cross_entropy(labels, logits, mask) + # add regularization losses + loss += tf.losses.get_regularization_loss() - with self.graph.as_default(): - # set random seed in tf - tf.set_random_seed(self.random_seed) - - dialogue_len = None # use dynamic time for rnn - # create placeholders - self.a_in = tf.placeholder( - dtype=tf.float32, - shape=(None, dialogue_len, session_data.X.shape[-1]), - name="a", - ) - self.b_in = tf.placeholder( - dtype=tf.float32, - shape=(None, dialogue_len, None, session_data.Y.shape[-1]), - name="b", - ) - self.c_in = tf.placeholder( - dtype=tf.float32, - shape=(None, dialogue_len, session_data.slots.shape[-1]), - name="slt", - ) - self.b_prev_in = tf.placeholder( - dtype=tf.float32, - shape=(None, dialogue_len, session_data.Y.shape[-1]), - name="b_prev", - ) - self._dialogue_len = tf.placeholder( - dtype=tf.int32, shape=(), name="dialogue_len" - ) - self._x_for_no_intent_in = tf.placeholder( - dtype=tf.float32, - shape=(1, session_data.X.shape[-1]), - name="x_for_no_intent", - ) - self._y_for_no_action_in = tf.placeholder( - dtype=tf.float32, - shape=(1, session_data.Y.shape[-1]), - name="y_for_no_action", - ) - self._y_for_action_listen_in = tf.placeholder( - dtype=tf.float32, - shape=(1, session_data.Y.shape[-1]), - name="y_for_action_listen", - ) - self._is_training = tf.placeholder_with_default(False, shape=()) - - self._loss_scales = tf.placeholder( - dtype=tf.float32, shape=(None, dialogue_len) - ) - - # create embedding vectors - self.user_embed = self._create_tf_user_embed(self.a_in) - self.bot_embed = self._create_tf_bot_embed(self.b_in) - self.slot_embed = self._create_embed(self.c_in, layer_name_suffix="slt") + return loss - embed_prev_action = self._create_tf_bot_embed(self.b_prev_in) - embed_for_no_intent = self._create_tf_no_intent_embed( - self._x_for_no_intent_in + def _choose_loss( + self, + sim_pos: "tf.Tensor", + sim_neg: "tf.Tensor", + sim_neg_bot_bot: "tf.Tensor", + sim_neg_dial_dial: "tf.Tensor", + sim_neg_bot_dial: "tf.Tensor", + mask: "tf.Tensor", + ) -> "tf.Tensor": + """Use loss depending on given option.""" + + if self.loss_type == "margin": + return self._tf_loss_margin( + sim_pos, + sim_neg, + sim_neg_bot_bot, + sim_neg_dial_dial, + sim_neg_bot_dial, + mask, ) - embed_for_no_action = self._create_tf_no_action_embed( - self._y_for_no_action_in + elif self.loss_type == "softmax": + return self._tf_loss_softmax( + sim_pos, + sim_neg, + sim_neg_bot_bot, + sim_neg_dial_dial, + sim_neg_bot_dial, + mask, ) - embed_for_action_listen = self._create_tf_no_action_embed( - self._y_for_action_listen_in + else: + raise ValueError( + "Wrong loss type '{}', " + "should be 'margin' or 'softmax'" + "".format(self.loss_type) ) - # mask different length sequences - # if there is at least one `-1` it should be masked - mask = tf.sign(tf.reduce_max(self.a_in, -1) + 1) - - # get rnn output - cell_output, final_state = self._create_tf_dial_embed( - self.user_embed, - self.slot_embed, - embed_prev_action, - mask, - embed_for_no_intent, - embed_for_no_action, - embed_for_action_listen, - ) - # process rnn output - if self.is_using_attention(): - self.alignment_history = 
self._alignments_history_from(final_state) + def _build_tf_train_graph(self) -> Tuple["tf.Tensor", "tf.Tensor"]: + """Bulid train graph using iterator.""" - self.all_time_masks = self._all_time_masks_from(final_state) + # session data are int counts but we need a float tensors + self.a_in, self.b_in = self._iterator.get_next() - sims_rnn_to_max = self._sims_rnn_to_max_from(cell_output) - self.dial_embed = self._embed_dialogue_from(cell_output) + all_actions = tf.constant( + self._encoded_all_actions, dtype=tf.float32, name="all_actions" + ) - # calculate similarities - self.sim_op, sim_act = self._tf_sim(self.dial_embed, self.bot_embed, mask) - # construct loss - loss = self._tf_loss(self.sim_op, sim_act, sims_rnn_to_max, mask) + self.dial_embed, mask = self._create_tf_dial(self.a_in) + + self.bot_embed = self._create_tf_bot_embed(self.b_in) + self.all_bot_embed = self._create_tf_bot_embed(all_actions) + + if isinstance(self.featurizer, MaxHistoryTrackerFeaturizer): + # add time dimension if max history featurizer is used + self.b_in = self.b_in[:, tf.newaxis, :] + self.bot_embed = self.bot_embed[:, tf.newaxis, :] + + ( + pos_dial_embed, + pos_bot_embed, + neg_dial_embed, + neg_bot_embed, + dial_bad_negs, + bot_bad_negs, + ) = self._sample_negatives(all_actions) + + # calculate similarities + ( + sim_pos, + sim_neg, + sim_neg_bot_bot, + sim_neg_dial_dial, + sim_neg_bot_dial, + ) = self._tf_sim( + pos_dial_embed, + pos_bot_embed, + neg_dial_embed, + neg_bot_embed, + dial_bad_negs, + bot_bad_negs, + mask, + ) - # define which optimizer to use - self._train_op = tf.train.AdamOptimizer( - learning_rate=0.001, epsilon=1e-16 - ).minimize(loss) - # train tensorflow graph - self.session = tf.Session(config=self._tf_config) + acc = self._tf_calc_accuracy(sim_pos, sim_neg) - self._train_tf(session_data, loss, mask) + loss = self._choose_loss( + sim_pos, sim_neg, sim_neg_bot_bot, sim_neg_dial_dial, sim_neg_bot_dial, mask + ) + return loss, acc # training helpers def _linearly_increasing_batch_size(self, epoch: int) -> int: @@ -1102,196 +959,253 @@ def _linearly_increasing_batch_size(self, epoch: int) -> int: else: return int(self.batch_size[0]) - def _create_batch_b( - self, batch_pos_b: np.ndarray, intent_ids: np.ndarray - ) -> np.ndarray: - """Create batch of actions. + def _train_tf_dataset( + self, + train_init_op: "tf.Operation", + eval_init_op: "tf.Operation", + batch_size_in: "tf.Tensor", + loss: "tf.Tensor", + acc: "tf.Tensor", + ) -> None: + """Train tf graph""" - The first is correct action - and the rest are wrong actions sampled randomly. 
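For reference, the linearly increasing batch size used by the training loop interpolates between the two values of the `batch_size` parameter over the epochs. A standalone sketch of that schedule, assuming `batch_size` is a `[min, max]` pair as in the policy defaults:

def linearly_increasing_batch_size(epoch, batch_size, epochs):
    # grow the batch size linearly from batch_size[0] to batch_size[1]
    if not isinstance(batch_size, list):
        return int(batch_size)
    if epochs > 1:
        return int(
            batch_size[0] + epoch * (batch_size[1] - batch_size[0]) / (epochs - 1)
        )
    return int(batch_size[0])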
- """ + self.session.run(tf.global_variables_initializer()) - batch_pos_b = batch_pos_b[:, :, np.newaxis, :] + if self.evaluate_on_num_examples: + logger.info( + "Validation accuracy is calculated every {} epochs" + "".format(self.evaluate_every_num_epochs) + ) + pbar = tqdm(range(self.epochs), desc="Epochs", disable=is_logging_disabled()) - # sample negatives - batch_neg_b = np.zeros( - ( - batch_pos_b.shape[0], - batch_pos_b.shape[1], - self.num_neg, - batch_pos_b.shape[-1], - ), - dtype=int, - ) - for b in range(batch_pos_b.shape[0]): - for h in range(batch_pos_b.shape[1]): - # create negative indexes out of possible ones - # except for correct index of b - negative_indexes = [ - i - for i in range(self.encoded_all_actions.shape[0]) - if i != intent_ids[b, h] - ] + train_loss = 0 + train_acc = 0 + eval_loss = 0 + eval_acc = 0 + for ep in pbar: - negs = np.random.choice(negative_indexes, size=self.num_neg) + batch_size = self._linearly_increasing_batch_size(ep) - batch_neg_b[b, h] = self.encoded_all_actions[negs] + self.session.run(train_init_op, feed_dict={batch_size_in: batch_size}) - return np.concatenate([batch_pos_b, batch_neg_b], -2) + ep_train_loss = 0 + ep_train_acc = 0 + batches_per_epoch = 0 + while True: + try: + _, batch_train_loss, batch_train_acc = self.session.run( + [self._train_op, loss, acc], feed_dict={self._is_training: True} + ) + batches_per_epoch += 1 + ep_train_loss += batch_train_loss + ep_train_acc += batch_train_acc - # noinspection PyPep8Naming - def _scale_loss_by_count_actions( - self, - X: np.ndarray, - slots: np.ndarray, - previous_actions: np.ndarray, - actions_for_Y: np.ndarray, - ) -> Union[np.ndarray, List[List]]: - """Calculate inverse proportionality of repeated actions.""" - - if self.scale_loss_by_action_counts: - full_X = np.concatenate( - [X, slots, previous_actions, actions_for_Y[:, :, np.newaxis]], -1 + except tf.errors.OutOfRangeError: + break + + train_loss = ep_train_loss / batches_per_epoch + train_acc = ep_train_acc / batches_per_epoch + + pbar.set_postfix( + {"loss": "{:.3f}".format(train_loss), "acc": "{:.3f}".format(train_acc)} + ) + + if eval_init_op is not None: + if (ep + 1) % self.evaluate_every_num_epochs == 0 or ( + ep + 1 + ) == self.epochs: + eval_loss, eval_acc = self._output_training_stat_dataset( + eval_init_op, loss, acc + ) + if (ep + 1) != self.epochs: + logger.info( + "Evaluation results: " + "validation loss: {:.3f}, " + "validation accuracy: {:.3f}" + "".format(eval_loss, eval_acc) + ) + + final_message = ( + "Finished training embedding policy, " + "train loss={:.3f}, train accuracy={:.3f}" + "".format(train_loss, train_acc) + ) + if eval_init_op is not None: + final_message += ( + ", validation loss={:.3f}, validation accuracy={:.3f}" + "".format(eval_loss, eval_acc) ) - full_X = full_X.reshape((-1, full_X.shape[-1])) + logger.info(final_message) + + def _output_training_stat_dataset( + self, eval_init_op: "tf.Operation", loss: "tf.Tensor", acc: "tf.Tensor" + ) -> Tuple[float, float]: + """Output training statistics""" + + self.session.run(eval_init_op) + ep_val_loss = 0 + ep_val_acc = 0 + batches_per_epoch = 0 + while True: + try: + batch_val_loss, batch_val_acc = self.session.run( + [loss, acc], feed_dict={self._is_training: False} + ) + batches_per_epoch += 1 + ep_val_loss += batch_val_loss + ep_val_acc += batch_val_acc + except tf.errors.OutOfRangeError: + break + + return ep_val_loss / batches_per_epoch, ep_val_acc / batches_per_epoch + + # prepare for prediction + def _create_tf_placeholders(self, session_data: 
"SessionData") -> None: + """Create placeholders for prediction.""" + + dialogue_len = None # use dynamic time + self.a_in = tf.placeholder( + dtype=tf.float32, + shape=(None, dialogue_len, session_data.X.shape[-1]), + name="a", + ) + self.b_in = tf.placeholder( + dtype=tf.float32, + shape=(None, dialogue_len, None, session_data.Y.shape[-1]), + name="b", + ) + + def _build_tf_pred_graph(self, session_data: "SessionData") -> "tf.Tensor": + """Rebuild tf graph for prediction.""" - _, i, c = np.unique(full_X, return_inverse=True, return_counts=True, axis=0) + self._create_tf_placeholders(session_data) - counts = c[i].reshape((X.shape[0], X.shape[1])) + self.dial_embed, mask = self._create_tf_dial(self.a_in) - # do not include [-1 -1 ... -1 0] in averaging - # and smooth it by taking sqrt - return np.maximum(np.sqrt(np.mean(c[1:]) / counts), 1) + self.sim_all = self._tf_raw_sim( + self.dial_embed[:, :, tf.newaxis, :], + self.all_bot_embed[tf.newaxis, tf.newaxis, :, :], + mask, + ) + + if self.similarity_type == "cosine": + # clip negative values to zero + confidence = tf.nn.relu(self.sim_all) else: - return [[None]] + # normalize result to [0, 1] with softmax + confidence = tf.nn.softmax(self.sim_all) + + self.bot_embed = self._create_tf_bot_embed(self.b_in) + + self.sim = self._tf_raw_sim( + self.dial_embed[:, :, tf.newaxis, :], self.bot_embed, mask + ) + + return confidence - def _train_tf( - self, session_data: SessionData, loss: tf.Tensor, mask: tf.Tensor + def _extract_attention(self) -> Optional["tf.Tensor"]: + """Extract attention probabilities from t2t dict""" + + attention = [ + tf.expand_dims(t, 0) + for name, t in self.attention_weights.items() + if name.endswith("multihead_attention/dot_product_attention") + ] + + if attention: + return tf.concat(attention, 0) + + # training methods + def train( + self, + training_trackers: List["DialogueStateTracker"], + domain: "Domain", + **kwargs: Any ) -> None: - """Train tf graph.""" + """Train the policy on given training trackers.""" - self.session.run(tf.global_variables_initializer()) + logger.debug("Started training embedding policy.") - if self.evaluate_on_num_examples: - logger.info( - "Accuracy is updated every {} epochs" - "".format(self.evaluate_every_num_epochs) - ) - pbar = tqdm(range(self.epochs), desc="Epochs", disable=is_logging_disabled()) - train_acc = 0 - last_loss = 0 - for ep in pbar: - # randomize training data for the current epoch - ids = np.random.permutation(session_data.X.shape[0]) + # set numpy random seed + np.random.seed(self.random_seed) - # calculate batch size for the current epoch - batch_size = self._linearly_increasing_batch_size(ep) - # calculate number of batches in the current epoch - batches_per_epoch = session_data.X.shape[0] // batch_size + int( - session_data.X.shape[0] % batch_size > 0 - ) + # dealing with training data + training_data = self.featurize_for_training(training_trackers, domain, **kwargs) - # collect average loss over the batches - ep_loss = 0 - for i in range(batches_per_epoch): - start_idx = i * batch_size - end_idx = (i + 1) * batch_size - batch_ids = ids[start_idx:end_idx] + # encode all actions with policies' featurizer + self._encoded_all_actions = self.featurizer.state_featurizer.create_encoded_all_actions( + domain + ) - # get randomized data for current batch - batch_a = session_data.X[batch_ids] - batch_pos_b = session_data.Y[batch_ids] - actions_for_b = session_data.actions_for_Y[batch_ids] + # check if number of negatives is less than number of actions + logger.debug( + 
"Check if num_neg {} is smaller " + "than number of actions {}, " + "else set num_neg to the number of actions - 1" + "".format(self.num_neg, domain.num_actions) + ) + # noinspection PyAttributeOutsideInit + self.num_neg = min(self.num_neg, domain.num_actions - 1) - # add negatives - incorrect bot actions predictions - batch_b = self._create_batch_b(batch_pos_b, actions_for_b) + # extract actual training data to feed to tf session + session_data = self._create_session_data(training_data.X, training_data.y) - batch_c = session_data.slots[batch_ids] - batch_b_prev = session_data.previous_actions[batch_ids] + if self.evaluate_on_num_examples: + session_data, eval_session_data = self._train_val_split(session_data) + else: + eval_session_data = None - # calculate how much the loss from each action - # should be scaled based on action rarity - batch_loss_scales = self._scale_loss_by_count_actions( - batch_a, batch_c, batch_b_prev, actions_for_b - ) + self.graph = tf.Graph() - # minimize and calculate loss - _loss, _ = self.session.run( - [loss, self._train_op], - feed_dict={ - self.a_in: batch_a, - self.b_in: batch_b, - self.c_in: batch_c, - self.b_prev_in: batch_b_prev, - self._dialogue_len: session_data.X.shape[1], - self._x_for_no_intent_in: session_data.x_for_no_intent, - self._y_for_no_action_in: session_data.y_for_no_action, - self._y_for_action_listen_in: session_data.y_for_action_listen, - self._is_training: True, - self._loss_scales: batch_loss_scales, - }, - ) - # collect average loss over the batches - ep_loss += _loss / batches_per_epoch - - # calculate train accuracy - if self.evaluate_on_num_examples: - if ( - (ep + 1) == 1 - or (ep + 1) % self.evaluate_every_num_epochs == 0 - or (ep + 1) == self.epochs - ): - train_acc = self._calc_train_acc(session_data, mask) - last_loss = ep_loss - - pbar.set_postfix( - { - "loss": "{:.3f}".format(ep_loss), - "acc": "{:.3f}".format(train_acc), - } + with self.graph.as_default(): + # set random seed in tf + tf.set_random_seed(self.random_seed) + + # allows increasing batch size + batch_size_in = tf.placeholder(tf.int64) + train_dataset = self._create_tf_dataset( + session_data, + batch_size_in, + batch_strategy=self.batch_strategy, + shuffle=True, + ) + + self._iterator = self._create_tf_iterator(train_dataset) + + train_init_op = self._iterator.make_initializer(train_dataset) + + if eval_session_data is not None: + eval_init_op = self._iterator.make_initializer( + self._create_tf_dataset( + eval_session_data, + # pick maximum batch_size for eval + self._linearly_increasing_batch_size(self.epochs), + ) ) else: - pbar.set_postfix({"loss": "{:.3f}".format(ep_loss)}) + eval_init_op = None - if self.evaluate_on_num_examples: - logger.info( - "Finished training embedding policy, " - "loss={:.3f}, train accuracy={:.3f}" - "".format(last_loss, train_acc) - ) + self._is_training = tf.placeholder_with_default(False, shape=()) + loss, acc = self._build_tf_train_graph() - def _calc_train_acc(self, session_data: SessionData, mask: tf.Tensor) -> np.float32: - """Calculate training accuracy.""" + # define which optimizer to use + self._train_op = tf.train.AdamOptimizer().minimize(loss) - # choose n examples to calculate train accuracy - n = self.evaluate_on_num_examples - ids = np.random.permutation(len(session_data.X))[:n] - # noinspection PyPep8Naming - all_Y_d_x = np.stack( - [session_data.all_Y_d for _ in range(session_data.X[ids].shape[0])] - ) + # train tensorflow graph + self.session = tf.Session(config=self._tf_config) + self._train_tf_dataset( + 
train_init_op, eval_init_op, batch_size_in, loss, acc + ) - _sim, _mask = self.session.run( - [self.sim_op, mask], - feed_dict={ - self.a_in: session_data.X[ids], - self.b_in: all_Y_d_x, - self.c_in: session_data.slots[ids], - self.b_prev_in: session_data.previous_actions[ids], - self._dialogue_len: session_data.X.shape[1], - self._x_for_no_intent_in: session_data.x_for_no_intent, - self._y_for_no_action_in: session_data.y_for_no_action, - self._y_for_action_listen_in: session_data.y_for_action_listen, - }, - ) - return np.sum( - (np.argmax(_sim, -1) == session_data.actions_for_Y[ids]) * _mask - ) / np.sum(_mask) + # rebuild the graph for prediction + self.pred_confidence = self._build_tf_pred_graph(session_data) + + self.attention_weights = self._extract_attention() def continue_training( self, - training_trackers: List[DialogueStateTracker], - domain: Domain, + training_trackers: List["DialogueStateTracker"], + domain: "Domain", **kwargs: Any ) -> None: """Continue training an already trained policy.""" @@ -1299,43 +1213,42 @@ def continue_training( batch_size = kwargs.get("batch_size", 5) epochs = kwargs.get("epochs", 50) - for _ in range(epochs): - training_data = self._training_data_for_continue_training( - batch_size, training_trackers, domain - ) + with self.graph.as_default(): + for _ in range(epochs): + training_data = self._training_data_for_continue_training( + batch_size, training_trackers, domain + ) - session_data = self._create_tf_session_data( - domain, training_data.X, training_data.y - ) + session_data = self._create_session_data( + training_data.X, training_data.y + ) + train_dataset = self._create_tf_dataset(session_data, batch_size) + train_init_op = self._iterator.make_initializer(train_dataset) + self.session.run(train_init_op) - b = self._create_batch_b(session_data.Y, session_data.actions_for_Y) + # fit to one extra example using updated trackers + while True: + try: + self.session.run( + self._train_op, feed_dict={self._is_training: True} + ) - batch_loss_scales = self._scale_loss_by_count_actions( - session_data.X, - session_data.slots, - session_data.previous_actions, - session_data.actions_for_Y, - ) + except tf.errors.OutOfRangeError: + break - # fit to one extra example using updated trackers - self.session.run( - self._train_op, - feed_dict={ - self.a_in: session_data.X, - self.b_in: b, - self.c_in: session_data.slots, - self.b_prev_in: session_data.previous_actions, - self._dialogue_len: session_data.X.shape[1], - self._x_for_no_intent_in: session_data.x_for_no_intent, - self._y_for_no_action_in: session_data.y_for_no_action, - self._y_for_action_listen_in: session_data.y_for_action_listen, - self._is_training: True, - self._loss_scales: batch_loss_scales, - }, - ) + def tf_feed_dict_for_prediction( + self, tracker: "DialogueStateTracker", domain: "Domain" + ) -> Dict["tf.Tensor", "np.ndarray"]: + """Create feed dictionary for tf session.""" + + # noinspection PyPep8Naming + data_X = self.featurizer.create_X([tracker], domain) + session_data = self._create_session_data(data_X) + + return {self.a_in: session_data.X} def predict_action_probabilities( - self, tracker: DialogueStateTracker, domain: Domain + self, tracker: "DialogueStateTracker", domain: "Domain" ) -> List[float]: """Predict the next action the bot should take. 
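At prediction time the similarities between the dialogue embedding and every action embedding are turned into confidences: clipped at zero when cosine similarity is used, normalized with softmax otherwise (see `_build_tf_pred_graph`). A NumPy sketch of that post-processing, with an illustrative function name:

import numpy as np

def confidences_from_similarities(sim_all, similarity_type="inner"):
    # sim_all: similarities of the current dialogue state to every action
    if similarity_type == "cosine":
        # cosine similarities are clipped at zero and used as-is
        return np.maximum(sim_all, 0.0)
    # otherwise normalize to a probability-like distribution with softmax
    exp = np.exp(sim_all - sim_all.max())
    return exp / exp.sum()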
@@ -1350,40 +1263,15 @@ def predict_action_probabilities( ) return [0.0] * domain.num_actions - # noinspection PyPep8Naming - data_X = self.featurizer.create_X([tracker], domain) - session_data = self._create_tf_session_data(domain, data_X) - # noinspection PyPep8Naming - all_Y_d_x = np.stack( - [session_data.all_Y_d for _ in range(session_data.X.shape[0])] - ) + tf_feed_dict = self.tf_feed_dict_for_prediction(tracker, domain) - _sim = self.session.run( - self.sim_op, - feed_dict={ - self.a_in: session_data.X, - self.b_in: all_Y_d_x, - self.c_in: session_data.slots, - self.b_prev_in: session_data.previous_actions, - self._dialogue_len: session_data.X.shape[1], - self._x_for_no_intent_in: session_data.x_for_no_intent, - self._y_for_no_action_in: session_data.y_for_no_action, - self._y_for_action_listen_in: session_data.y_for_action_listen, - }, - ) + confidence = self.session.run(self.pred_confidence, feed_dict=tf_feed_dict) - result = _sim[0, -1, :] - if self.similarity_type == "cosine": - # clip negative values to zero - result[result < 0] = 0 - elif self.similarity_type == "inner": - # normalize result to [0, 1] with softmax - result = np.exp(result) - result /= np.sum(result) + return confidence[0, -1, :].tolist() - return result.tolist() + def _persist_tensor(self, name: Text, tensor: "tf.Tensor") -> None: + """Add tensor to collection if it is not None""" - def _persist_tensor(self, name: Text, tensor: tf.Tensor) -> None: if tensor is not None: self.graph.clear_collection(name) self.graph.add_to_collection(name, tensor) @@ -1411,45 +1299,30 @@ def persist(self, path: Text) -> None: rasa.utils.io.create_directory_for_file(checkpoint) with self.graph.as_default(): - self._persist_tensor("intent_placeholder", self.a_in) - self._persist_tensor("action_placeholder", self.b_in) - self._persist_tensor("slots_placeholder", self.c_in) - self._persist_tensor("prev_act_placeholder", self.b_prev_in) - self._persist_tensor("dialogue_len", self._dialogue_len) - self._persist_tensor("x_for_no_intent", self._x_for_no_intent_in) - self._persist_tensor("y_for_no_action", self._y_for_no_action_in) - self._persist_tensor("y_for_action_listen", self._y_for_action_listen_in) - - self._persist_tensor("similarity_op", self.sim_op) + self._persist_tensor("user_placeholder", self.a_in) + self._persist_tensor("bot_placeholder", self.b_in) - self._persist_tensor("alignment_history", self.alignment_history) + self._persist_tensor("similarity_all", self.sim_all) + self._persist_tensor("pred_confidence", self.pred_confidence) + self._persist_tensor("similarity", self.sim) - self._persist_tensor("user_embed", self.user_embed) - self._persist_tensor("bot_embed", self.bot_embed) - self._persist_tensor("slot_embed", self.slot_embed) self._persist_tensor("dial_embed", self.dial_embed) + self._persist_tensor("bot_embed", self.bot_embed) + self._persist_tensor("all_bot_embed", self.all_bot_embed) - self._persist_tensor("rnn_embed", self.rnn_embed) - self._persist_tensor("attn_embed", self.attn_embed) - self._persist_tensor("copy_attn_debug", self.copy_attn_debug) - - self._persist_tensor("all_time_masks", self.all_time_masks) + self._persist_tensor("attention_weights", self.attention_weights) saver = tf.train.Saver() saver.save(self.session, checkpoint) - encoded_actions_file = os.path.join( - path, file_name + ".encoded_all_actions.pkl" - ) - with open(encoded_actions_file, "wb") as f: - pickle.dump(self.encoded_all_actions, f) - tf_config_file = os.path.join(path, file_name + ".tf_config.pkl") with open(tf_config_file, 
"wb") as f: pickle.dump(self._tf_config, f) @staticmethod - def load_tensor(name: Text) -> Optional[tf.Tensor]: + def load_tensor(name: Text) -> Optional["tf.Tensor"]: + """Load tensor or set it to None""" + tensor_list = tf.get_collection(name) return tensor_list[0] if tensor_list else None @@ -1457,11 +1330,12 @@ def load_tensor(name: Text) -> Optional[tf.Tensor]: def load(cls, path: Text) -> "EmbeddingPolicy": """Loads a policy from the storage. - **Needs to load its featurizer**""" + **Needs to load its featurizer** + """ if not os.path.exists(path): raise Exception( - "Failed to load dialogue model. Path {} " + "Failed to load dialogue model. Path '{}' " "doesn't exist".format(os.path.abspath(path)) ) @@ -1483,64 +1357,36 @@ def load(cls, path: Text) -> "EmbeddingPolicy": graph = tf.Graph() with graph.as_default(): - sess = tf.Session(config=_tf_config) + session = tf.Session(config=_tf_config) saver = tf.train.import_meta_graph(checkpoint + ".meta") - saver.restore(sess, checkpoint) + saver.restore(session, checkpoint) - a_in = cls.load_tensor("intent_placeholder") - b_in = cls.load_tensor("action_placeholder") - c_in = cls.load_tensor("slots_placeholder") - b_prev_in = cls.load_tensor("prev_act_placeholder") - dialogue_len = cls.load_tensor("dialogue_len") - x_for_no_intent = cls.load_tensor("x_for_no_intent") - y_for_no_action = cls.load_tensor("y_for_no_action") - y_for_action_listen = cls.load_tensor("y_for_action_listen") + a_in = cls.load_tensor("user_placeholder") + b_in = cls.load_tensor("bot_placeholder") - sim_op = cls.load_tensor("similarity_op") + sim_all = cls.load_tensor("similarity_all") + pred_confidence = cls.load_tensor("pred_confidence") + sim = cls.load_tensor("similarity") - alignment_history = cls.load_tensor("alignment_history") - - user_embed = cls.load_tensor("user_embed") - bot_embed = cls.load_tensor("bot_embed") - slot_embed = cls.load_tensor("slot_embed") dial_embed = cls.load_tensor("dial_embed") + bot_embed = cls.load_tensor("bot_embed") + all_bot_embed = cls.load_tensor("all_bot_embed") - rnn_embed = cls.load_tensor("rnn_embed") - attn_embed = cls.load_tensor("attn_embed") - copy_attn_debug = cls.load_tensor("copy_attn_debug") - - all_time_masks = cls.load_tensor("all_time_masks") - - encoded_actions_file = os.path.join( - path, "{}.encoded_all_actions.pkl".format(file_name) - ) - - with open(encoded_actions_file, "rb") as f: - encoded_all_actions = pickle.load(f) + attention_weights = cls.load_tensor("attention_weights") return cls( featurizer=featurizer, priority=meta["priority"], - encoded_all_actions=encoded_all_actions, graph=graph, - session=sess, - intent_placeholder=a_in, - action_placeholder=b_in, - slots_placeholder=c_in, - prev_act_placeholder=b_prev_in, - dialogue_len=dialogue_len, - x_for_no_intent=x_for_no_intent, - y_for_no_action=y_for_no_action, - y_for_action_listen=y_for_action_listen, - similarity_op=sim_op, - alignment_history=alignment_history, - user_embed=user_embed, - bot_embed=bot_embed, - slot_embed=slot_embed, + session=session, + user_placeholder=a_in, + bot_placeholder=b_in, + similarity_all=sim_all, + pred_confidence=pred_confidence, + similarity=sim, dial_embed=dial_embed, - rnn_embed=rnn_embed, - attn_embed=attn_embed, - copy_attn_debug=copy_attn_debug, - all_time_masks=all_time_masks, + bot_embed=bot_embed, + all_bot_embed=all_bot_embed, + attention_weights=attention_weights, ) diff --git a/rasa/core/policies/mapping_policy.py b/rasa/core/policies/mapping_policy.py index 17b179ff15e7..239a73aa4ae6 100644 --- 
a/rasa/core/policies/mapping_policy.py +++ b/rasa/core/policies/mapping_policy.py @@ -27,6 +27,10 @@ class MappingPolicy(Policy): executed whenever the intent is detected. This policy takes precedence over any other policy.""" + @staticmethod + def _standard_featurizer(): + return None + def __init__(self, priority: int = 3) -> None: """Create a new Mapping policy.""" diff --git a/rasa/core/policies/tf_utils.py b/rasa/core/policies/tf_utils.py deleted file mode 100644 index 2cfddda81bdd..000000000000 --- a/rasa/core/policies/tf_utils.py +++ /dev/null @@ -1,957 +0,0 @@ -from collections import namedtuple -import tensorflow as tf - -tf.contrib._warning = None # avoid warning println on contrib import - remove for tf 2 - - -class TimedNTM(object): - """Timed Neural Turing Machine - - Inspired by paper: - https://arxiv.org/pdf/1410.5401.pdf - Implementation inspired by: - https://github.com/carpedm20/NTM-tensorflow/blob/master/ntm_cell.py - - See our paper for details: https://arxiv.org/abs/1811.11707 - """ - - def __init__(self, attn_shift_range, sparse_attention, name): - """Construct the `TimedNTM`. - - Args: - attn_shift_range: Python int. - A time range within which to attend to the memory by location - sparse_attention: Python bool. - If `True` use sparsemax instead of softmax for probs - name: Name to use when creating ops. - """ - - # interpolation gate - self.name = "timed_ntm_" + name - - self._inter_gate = tf.layers.Dense( - units=1, activation=tf.sigmoid, name=self.name + "/inter_gate" - ) - # if use sparsemax instead of softmax for probs - self._sparse_attention = sparse_attention - - if sparse_attention: - # sparsemax doesn't support inf - self._inf = float(5000) - else: - self._inf = float("inf") - - # shift weighting if range is provided - if attn_shift_range: - self._shift_weight = tf.layers.Dense( - units=2 * attn_shift_range + 1, - activation=tf.nn.softmax, - name=self.name + "/shift_weight", - ) - else: - self._shift_weight = None - - # sharpening parameter - self._gamma_sharp = tf.layers.Dense( - units=1, - activation=lambda a: tf.nn.softplus(a) + 1, - bias_initializer=tf.constant_initializer(1), - name=self.name + "/gamma_sharp", - ) - - def __call__(self, attn_inputs, scores, scores_state, mask): - # apply exponential moving average with interpolation gate weight - # to scores from previous time which are equal to probs at this point - # different from original NTM where it is applied after softmax - i_g = self._inter_gate(attn_inputs) - - # scores limited by time - scores = tf.concat( - [i_g * scores[:, :-1] + (1 - i_g) * scores_state, scores[:, -1:]], 1 - ) - next_scores_state = scores - - if mask is not None: - # apply mask to scores - if self._shift_weight is not None: - # rearrange scores to make them continuous for convolution - scores = tf.map_fn( - self._rearrange_fn, [scores, mask], dtype=scores.dtype - ) - else: - scores = tf.where(mask > 0, scores, -self._inf * tf.ones_like(scores)) - - # create probabilities for attention - if self._sparse_attention: - probs = tf.contrib.sparsemax.sparsemax(scores) - else: - probs = tf.nn.softmax(scores) - - if self._shift_weight is not None: - s_w = self._shift_weight(attn_inputs) - - # we want to go back in time during convolution - conv_probs = tf.reverse(probs, axis=[1]) - - # preare probs for tf.nn.depthwise_conv2d - # [in_width, in_channels=batch] - conv_probs = tf.transpose(conv_probs, [1, 0]) - # [batch=1, in_height=1, in_width=time+1, in_channels=batch] - conv_probs = conv_probs[tf.newaxis, tf.newaxis, :, :] - - # 
[filter_height=1, filter_width=2*attn_shift_range+1, - # in_channels=batch, channel_multiplier=1] - conv_s_w = tf.transpose(s_w, [1, 0]) - conv_s_w = conv_s_w[tf.newaxis, :, :, tf.newaxis] - - # perform 1d convolution - # [batch=1, out_height=1, out_width=time+1, out_channels=batch] - conv_probs = tf.nn.depthwise_conv2d_native( - conv_probs, conv_s_w, [1, 1, 1, 1], "SAME" - ) - conv_probs = conv_probs[0, 0, :, :] - conv_probs = tf.transpose(conv_probs, [1, 0]) - - probs = tf.reverse(conv_probs, axis=[1]) - - if mask is not None: - # arrange probs back to their original time order - probs = tf.map_fn( - self._arrange_back_fn, [probs, mask], dtype=probs.dtype - ) - - # sharpening - g_sh = self._gamma_sharp(attn_inputs) - - powed_probs = tf.pow(probs, g_sh) - probs = powed_probs / (tf.reduce_sum(powed_probs, 1, keepdims=True) + 1e-32) - - return probs, next_scores_state - - def _rearrange_fn(self, list_tensor_1d_mask_1d): - """Rearranges tensor_1d to put all the values - where mask_1d=1 to the right and - where mask_1d=0 to the left and sets them to -infinity""" - tensor_1d, mask_1d = list_tensor_1d_mask_1d - - partitioned_tensor = tf.dynamic_partition(tensor_1d, mask_1d, 2) - partitioned_tensor[0] = -self._inf * tf.ones_like(partitioned_tensor[0]) - - return tf.concat(partitioned_tensor, 0) - - @staticmethod - def _arrange_back_fn(list_tensor_1d_mask_1d): - """Arranges back tensor_1d to restore original order - modified by `_rearrange_fn` according to mask_1d: - - number of 0s in mask_1d values on the left are set to - their corresponding places where mask_1d=0, - - number of 1s in mask_1d values on the right are set to - their corresponding places where mask_1d=1""" - tensor_1d, mask_1d = list_tensor_1d_mask_1d - - mask_indices = tf.dynamic_partition( - tf.range(tf.shape(tensor_1d)[0]), mask_1d, 2 - ) - - mask_sum = tf.reduce_sum(mask_1d, axis=0) - partitioned_tensor = [ - tf.zeros_like(tensor_1d[:-mask_sum]), - tensor_1d[-mask_sum:], - ] - - return tf.dynamic_stitch(mask_indices, partitioned_tensor) - - -def _compute_time_attention( - attention_mechanism, - attn_inputs, - attention_state, - # time is added to calculate time attention - time, - timed_ntm, - time_mask, - ignore_mask, - attention_layer, -): - """Computes the attention and alignments limited by time - for a given attention_mechanism. - - Modified helper method from tensorflow.""" - - scores, _ = attention_mechanism(attn_inputs, state=attention_state) - - # take only scores from current and past times - timed_scores = scores[:, : time + 1] - timed_scores_state = attention_state[:, :time] - - # get mask for past times - timed_time_mask = time_mask[:, :time] - if ignore_mask is not None: - timed_time_mask *= 1 - ignore_mask[:, :time] - - # set mask for current time to 1 - timed_time_mask = tf.concat([timed_time_mask, tf.ones_like(time_mask[:, :1])], 1) - - # pass these scores to NTM - probs, next_scores_state = timed_ntm( - attn_inputs, timed_scores, timed_scores_state, timed_time_mask - ) - - # concatenate probs with zeros to get new alignments - zeros = tf.zeros_like(scores) - # remove current time from attention - alignments = tf.concat([probs[:, :-1], zeros[:, time:]], 1) - - # Reshape from [batch_size, memory_time] to [batch_size, 1, memory_time] - expanded_alignments = tf.expand_dims(alignments, 1) - - # Context is the inner product of alignments and values along the - # memory time dimension. 
- # alignments shape is - # [batch_size, 1, memory_time] - # attention_mechanism.values shape is - # [batch_size, memory_time, memory_size] - # the batched matmul is over memory_time, so the output shape is - # [batch_size, 1, memory_size]. - # we then squeeze out the singleton dim. - context = tf.matmul(expanded_alignments, attention_mechanism.values) - context = tf.squeeze(context, [1]) - - if attention_layer is not None: - attention = attention_layer(tf.concat([attn_inputs, context], 1)) - else: - attention = context - - # return current time to attention - alignments = tf.concat([probs, zeros[:, time + 1 :]], 1) - next_attention_state = tf.concat([next_scores_state, zeros[:, time + 1 :]], 1) - return attention, alignments, next_attention_state - - -# noinspection PyProtectedMember -class TimeAttentionWrapperState( - namedtuple( - "TimeAttentionWrapperState", - tf.contrib.seq2seq.AttentionWrapperState._fields - + ("all_time_masks", "all_cell_states"), - ) -): # added - """Modified from tensorflow's tf.contrib.seq2seq.AttentionWrapperState - see there for description of the parameters - - Additional fields: - - `all_time_masks`: A mask applied to a memory - that filters certain time steps - - `all_cell_states`: All states of the wrapped `RNNCell` - at all the previous time steps. - """ - - def clone(self, **kwargs): - """Copied from tensorflow's tf.contrib.seq2seq.AttentionWrapperState - see there for description of the parameters""" - - def with_same_shape(old, new): - """Check and set new tensor's shape.""" - if isinstance(old, tf.Tensor) and isinstance(new, tf.Tensor): - return tf.contrib.framework.with_same_shape(old, new) - return new - - return tf.contrib.framework.nest.map_structure( - with_same_shape, - self, - super(TimeAttentionWrapperState, self)._replace(**kwargs), - ) - - -class TimeAttentionWrapper(tf.contrib.seq2seq.AttentionWrapper): - """Custom AttentionWrapper that takes into account time - when calculating attention. - Attention is calculated before calling rnn cell. - - Modified from tensorflow's tf.contrib.seq2seq.AttentionWrapper. - - See our paper for details: https://arxiv.org/abs/1811.11707 - """ - - def __init__( - self, - cell, - attention_mechanism, - sequence_len, - attn_shift_range=0, - sparse_attention=False, - attention_layer_size=None, - alignment_history=False, - rnn_and_attn_inputs_fn=None, - ignore_mask=None, - cell_input_fn=None, - index_of_attn_to_copy=None, - likelihood_fn=None, - tensor_not_to_copy=None, - output_attention=False, - initial_cell_state=None, - name=None, - attention_layer=None, - ): - """Construct the `TimeAttentionWrapper`. - See the super class for the original arguments description. - - Additional args: - sequence_len: Python integer. - Maximum length of the sequence, used to create - appropriate TensorArray for all cell states - in TimeAttentionWrapperState - attn_shift_range: Python integer (`0` by default). - A time range within which to attend to the memory - by location in Neural Turing Machine. - sparse_attention: Python bool. - A flag to use sparsemax (if `True`) instead of - softmax (if `False`, default) for probabilities - inputs_and_attn_inputs_fn: (optional) A `callable`. - A function that creates inputs and attention inputs tensors. - ignore_mask: (optional) Boolean Tensor. - Determines which time steps to ignore in attention - index_of_attn_to_copy: (optional) Python integer. 
- An index of attention mechanism that picks - which part of attention tensor to use for copying to output, - the default is `None`, which turns off copying mechanism. - Copy inspired by: https://arxiv.org/pdf/1603.06393.pdf - likelihood_fn: (optional) A `callable`. - A method to perform likelihood calculation to - filter time step in copy mechanism. - Returns a tuple of binary likelihood and likelihood - tensor_not_to_copy: (optional) A Tensor. - A tensor, which shouldn't be copied from previous time steps - - Modified args: - output_attention: Python bool. If `True`, the output at each - time step is the concatenated cell outputs, - attention values and additional values described in - `additional_output_size()`, used in copy mechanism. - """ - super(TimeAttentionWrapper, self).__init__( - cell, - attention_mechanism, - attention_layer_size, - alignment_history, - cell_input_fn, - output_attention, - initial_cell_state, - name, - attention_layer, - ) - self._sequence_len = sequence_len - - if not isinstance(attn_shift_range, list): - # attn_shift_range might not be a list - attn_shift_range = [attn_shift_range] - self._timed_ntms = [TimedNTM(attn_shift_range[0], sparse_attention, name="0")] - if self._is_multi: - # if there are several attention mechanisms, - # create additional TimedNTMs for them - if len(attn_shift_range) == 1: - # original attn_shift_range might not be a list - attn_shift_range *= len(attention_mechanism) - elif len(attn_shift_range) != len(attention_mechanism): - raise ValueError( - "If provided, `attn_shift_range` must contain exactly one " - "integer per attention_mechanism, saw: {} vs {}" - "".format(len(attn_shift_range), len(attention_mechanism)) - ) - for i in range(1, len(attention_mechanism)): - self._timed_ntms.append( - TimedNTM(attn_shift_range[i], sparse_attention, name=str(i)) - ) - - if rnn_and_attn_inputs_fn is None: - rnn_and_attn_inputs_fn = self._default_rnn_and_attn_inputs_fn - else: - if not callable(rnn_and_attn_inputs_fn): - raise TypeError( - "`rnn_and_attn_inputs_fn` must be callable, saw type: {}" - "".format(type(rnn_and_attn_inputs_fn).__name__) - ) - self._rnn_and_attn_inputs_fn = rnn_and_attn_inputs_fn - - if not isinstance(ignore_mask, list): - self._ignore_mask = [tf.cast(ignore_mask, tf.int32)] - else: - self._ignore_mask = [tf.cast(i_m, tf.int32) for i_m in ignore_mask] - - self._index_of_attn_to_copy = index_of_attn_to_copy - - self._likelihood_fn = likelihood_fn - self._tensor_not_to_copy = tensor_not_to_copy - - @staticmethod - def _default_rnn_and_attn_inputs_fn(inputs, cell_state): - if isinstance(cell_state, tf.contrib.rnn.LSTMStateTuple): - return inputs, tf.concat([inputs, cell_state.h], -1) - else: - return inputs, tf.concat([inputs, cell_state], -1) - - @staticmethod - def additional_output_size(): - """Number of additional outputs: - - likelihoods: - attn_likelihood, state_likelihood - debugging info: - current_time_prob, - bin_likelihood_not_to_copy, bin_likelihood_to_copy - - **Method should be static** - """ - return 2 + 3 - - @property - def output_size(self): - if self._output_attention: - if self._index_of_attn_to_copy is not None: - # output both raw rnn cell_output and - # cell_output with copied attention - # together with attention vector itself - # and additional output - return ( - 2 * self._cell.output_size - + self._attention_layer_size - + self.additional_output_size() - ) - else: - return self._cell.output_size + self._attention_layer_size - else: - return self._cell.output_size - - @property - def 
state_size(self): - """The `state_size` property of `TimeAttentionWrapper`. - Returns: - A `TimeAttentionWrapperState` tuple containing shapes - used by this object. - """ - - # use AttentionWrapperState from superclass - state_size = super(TimeAttentionWrapper, self).state_size - - all_cell_states = self._cell.state_size - - return TimeAttentionWrapperState( - cell_state=state_size.cell_state, - time=state_size.time, - attention=state_size.attention, - alignments=state_size.alignments, - attention_state=state_size.attention_state, - alignment_history=state_size.alignment_history, - all_time_masks=self._sequence_len, - all_cell_states=all_cell_states, - ) - - def zero_state(self, batch_size, dtype): - """Modified from tensorflow's zero_state - see there for description of the parameters""" - - # use AttentionWrapperState from superclass - zero_state = super(TimeAttentionWrapper, self).zero_state(batch_size, dtype) - - with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]): - # store time masks - all_time_masks = tf.TensorArray( - tf.int32, - size=self._sequence_len + 1, - dynamic_size=False, - clear_after_read=False, - ).write(0, tf.zeros([batch_size, self.state_size.all_time_masks], tf.int32)) - - # store all cell states into a tensor array to allow - # copy mechanism to go back in time - if isinstance(self._cell.state_size, tf.contrib.rnn.LSTMStateTuple): - all_cell_states = tf.contrib.rnn.LSTMStateTuple( - tf.TensorArray( - dtype, - size=self._sequence_len + 1, - dynamic_size=False, - clear_after_read=False, - ).write(0, zero_state.cell_state.c), - tf.TensorArray( - dtype, - size=self._sequence_len + 1, - dynamic_size=False, - clear_after_read=False, - ).write(0, zero_state.cell_state.h), - ) - else: - all_cell_states = tf.TensorArray( - dtype, size=0, dynamic_size=False, clear_after_read=False - ).write(0, zero_state.cell_state) - - return TimeAttentionWrapperState( - cell_state=zero_state.cell_state, - time=zero_state.time, - attention=zero_state.attention, - alignments=zero_state.alignments, - attention_state=zero_state.attention_state, - alignment_history=zero_state.alignment_history, - all_time_masks=all_time_masks, - all_cell_states=all_cell_states, - ) - - def call(self, inputs, state): - """Perform a step of attention-wrapped RNN. - - The order has changed: - - Step 1: Calculate attention inputs based on the previous cell state - and current inputs - - Step 2: Score the output with `attention_mechanism`. - - Step 3: Calculate the alignments by passing the score through the - `normalizer` and limit them by time. - - Step 4: Calculate the context vector as the inner product between the - alignments and the attention_mechanism's values (memory). - - Step 5: Calculate the attention output by concatenating - the cell output and context through the attention layer - (a linear layer with `attention_layer_size` outputs). - - Step 6: Mix the `inputs` and `attention` output via - `cell_input_fn` to get cell inputs. - - Step 7: Call the wrapped `cell` with these cell inputs and - its previous state. - - Step 8: (optional) Maybe copy output and cell state from history - - Args: - inputs: (Possibly nested tuple of) Tensor, - the input at this time step. - state: An instance of `TimeAttentionWrapperState` - containing tensors from the previous time step. - - Returns: - A tuple `(attention_or_cell_output, next_state)`, where: - - - `attention_or_cell_output` depending on `output_attention`. 
- - `next_state` is an instance of `TimeAttentionWrapperState` - containing the state calculated at this time step. - - Raises: - TypeError: If `state` is not an instance of - `TimeAttentionWrapperState`. - """ - if not isinstance(state, TimeAttentionWrapperState): - raise TypeError( - "Expected state to be instance of " - "TimeAttentionWrapperState. " - "Received type {} instead.".format(type(state)) - ) - - # Step 1: Calculate attention based on - # the previous output and current input - cell_state = state.cell_state - - rnn_inputs, attn_inputs = self._rnn_and_attn_inputs_fn(inputs, cell_state) - - cell_batch_size = attn_inputs.shape[0].value or tf.shape(attn_inputs)[0] - error_message = ( - "When applying AttentionWrapper %s: " % self.name - + "Non-matching batch sizes between the memory " - "(encoder output) and the query (decoder output). " - "Are you using " - "the BeamSearchDecoder? " - "You may need to tile your memory input via " - "the tf.contrib.seq2seq.tile_batch function with argument " - "multiple=beam_width." - ) - with tf.control_dependencies( - self._batch_size_checks(cell_batch_size, error_message) - ): - attn_inputs = tf.identity(attn_inputs, name="checked_attn_inputs") - - if self._is_multi: - previous_attention_state = state.attention_state - previous_alignment_history = state.alignment_history - else: - previous_attention_state = [state.attention_state] - previous_alignment_history = [state.alignment_history] - - all_alignments = [] - all_attentions = [] - all_attention_states = [] - maybe_all_histories = [] - - prev_time_masks = self._read_from_tensor_array(state.all_time_masks, state.time) - prev_time_mask = prev_time_masks[:, -1, :] - - for i, attention_mechanism in enumerate(self._attention_mechanisms): - # Steps 2 - 5 are performed inside `_compute_time_attention` - (attention, alignments, next_attention_state) = _compute_time_attention( - attention_mechanism, - attn_inputs, - previous_attention_state[i], - # time is added to calculate time attention - state.time, - self._timed_ntms[i], - # provide boolean masks, to ignore some time steps - prev_time_mask, - self._ignore_mask[i], - self._attention_layers[i] if self._attention_layers else None, - ) - - alignment_history = ( - previous_alignment_history[i].write(state.time, alignments) - if self._alignment_history - else () - ) - - all_attention_states.append(next_attention_state) - all_alignments.append(alignments) - all_attentions.append(attention) - maybe_all_histories.append(alignment_history) - - attention = tf.concat(all_attentions, 1) - - # Step 6: Mix the `inputs` and `attention` output via - # `cell_input_fn` to get cell inputs. - cell_inputs = self._cell_input_fn(rnn_inputs, attention) - - # Step 7: Call the wrapped `cell` with these cell inputs and - # its previous state. 
- cell_output, next_cell_state = self._cell(cell_inputs, cell_state) - - prev_all_cell_states = state.all_cell_states - - time_mask = tf.concat( - [ - prev_time_mask[:, : state.time], - tf.ones_like(prev_time_mask[:, :1]), - prev_time_mask[:, state.time + 1 :], - ], - 1, - ) - - if self._index_of_attn_to_copy is not None: - # Step 8: Maybe copy output and cell state from history - - # get relevant previous outputs from history - attn_to_copy = all_attentions[self._index_of_attn_to_copy] - # copy them to current output - cell_output_with_attn = cell_output + attn_to_copy - - memory_probs = self._get_memory_probs(all_alignments, state.time) - - # check that we do not pay attention to `tensor_not_to_copy` - bin_likelihood_not_to_copy, _ = self._likelihood_fn( - cell_output_with_attn, self._tensor_not_to_copy - ) - # recalculate probs - memory_probs *= 1 - bin_likelihood_not_to_copy - - history_alignments = self._history_alignments(memory_probs) - - # get previous output from the history - prev_output = self._prev_output( - cell_output_with_attn, history_alignments, state.time - ) - - # check that current output is close to - # the one in the history to which we pay attention to - bin_likelihood_to_copy, _ = self._likelihood_fn( - cell_output_with_attn, prev_output - ) - # recalculate probs - memory_probs *= bin_likelihood_to_copy - - history_alignments = self._history_alignments(memory_probs) - current_time_prob = history_alignments[:, -1:] - - # create additional likelihoods to maximize - attn_likelihood = self._additional_likelihood( - attn_to_copy, prev_output, current_time_prob - ) - state_likelihood = self._additional_likelihood( - cell_output + tf.stop_gradient(attn_to_copy), - prev_output, - current_time_prob, - ) - - # recalculate time_mask - time_mask = self._apply_alignments_to_history( - tf.cast(history_alignments, time_mask.dtype), - prev_time_masks[:, :-1, :], - time_mask, - ) - - # recalculate new next_cell_state based on history_alignments - next_cell_state = self._new_next_cell_state( - prev_all_cell_states, - next_cell_state, - cell_output_with_attn, - history_alignments, - state.time, - ) - - all_cell_states = self._all_cell_states( - prev_all_cell_states, next_cell_state, state.time - ) - - if self._output_attention: - # concatenate cell outputs, attention, additional likelihoods - # and copy_attn_debug - output = tf.concat( - [ - cell_output_with_attn, - cell_output, - attention, - # additional likelihoods - attn_likelihood, - state_likelihood, - # copy_attn_debug - bin_likelihood_not_to_copy, - bin_likelihood_to_copy, - current_time_prob, - ], - 1, - ) - else: - output = cell_output_with_attn - - else: - # do not waste resources on storing history - all_cell_states = prev_all_cell_states - - if self._output_attention: - output = tf.concat([cell_output, attention], 1) - else: - output = cell_output - - all_time_masks = state.all_time_masks.write(state.time + 1, time_mask) - - next_state = TimeAttentionWrapperState( - time=state.time + 1, - cell_state=next_cell_state, - attention=attention, - attention_state=self._item_or_tuple(all_attention_states), - alignments=self._item_or_tuple(all_alignments), - alignment_history=self._item_or_tuple(maybe_all_histories), - all_time_masks=all_time_masks, - all_cell_states=all_cell_states, - ) - return output, next_state - - # helper for TensorArray - @staticmethod - def _read_from_tensor_array(tensor_array, time): - """TensorArray time reader""" - return tf.transpose(tensor_array.gather(tf.range(0, time + 1)), [1, 0, 2]) - - # helper 
methods for copy mechanism - def _get_memory_probs(self, all_alignments, time): - """Helper method to get memory_probs from all_alignments""" - - memory_probs = tf.stop_gradient( - all_alignments[self._index_of_attn_to_copy][:, :time] - ) - - # binarize memory_probs only if max value is larger than margin=0.1 - memory_probs_max = tf.reduce_max(memory_probs, axis=1, keepdims=True) - memory_probs_max = tf.where( - memory_probs_max > 0.1, memory_probs_max, -memory_probs_max - ) - - return tf.where( - tf.equal(memory_probs, memory_probs_max), - tf.ones_like(memory_probs), - tf.zeros_like(memory_probs), - ) - - @staticmethod - def _history_alignments(memory_probs): - """Helper method to apply binary mask to memory_probs""" - - current_time_prob = 1 - tf.reduce_sum(memory_probs, 1, keepdims=True) - return tf.concat([memory_probs, current_time_prob], 1) - - @staticmethod - def _apply_alignments_to_history(alignments, history_states, state): - """Helper method to apply attention probabilities to rnn history - - copied from tf's `_compute_attention(...)`""" - - expanded_alignments = tf.stop_gradient(tf.expand_dims(alignments, 1)) - - history_states = tf.concat([history_states, tf.expand_dims(state, 1)], 1) - - # Context is the inner product of alignments and values along the - # memory time dimension. - # expanded_alignments shape is - # [batch_size, 1, memory_time] - # history_states shape is - # [batch_size, memory_time, memory_size] - # the batched matmul is over memory_time, so the output shape is - # [batch_size, 1, memory_size]. - # we then squeeze out the singleton dim. - - return tf.squeeze(tf.matmul(expanded_alignments, history_states), [1]) - - def _prev_output(self, state, alignments, time): - """Helper method to get previous output from memory""" - - # get all previous outputs from appropriate - # attention mechanism's memory limited by current time - prev_outputs = tf.stop_gradient( - self._attention_mechanisms[self._index_of_attn_to_copy].values[:, :time, :] - ) - - # multiply by alignments to get one vector from one time step - return self._apply_alignments_to_history(alignments, prev_outputs, state) - - def _additional_likelihood(self, output, prev_output, current_time_prob): - """Helper method to create additional likelihood to maximize""" - - _, likelihood = self._likelihood_fn(output, tf.stop_gradient(prev_output)) - return tf.where(current_time_prob < 0.5, likelihood, tf.ones_like(likelihood)) - - def _new_hidden_state(self, prev_all_cell_states, new_state, alignments, time): - """Helper method to look into rnn history""" - - # reshape to (batch, time, memory_time) and - # do not include current time because - # we do not want to pay attention to it, - # but we need to read it instead of - # adding conditional flow if time == 0 - prev_cell_states = self._read_from_tensor_array(prev_all_cell_states, time)[ - :, :-1, : - ] - - return self._apply_alignments_to_history( - alignments, prev_cell_states, new_state - ) - - def _new_next_cell_state( - self, prev_all_cell_states, next_cell_state, new_cell_output, alignments, time - ): - """Helper method to recalculate new next_cell_state""" - - if isinstance(next_cell_state, tf.contrib.rnn.LSTMStateTuple): - next_cell_state_c = self._new_hidden_state( - prev_all_cell_states.c, next_cell_state.c, alignments, time - ) - next_cell_state_h = self._new_hidden_state( - prev_all_cell_states.h, new_cell_output, alignments, time - ) - return tf.contrib.rnn.LSTMStateTuple(next_cell_state_c, next_cell_state_h) - else: - return 
self._new_hidden_state( - prev_all_cell_states, alignments, new_cell_output, time - ) - - @staticmethod - def _all_cell_states(prev_all_cell_states, next_cell_state, time): - """Helper method to recalculate all_cell_states tensor array""" - - if isinstance(next_cell_state, tf.contrib.rnn.LSTMStateTuple): - return tf.contrib.rnn.LSTMStateTuple( - prev_all_cell_states.c.write(time + 1, next_cell_state.c), - prev_all_cell_states.h.write(time + 1, next_cell_state.h), - ) - else: - return prev_all_cell_states.write(time + 1, next_cell_state) - - -class ChronoBiasLayerNormBasicLSTMCell(tf.contrib.rnn.LayerNormBasicLSTMCell): - """Custom LayerNormBasicLSTMCell that allows chrono initialization - of gate biases. - - See super class for description. - - See https://arxiv.org/abs/1804.11188 - for details about chrono initialization - """ - - def __init__( - self, - num_units, - forget_bias=1.0, - input_bias=0.0, - activation=tf.tanh, - layer_norm=True, - norm_gain=1.0, - norm_shift=0.0, - dropout_keep_prob=1.0, - dropout_prob_seed=None, - out_layer_size=None, - reuse=None, - ): - """Initializes the basic LSTM cell - - Additional args: - input_bias: float, The bias added to input gates. - out_layer_size: (optional) integer, The number of units in - the optional additional output layer. - """ - super(ChronoBiasLayerNormBasicLSTMCell, self).__init__( - num_units, - forget_bias=forget_bias, - activation=activation, - layer_norm=layer_norm, - norm_gain=norm_gain, - norm_shift=norm_shift, - dropout_keep_prob=dropout_keep_prob, - dropout_prob_seed=dropout_prob_seed, - reuse=reuse, - ) - self._input_bias = input_bias - self._out_layer_size = out_layer_size - - @property - def output_size(self): - return self._out_layer_size or self._num_units - - @property - def state_size(self): - return tf.contrib.rnn.LSTMStateTuple(self._num_units, self.output_size) - - @staticmethod - def _dense_layer(args, layer_size): - """Optional out projection layer""" - proj_size = args.get_shape()[-1] - dtype = args.dtype - weights = tf.get_variable("kernel", [proj_size, layer_size], dtype=dtype) - bias = tf.get_variable("bias", [layer_size], dtype=dtype) - out = tf.nn.bias_add(tf.matmul(args, weights), bias) - return out - - def call(self, inputs, state): - """LSTM cell with layer normalization and recurrent dropout.""" - c, h = state - args = tf.concat([inputs, h], 1) - concat = self._linear(args) - dtype = args.dtype - - i, j, f, o = tf.split(value=concat, num_or_size_splits=4, axis=1) - if self._layer_norm: - i = self._norm(i, "input", dtype=dtype) - j = self._norm(j, "transform", dtype=dtype) - f = self._norm(f, "forget", dtype=dtype) - o = self._norm(o, "output", dtype=dtype) - - g = self._activation(j) - if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: - g = tf.nn.dropout(g, self._keep_prob, seed=self._seed) - - new_c = c * tf.sigmoid(f + self._forget_bias) + g * tf.sigmoid( - i + self._input_bias - ) # added input_bias - - # do not do layer normalization on the new c, - # because there are no trainable weights - # if self._layer_norm: - # new_c = self._norm(new_c, "state", dtype=dtype) - - new_h = self._activation(new_c) * tf.sigmoid(o) - - # added dropout to the hidden state h - if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1: - new_h = tf.nn.dropout(new_h, self._keep_prob, seed=self._seed) - - # add postprocessing of the output - if self._out_layer_size is not None: - with tf.variable_scope("out_layer"): - new_h = self._dense_layer(new_h, self._out_layer_size) - - new_state = 
tf.contrib.rnn.LSTMStateTuple(new_c, new_h) - return new_h, new_state diff --git a/rasa/nlu/classifiers/embedding_intent_classifier.py b/rasa/nlu/classifiers/embedding_intent_classifier.py index a613321cb6ba..3fa504c3c79d 100644 --- a/rasa/nlu/classifiers/embedding_intent_classifier.py +++ b/rasa/nlu/classifiers/embedding_intent_classifier.py @@ -11,6 +11,11 @@ from rasa.nlu.components import Component from rasa.utils.common import is_logging_disabled +import tensorflow as tf + +# avoid warning println on contrib import - remove for tf 2 +tf.contrib._warning = None + logger = logging.getLogger(__name__) if typing.TYPE_CHECKING: @@ -20,14 +25,6 @@ from rasa.nlu.model import Metadata from rasa.nlu.training_data import Message -try: - import tensorflow as tf - - # avoid warning println on contrib import - remove for tf 2 - tf.contrib._warning = None -except ImportError: - tf = None - class EmbeddingIntentClassifier(Component): """Intent classifier using supervised embeddings. @@ -120,7 +117,6 @@ def __init__( ) -> None: """Declare instant variables with default values""" - self._check_tensorflow() super(EmbeddingIntentClassifier, self).__init__(component_config) self._load_params() @@ -195,15 +191,6 @@ def _load_params(self) -> None: def required_packages(cls) -> List[Text]: return ["tensorflow"] - @staticmethod - def _check_tensorflow(): - if tf is None: - raise ImportError( - "Failed to import `tensorflow`. " - "Please install `tensorflow`. " - "For example with `pip install tensorflow`." - ) - # training data helpers: @staticmethod def _create_intent_dict(training_data: "TrainingData") -> Dict[Text, int]: diff --git a/requirements.txt b/requirements.txt index 8d5b5ab60992..66dce9b78614 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,9 @@ fakeredis==1.0.3 pymongo==3.8.0 numpy==1.16.3 scipy==1.2.1 -tensorflow==1.13.1 +tensorflow==1.14.0 +tensorflow-probability==0.7.0 +tensor2tensor==1.13.4 apscheduler==3.6.0 tqdm==4.31.0 networkx==2.3 @@ -47,3 +49,4 @@ SQLAlchemy~=1.3.3 kafka-python==1.4.6 sklearn-crfsuite==0.3.6 psycopg2-binary==2.8.2 +setuptools==41.0.1 diff --git a/setup.py b/setup.py index 85d291abc696..46f18776d41f 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,9 @@ "pymongo~=3.8", "numpy~=1.16", "scipy~=1.2", - "tensorflow~=1.13.0", + "tensorflow~=1.14.0", + "tensorflow-probability~=0.7.0", + "tensor2tensor~=1.13.4", "apscheduler~=3.0", "tqdm~=4.0", "networkx~=2.3", @@ -74,6 +76,7 @@ "SQLAlchemy~=1.3.0", "kafka-python~=1.4", "sklearn-crfsuite~=0.3.6", + "setuptools~=41.0.1", ] extras_requires = { diff --git a/tests/core/test_policies.py b/tests/core/test_policies.py index c496d2682ca3..4f8692ff23c3 100644 --- a/tests/core/test_policies.py +++ b/tests/core/test_policies.py @@ -21,7 +21,9 @@ from rasa.core.events import ActionExecuted from rasa.core.featurizers import ( BinarySingleStateFeaturizer, + LabelTokenizerSingleStateFeaturizer, MaxHistoryTrackerFeaturizer, + FullDialogueTrackerFeaturizer, ) from rasa.core.policies.two_stage_fallback import TwoStageFallbackPolicy from rasa.core.policies.embedding_policy import EmbeddingPolicy @@ -120,6 +122,26 @@ async def trained_policy(self, featurizer, priority): policy.train(training_trackers, default_domain) return policy + def test_featurizer(self, trained_policy, tmpdir): + assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer) + assert trained_policy.featurizer.max_history == self.max_history + assert isinstance( + trained_policy.featurizer.state_featurizer, BinarySingleStateFeaturizer + ) + 
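# Round-trip the policy through persist/load and verify that the featurizer + # configuration (featurizer type, max_history and state featurizer) survives + # serialization. + 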
trained_policy.persist(tmpdir.strpath) + loaded = trained_policy.__class__.load(tmpdir.strpath) + assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer) + assert loaded.featurizer.max_history == self.max_history + assert isinstance( + loaded.featurizer.state_featurizer, BinarySingleStateFeaturizer + ) + + async def test_continue_training(self, trained_policy, default_domain): + training_trackers = await train_trackers(default_domain, augmentation_factor=0) + trained_policy.continue_training( + training_trackers, default_domain, **{"epochs": 1} + ) + async def test_persist_and_load(self, trained_policy, default_domain, tmpdir): trained_policy.persist(tmpdir.strpath) loaded = trained_policy.__class__.load(tmpdir.strpath) @@ -156,12 +178,14 @@ def test_persist_and_load_empty_policy(self, tmpdir): def test_tf_config(self, trained_policy, tmpdir): if hasattr(trained_policy, "session"): + import tensorflow as tf + # noinspection PyProtectedMember - assert trained_policy.session._config is None + assert trained_policy.session._config == tf.Session()._config trained_policy.persist(tmpdir.strpath) loaded = trained_policy.__class__.load(tmpdir.strpath) # noinspection PyProtectedMember - assert loaded.session._config is None + assert loaded.session._config == tf.Session()._config @staticmethod def _get_next_action(policy, events, domain): @@ -192,154 +216,6 @@ def test_tf_config(self, trained_policy, tmpdir): assert loaded.session._config == session_config() -class TestFallbackPolicy(PolicyTestCollection): - def create_policy(self, featurizer, priority): - p = FallbackPolicy(priority=priority) - return p - - @pytest.mark.parametrize( - "nlu_confidence, last_action_name, should_nlu_fallback", - [ - (0.1, "some_action", False), - (0.1, "action_listen", True), - (0.9, "some_action", False), - (0.9, "action_listen", False), - ], - ) - def test_should_nlu_fallback( - self, trained_policy, nlu_confidence, last_action_name, should_nlu_fallback - ): - assert ( - trained_policy.should_nlu_fallback(nlu_confidence, last_action_name) - is should_nlu_fallback - ) - - -class TestMappingPolicy(PolicyTestCollection): - def create_policy(self, featurizer, priority): - p = MappingPolicy() - return p - - @pytest.fixture(scope="module") - def domain_with_mapping(self): - return Domain.load(DEFAULT_DOMAIN_PATH_WITH_MAPPING) - - @pytest.fixture - def tracker(self, domain_with_mapping): - return DialogueStateTracker( - UserMessage.DEFAULT_SENDER_ID, domain_with_mapping.slots - ) - - @pytest.fixture( - params=[ - ("default", "utter_default"), - ("greet", "utter_greet"), - (USER_INTENT_RESTART, ACTION_RESTART_NAME), - (USER_INTENT_BACK, ACTION_BACK_NAME), - ] - ) - def intent_mapping(self, request): - return request.param - - def test_predict_mapped_action(self, priority, domain_with_mapping, intent_mapping): - policy = self.create_policy(None, priority) - events = [ - ActionExecuted(ACTION_LISTEN_NAME), - user_uttered(intent_mapping[0], 1), - ] - - assert ( - self._get_next_action(policy, events, domain_with_mapping) - == intent_mapping[1] - ) - - def test_predict_action_listen(self, priority, domain_with_mapping, intent_mapping): - policy = self.create_policy(None, priority) - events = [ - ActionExecuted(ACTION_LISTEN_NAME), - user_uttered(intent_mapping[0], 1), - ActionExecuted(intent_mapping[1], policy="policy_0_MappingPolicy"), - ] - tracker = get_tracker(events) - scores = policy.predict_action_probabilities(tracker, domain_with_mapping) - index = scores.index(max(scores)) - action_planned = 
domain_with_mapping.action_names[index] - assert action_planned == ACTION_LISTEN_NAME - assert scores != [0] * domain_with_mapping.num_actions - - def test_do_not_follow_other_policy( - self, priority, domain_with_mapping, intent_mapping - ): - policy = self.create_policy(None, priority) - events = [ - ActionExecuted(ACTION_LISTEN_NAME), - user_uttered(intent_mapping[0], 1), - ActionExecuted(intent_mapping[1], policy="other_policy"), - ] - tracker = get_tracker(events) - scores = policy.predict_action_probabilities(tracker, domain_with_mapping) - assert scores == [0] * domain_with_mapping.num_actions - - -class TestMemoizationPolicy(PolicyTestCollection): - def create_policy(self, featurizer, priority): - max_history = None - if isinstance(featurizer, MaxHistoryTrackerFeaturizer): - max_history = featurizer.max_history - p = MemoizationPolicy(priority=priority, max_history=max_history) - return p - - async def test_memorise(self, trained_policy, default_domain): - trackers = await train_trackers(default_domain, augmentation_factor=20) - trained_policy.train(trackers, default_domain) - lookup_with_augmentation = trained_policy.lookup - - trackers = [t for t in trackers if not hasattr(t, "is_augmented")] - - all_states, all_actions = trained_policy.featurizer.training_states_and_actions( - trackers, default_domain - ) - - for tracker, states, actions in zip(trackers, all_states, all_actions): - recalled = trained_policy.recall(states, tracker, default_domain) - assert recalled == default_domain.index_for_action(actions[0]) - - nums = np.random.randn(default_domain.num_states) - random_states = [{f: num for f, num in zip(default_domain.input_states, nums)}] - assert trained_policy._recall_states(random_states) is None - - # compare augmentation for augmentation_factor of 0 and 20: - trackers_no_augmentation = await train_trackers( - default_domain, augmentation_factor=0 - ) - trained_policy.train(trackers_no_augmentation, default_domain) - lookup_no_augmentation = trained_policy.lookup - - assert lookup_no_augmentation == lookup_with_augmentation - - def test_memorise_with_nlu(self, trained_policy, default_domain): - filename = "data/test_dialogues/default.json" - dialogue = read_dialogue_file(filename) - - tracker = DialogueStateTracker(dialogue.name, default_domain.slots) - tracker.recreate_from_dialogue(dialogue) - states = trained_policy.featurizer.prediction_states([tracker], default_domain)[ - 0 - ] - - recalled = trained_policy.recall(states, tracker, default_domain) - assert recalled is not None - - -class TestAugmentedMemoizationPolicy(PolicyTestCollection): - def create_policy(self, featurizer, priority): - max_history = None - if isinstance(featurizer, MaxHistoryTrackerFeaturizer): - max_history = featurizer.max_history - p = AugmentedMemoizationPolicy(priority=priority, max_history=max_history) - return p - - class TestSklearnPolicy(PolicyTestCollection): def create_policy(self, featurizer, priority, **kwargs): p = SklearnPolicy(featurizer, priority, **kwargs) @@ -463,51 +339,116 @@ def test_train_with_shuffle_false( policy.train(trackers, domain=default_domain) -class TestEmbeddingPolicyNoAttention(PolicyTestCollection): +class TestEmbeddingPolicy(PolicyTestCollection): def create_policy(self, featurizer, priority): - # use standard featurizer from EmbeddingPolicy, - # since it is using FullDialogueTrackerFeaturizer - p = EmbeddingPolicy( - priority=priority, attn_before_rnn=False, attn_after_rnn=False - ) + p = EmbeddingPolicy(featurizer=featurizer, priority=priority) return p + 
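As an illustrative sketch only (not part of this diff), assuming the featurizer classes imported at the top of this test module and hypothetical max_history/priority values: the reworked EmbeddingPolicy takes its featurizer explicitly, which is what the featurizer tests further down assert on the trained policy.

    # Minimal sketch; constructor names mirror the imports and calls in this test
    # module, while max_history=5 and priority=1 are assumed example values.
    from rasa.core.featurizers import (
        LabelTokenizerSingleStateFeaturizer,
        MaxHistoryTrackerFeaturizer,
    )
    from rasa.core.policies.embedding_policy import EmbeddingPolicy

    featurizer = MaxHistoryTrackerFeaturizer(
        LabelTokenizerSingleStateFeaturizer(), max_history=5
    )
    policy = EmbeddingPolicy(featurizer=featurizer, priority=1)
    # policy.train(training_trackers, domain) would then featurize each tracker
    # using only the last 5 dialogue states.
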
def test_similarity_type(self, trained_policy): + assert trained_policy.similarity_type == "inner" + + async def test_gen_batch(self, trained_policy, default_domain): + training_trackers = await train_trackers(default_domain, augmentation_factor=0) + training_data = trained_policy.featurize_for_training( + training_trackers, default_domain + ) + session_data = trained_policy._create_session_data( + training_data.X, training_data.y + ) + batch_size = 2 + batch_x, batch_y = next( + trained_policy._gen_batch(session_data=session_data, batch_size=batch_size) + ) + assert batch_x.shape[0] == batch_size and batch_y.shape[0] == batch_size + assert ( + batch_x[0].shape == session_data.X[0].shape + and batch_y[0].shape == session_data.Y[0].shape + ) + batch_x, batch_y = next( + trained_policy._gen_batch( + session_data=session_data, + batch_size=batch_size, + batch_strategy="balanced", + shuffle=True, + ) + ) + assert batch_x.shape[0] == batch_size and batch_y.shape[0] == batch_size + assert ( + batch_x[0].shape == session_data.X[0].shape + and batch_y[0].shape == session_data.Y[0].shape + ) + -class TestEmbeddingPolicyAttentionBeforeRNN(PolicyTestCollection): +class TestEmbeddingPolicyMargin(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): - # use standard featurizer from EmbeddingPolicy, - # since it is using FullDialogueTrackerFeaturizer p = EmbeddingPolicy( - priority=priority, attn_before_rnn=True, attn_after_rnn=False + featurizer=featurizer, priority=priority, **{"loss_type": "margin"} ) return p + def test_similarity_type(self, trained_policy): + assert trained_policy.similarity_type == "cosine" + -class TestEmbeddingPolicyAttentionAfterRNN(PolicyTestCollection): +class TestEmbeddingPolicyWithEval(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): - # use standard featurizer from EmbeddingPolicy, - # since it is using FullDialogueTrackerFeaturizer p = EmbeddingPolicy( - priority=priority, attn_before_rnn=False, attn_after_rnn=True + featurizer=featurizer, + priority=priority, + **{"scale_loss": False, "evaluate_on_num_examples": 4} ) return p -class TestEmbeddingPolicyAttentionBoth(PolicyTestCollection): +class TestEmbeddingPolicyWithFullDialogue(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): # use standard featurizer from EmbeddingPolicy, # since it is using FullDialogueTrackerFeaturizer - p = EmbeddingPolicy( - priority=priority, attn_before_rnn=True, attn_after_rnn=True - ) + # if max_history is not specified + p = EmbeddingPolicy(priority=priority) return p + def test_featurizer(self, trained_policy, tmpdir): + assert isinstance(trained_policy.featurizer, FullDialogueTrackerFeaturizer) + assert isinstance( + trained_policy.featurizer.state_featurizer, + LabelTokenizerSingleStateFeaturizer, + ) + trained_policy.persist(tmpdir.strpath) + loaded = trained_policy.__class__.load(tmpdir.strpath) + assert isinstance(loaded.featurizer, FullDialogueTrackerFeaturizer) + assert isinstance( + loaded.featurizer.state_featurizer, LabelTokenizerSingleStateFeaturizer + ) -class TestEmbeddingPolicyWithTfConfig(PolicyTestCollection): + +class TestEmbeddingPolicyWithMaxHistory(TestEmbeddingPolicy): def create_policy(self, featurizer, priority): # use standard featurizer from EmbeddingPolicy, - # since it is using FullDialogueTrackerFeaturizer - p = EmbeddingPolicy(priority=priority, **tf_defaults()) + # since it is using MaxHistoryTrackerFeaturizer + # if max_history is specified + p = EmbeddingPolicy(priority=priority, 
max_history=self.max_history) + return p + + def test_featurizer(self, trained_policy, tmpdir): + assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer) + assert trained_policy.featurizer.max_history == self.max_history + assert isinstance( + trained_policy.featurizer.state_featurizer, + LabelTokenizerSingleStateFeaturizer, + ) + trained_policy.persist(tmpdir.strpath) + loaded = trained_policy.__class__.load(tmpdir.strpath) + assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer) + assert loaded.featurizer.max_history == self.max_history + assert isinstance( + loaded.featurizer.state_featurizer, LabelTokenizerSingleStateFeaturizer + ) + + +class TestEmbeddingPolicyWithTfConfig(TestEmbeddingPolicy): + def create_policy(self, featurizer, priority): + p = EmbeddingPolicy(featurizer=featurizer, priority=priority, **tf_defaults()) return p def test_tf_config(self, trained_policy, tmpdir): @@ -519,12 +460,84 @@ def test_tf_config(self, trained_policy, tmpdir): assert loaded.session._config == session_config() -class TestFormPolicy(PolicyTestCollection): +class TestMemoizationPolicy(PolicyTestCollection): + def create_policy(self, featurizer, priority): + max_history = None + if isinstance(featurizer, MaxHistoryTrackerFeaturizer): + max_history = featurizer.max_history + p = MemoizationPolicy(priority=priority, max_history=max_history) + return p + + def test_featurizer(self, trained_policy, tmpdir): + assert isinstance(trained_policy.featurizer, MaxHistoryTrackerFeaturizer) + assert trained_policy.featurizer.state_featurizer is None + trained_policy.persist(tmpdir.strpath) + loaded = trained_policy.__class__.load(tmpdir.strpath) + assert isinstance(loaded.featurizer, MaxHistoryTrackerFeaturizer) + assert loaded.featurizer.state_featurizer is None + + async def test_memorise(self, trained_policy, default_domain): + trackers = await train_trackers(default_domain, augmentation_factor=20) + trained_policy.train(trackers, default_domain) + lookup_with_augmentation = trained_policy.lookup + + trackers = [ + t for t in trackers if not hasattr(t, "is_augmented") or not t.is_augmented + ] + + ( + all_states, + all_actions, + ) = trained_policy.featurizer.training_states_and_actions( + trackers, default_domain + ) + + for tracker, states, actions in zip(trackers, all_states, all_actions): + recalled = trained_policy.recall(states, tracker, default_domain) + assert recalled == default_domain.index_for_action(actions[0]) + + nums = np.random.randn(default_domain.num_states) + random_states = [{f: num for f, num in zip(default_domain.input_states, nums)}] + assert trained_policy._recall_states(random_states) is None + + # compare augmentation for augmentation_factor of 0 and 20: + trackers_no_augmentation = await train_trackers( + default_domain, augmentation_factor=0 + ) + trained_policy.train(trackers_no_augmentation, default_domain) + lookup_no_augmentation = trained_policy.lookup + + assert lookup_no_augmentation == lookup_with_augmentation + + def test_memorise_with_nlu(self, trained_policy, default_domain): + filename = "data/test_dialogues/default.json" + dialogue = read_dialogue_file(filename) + + tracker = DialogueStateTracker(dialogue.name, default_domain.slots) + tracker.recreate_from_dialogue(dialogue) + states = trained_policy.featurizer.prediction_states([tracker], default_domain)[ + 0 + ] + + recalled = trained_policy.recall(states, tracker, default_domain) + assert recalled is not None + + +class TestAugmentedMemoizationPolicy(TestMemoizationPolicy): + def 
create_policy(self, featurizer, priority): + max_history = None + if isinstance(featurizer, MaxHistoryTrackerFeaturizer): + max_history = featurizer.max_history + p = AugmentedMemoizationPolicy(priority=priority, max_history=max_history) + return p + + +class TestFormPolicy(TestMemoizationPolicy): def create_policy(self, featurizer, priority): p = FormPolicy(priority=priority) return p - async def test_memorise(self, trained_policy): + async def test_memorise(self, trained_policy, default_domain): domain = Domain.load("data/test_domains/form.yml") trackers = await training.load_data("data/test_stories/stories_form.md", domain) trained_policy.train(trackers, domain) @@ -582,8 +595,112 @@ async def test_memorise(self, trained_policy): random_states = [{f: num for f, num in zip(domain.input_states, nums)}] assert trained_policy.recall(random_states, None, domain) is None + def test_memorise_with_nlu(self, trained_policy, default_domain): + pass + + +class TestMappingPolicy(PolicyTestCollection): + def create_policy(self, featurizer, priority): + p = MappingPolicy() + return p + + def test_featurizer(self, trained_policy, tmpdir): + assert trained_policy.featurizer is None + trained_policy.persist(tmpdir.strpath) + loaded = trained_policy.__class__.load(tmpdir.strpath) + assert loaded.featurizer is None + + @pytest.fixture(scope="module") + def domain_with_mapping(self): + return Domain.load(DEFAULT_DOMAIN_PATH_WITH_MAPPING) + + @pytest.fixture + def tracker(self, domain_with_mapping): + return DialogueStateTracker( + UserMessage.DEFAULT_SENDER_ID, domain_with_mapping.slots + ) + + @pytest.fixture( + params=[ + ("default", "utter_default"), + ("greet", "utter_greet"), + (USER_INTENT_RESTART, ACTION_RESTART_NAME), + (USER_INTENT_BACK, ACTION_BACK_NAME), + ] + ) + def intent_mapping(self, request): + return request.param + + def test_predict_mapped_action(self, priority, domain_with_mapping, intent_mapping): + policy = self.create_policy(None, priority) + events = [ + ActionExecuted(ACTION_LISTEN_NAME), + user_uttered(intent_mapping[0], 1), + ] + + assert ( + self._get_next_action(policy, events, domain_with_mapping) + == intent_mapping[1] + ) + + def test_predict_action_listen(self, priority, domain_with_mapping, intent_mapping): + policy = self.create_policy(None, priority) + events = [ + ActionExecuted(ACTION_LISTEN_NAME), + user_uttered(intent_mapping[0], 1), + ActionExecuted(intent_mapping[1], policy="policy_0_MappingPolicy"), + ] + tracker = get_tracker(events) + scores = policy.predict_action_probabilities(tracker, domain_with_mapping) + index = scores.index(max(scores)) + action_planned = domain_with_mapping.action_names[index] + assert action_planned == ACTION_LISTEN_NAME + assert scores != [0] * domain_with_mapping.num_actions + + def test_do_not_follow_other_policy( + self, priority, domain_with_mapping, intent_mapping + ): + policy = self.create_policy(None, priority) + events = [ + ActionExecuted(ACTION_LISTEN_NAME), + user_uttered(intent_mapping[0], 1), + ActionExecuted(intent_mapping[1], policy="other_policy"), + ] + tracker = get_tracker(events) + scores = policy.predict_action_probabilities(tracker, domain_with_mapping) + assert scores == [0] * domain_with_mapping.num_actions + + +class TestFallbackPolicy(PolicyTestCollection): + def create_policy(self, featurizer, priority): + p = FallbackPolicy(priority=priority) + return p + + def test_featurizer(self, trained_policy, tmpdir): + assert trained_policy.featurizer is None + trained_policy.persist(tmpdir.strpath) + loaded = 
trained_policy.__class__.load(tmpdir.strpath) + assert loaded.featurizer is None + + @pytest.mark.parametrize( + "nlu_confidence, last_action_name, should_nlu_fallback", + [ + (0.1, "some_action", False), + (0.1, "action_listen", True), + (0.9, "some_action", False), + (0.9, "action_listen", False), + ], + ) + def test_should_nlu_fallback( + self, trained_policy, nlu_confidence, last_action_name, should_nlu_fallback + ): + assert ( + trained_policy.should_nlu_fallback(nlu_confidence, last_action_name) + is should_nlu_fallback + ) + -class TestTwoStageFallbackPolicy(PolicyTestCollection): +class TestTwoStageFallbackPolicy(TestFallbackPolicy): def create_policy(self, featurizer, priority): p = TwoStageFallbackPolicy( priority=priority, deny_suggestion_intent_name="deny"