Skip to content

Commit

Permalink
Merge pull request #6038 from RasaHQ/diet-entity-confidence
Browse files Browse the repository at this point in the history
DIETClassifier adds a confidence value to entity predictions
  • Loading branch information
tabergma committed Jun 17, 2020
2 parents 4151446 + a9b0c76 commit 923db75
Show file tree
Hide file tree
Showing 7 changed files with 266 additions and 13 deletions.
1 change: 1 addition & 0 deletions changelog/5481.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
``DIETClassifier`` now also assigns a confidence value to entity predictions.
4 changes: 2 additions & 2 deletions docs/nlu/entity-extraction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,9 @@ exactly. Instead it will return the trained synonym.
.. note::

The ``confidence`` will be set by the ``CRFEntityExtractor`` component. The
The ``confidence`` will be set by the ``CRFEntityExtractor`` and the ``DIETClassifier`` components. The
``DucklingHTTPExtractor`` will always return ``1``. The ``SpacyEntityExtractor`` extractor
and ``DIETClassifier`` do not provide this information and return ``null``.
does not provide this information and returns ``null``.


Some extractors, like ``duckling``, may include additional information. For example:
Expand Down
22 changes: 16 additions & 6 deletions rasa/nlu/classifiers/diet_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,10 +798,13 @@ def _predict_entities(
if predict_out is None:
return []

predicted_tags = self._entity_label_to_tags(predict_out)
predicted_tags, confidence_values = self._entity_label_to_tags(predict_out)

entities = self.convert_predictions_into_entities(
message.text, message.get(TOKENS_NAMES[TEXT], []), predicted_tags
message.text,
message.get(TOKENS_NAMES[TEXT], []),
predicted_tags,
confidence_values,
)

entities = self.add_extractor_name(entities)
Expand All @@ -811,20 +814,24 @@ def _predict_entities(

def _entity_label_to_tags(
self, predict_out: Dict[Text, Any]
) -> Dict[Text, List[Text]]:
) -> Tuple[Dict[Text, List[Text]], Dict[Text, List[float]]]:
predicted_tags = {}
confidence_values = {}

for tag_spec in self._entity_tag_specs:
predictions = predict_out[f"e_{tag_spec.tag_name}_ids"].numpy()
confidences = predict_out[f"e_{tag_spec.tag_name}_scores"].numpy()
confidences = [float(c) for c in confidences[0]]
tags = [tag_spec.ids_to_tags[p] for p in predictions[0]]

if self.component_config[BILOU_FLAG]:
tags = bilou_utils.ensure_consistent_bilou_tagging(tags)
tags = bilou_utils.remove_bilou_prefixes(tags)

predicted_tags[tag_spec.tag_name] = tags
confidence_values[tag_spec.tag_name] = confidences

return predicted_tags
return predicted_tags, confidence_values

def process(self, message: Message, **kwargs: Any) -> None:
"""Return the most likely label and its similarity to the input."""
Expand Down Expand Up @@ -1479,7 +1486,7 @@ def _calculate_entity_loss(
logits = self._tf_layers[f"embed.{tag_name}.logits"](inputs)

# should call first to build weights
pred_ids = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths)
pred_ids, _ = self._tf_layers[f"crf.{tag_name}"](logits, sequence_lengths)
# pytype cannot infer that 'self._tf_layers["crf"]' has the method '.loss'
# pytype: disable=attribute-error
loss = self._tf_layers[f"crf.{tag_name}"].loss(
Expand Down Expand Up @@ -1671,9 +1678,12 @@ def _batch_predict_entities(
_input = tf.concat([_input, _tags], axis=-1)

_logits = self._tf_layers[f"embed.{name}.logits"](_input)
pred_ids = self._tf_layers[f"crf.{name}"](_logits, sequence_lengths - 1)
pred_ids, confidences = self._tf_layers[f"crf.{name}"](
_logits, sequence_lengths - 1
)

predictions[f"e_{name}_ids"] = pred_ids
predictions[f"e_{name}_scores"] = confidences

if name == ENTITY_ATTRIBUTE_TYPE:
# use the entity tags as additional input for the role
Expand Down
2 changes: 1 addition & 1 deletion rasa/nlu/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# performs entity extraction but those two classifiers don't
ENTITY_PROCESSORS = {"EntitySynonymMapper", "ResponseSelector"}

EXTRACTORS_WITH_CONFIDENCES = {"CRFEntityExtractor"}
EXTRACTORS_WITH_CONFIDENCES = {"CRFEntityExtractor", "DIETClassifier"}

CVEvaluationResult = namedtuple("Results", "train test")

Expand Down
217 changes: 217 additions & 0 deletions rasa/utils/tensorflow/crf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
import tensorflow as tf

from tensorflow_addons.utils.types import TensorLike
from typeguard import typechecked
from typing import Tuple


# original code taken from
# https://github.com/tensorflow/addons/blob/master/tensorflow_addons/text/crf.py
# (modified to our needs)


class CrfDecodeForwardRnnCell(tf.keras.layers.AbstractRNNCell):
    """Computes the forward decoding in a linear-chain CRF.

    Besides the Viterbi backpointers, every step also emits one
    softmax-normalised score per tag. Both are concatenated into a single
    output, so the cell output is twice as wide as the cell state.
    """

    @typechecked
    def __init__(self, transition_params: TensorLike, **kwargs) -> None:
        """Initialize the CrfDecodeForwardRnnCell.

        Args:
            transition_params: A [num_tags, num_tags] matrix of binary
                potentials. This matrix is expanded into a
                [1, num_tags, num_tags] in preparation for the broadcast
                summation occurring within the cell.
        """
        super().__init__(**kwargs)
        self._transition_params = tf.expand_dims(transition_params, 0)
        self._num_tags = transition_params.shape[0]

    @property
    def state_size(self) -> int:
        """Width of the recurrent state: one running score per tag."""
        return self._num_tags

    @property
    def output_size(self) -> int:
        """Width of the per-step output returned by `call`.

        `call` concatenates the backpointers and the scores (each of width
        num_tags) along the last axis, so the output is 2 * num_tags wide.
        """
        if self._num_tags is None:
            # Static number of tags unknown; nothing meaningful to report.
            return None
        return 2 * self._num_tags

    def build(self, input_shape):
        super().build(input_shape)

    def call(
        self, inputs: TensorLike, state: TensorLike
    ) -> Tuple[tf.Tensor, tf.Tensor]:
        """Run a single forward decoding step.

        Args:
            inputs: A [batch_size, num_tags] matrix of unary potentials.
            state: The recurrent state as handed over by the RNN layer; its
                first element is a [batch_size, num_tags] matrix containing
                the previous step's score values.

        Returns:
            output: A [batch_size, num_tags * 2] matrix of backpointers and scores.
            new_state: A [batch_size, num_tags] matrix of new score values.
        """
        # [batch_size, num_tags, 1] + [1, num_tags, num_tags]
        # -> [batch_size, previous tag, current tag]
        state = tf.expand_dims(state[0], 2)
        transition_scores = state + self._transition_params
        new_state = inputs + tf.reduce_max(transition_scores, [1])

        # Best previous tag for every current tag. Cast to float so it can be
        # concatenated with the scores below.
        backpointers = tf.argmax(transition_scores, 1)
        backpointers = tf.cast(backpointers, tf.float32)

        # apply softmax to transition_scores to get scores in range from 0 to 1
        scores = tf.reduce_max(tf.nn.softmax(transition_scores, axis=1), [1])

        # In the RNN implementation only the first value that is returned from a cell
        # is kept throughout the RNN, so that you will have the values from each time
        # step in the final output. As we need the backpointers as well as the scores
        # for each time step, we concatenate them.
        return tf.concat([backpointers, scores], axis=1), new_state


def crf_decode_forward(
    inputs: TensorLike,
    state: TensorLike,
    transition_params: TensorLike,
    sequence_lengths: TensorLike,
) -> Tuple[tf.Tensor, tf.Tensor]:
    """Run the forward (Viterbi) pass of a linear-chain CRF over a sequence.

    Args:
        inputs: Unary potentials with the time steps on axis 1 (the size of
            that axis is used as the mask length); one `num_tags` vector per
            step is fed to the cell.
        state: A [batch_size, num_tags] matrix used as the initial state of
            the recurrence.
        transition_params: A [num_tags, num_tags] matrix of binary potentials.
        sequence_lengths: A [batch_size] vector of true sequence lengths;
            steps beyond a sequence's length are masked out.

    Returns:
        output: The per-step cell outputs (backpointers and scores
            concatenated on the last axis, i.e. `num_tags * 2` wide).
        new_state: A [batch_size, num_tags] matrix holding the final scores.
    """
    lengths = tf.cast(sequence_lengths, dtype=tf.int32)
    num_steps = tf.shape(inputs)[1]
    step_mask = tf.sequence_mask(lengths, num_steps)

    forward_rnn = tf.keras.layers.RNN(
        CrfDecodeForwardRnnCell(transition_params),
        return_sequences=True,
        return_state=True,
    )
    # The second positional argument of the RNN layer is its initial state.
    return forward_rnn(inputs, state, mask=step_mask)


def crf_decode_backward(
    backpointers: TensorLike, scores: TensorLike, state: TensorLike
) -> Tuple[tf.Tensor, tf.Tensor]:
    """Computes backward decoding in a linear-chain CRF.

    Args:
        backpointers: A [batch_size, num_steps, num_tags] tensor of
            backpointers, ordered from the last decoding step to the first.
        scores: A [batch_size, num_steps, num_tags] tensor of scores, in the
            same order as `backpointers`.
        state: A [batch_size, 1] matrix holding the tag index the
            backtracking starts from.

    Returns:
        new_tags: A [batch_size, num_steps, 1] tensor containing the tag
            indices found by following the backpointers.
        new_scores: A [batch_size, num_steps, 1] tensor containing, for every
            step, the score of the tag the backpointer was followed from.
    """
    # tf.scan iterates over the leading axis, so put the time axis first.
    backpointers = tf.transpose(backpointers, [1, 0, 2])
    scores = tf.transpose(scores, [1, 0, 2])

    def _scan_fn(_state: TensorLike, _inputs: TensorLike) -> tf.Tensor:
        _state = tf.cast(tf.squeeze(_state, axis=[1]), dtype=tf.int32)
        idxs = tf.stack([tf.range(tf.shape(_inputs)[0]), _state], axis=1)
        return tf.expand_dims(tf.gather_nd(_inputs, idxs), axis=-1)

    output_tags = tf.scan(_scan_fn, backpointers, state)

    # The scores must be looked up with the very same tag indices that were
    # used to follow the backpointers: `state` for the first step and the
    # previously decoded tag for every later step. Scanning over the scores
    # themselves would feed the gathered score (a float in [0, 1]) back in as
    # the next index, which would select tag 0 for nearly every step.
    followed_tags = tf.concat([tf.expand_dims(state, 0), output_tags], axis=0)[:-1]
    followed_tags = tf.cast(followed_tags, dtype=tf.int32)
    # [num_steps, batch_size, num_tags] gathered with
    # [num_steps, batch_size, 1] -> [num_steps, batch_size, 1]
    output_scores = tf.gather(scores, followed_tags, axis=2, batch_dims=2)

    return tf.transpose(output_tags, [1, 0, 2]), tf.transpose(output_scores, [1, 0, 2])


def crf_decode(
    potentials: TensorLike, transition_params: TensorLike, sequence_length: TensorLike
) -> Tuple[tf.Tensor, tf.Tensor, tf.Tensor]:
    """Decode the highest scoring sequence of tags.

    Args:
        potentials: A [batch_size, max_seq_len, num_tags] tensor of
            unary potentials.
        transition_params: A [num_tags, num_tags] matrix of
            binary potentials.
        sequence_length: A [batch_size] vector of true sequence lengths.

    Returns:
        decode_tags: A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
            Contains the highest scoring tag indices.
        decode_scores: A [batch_size, max_seq_len] matrix, containing the score of
            `decode_tags`.
        best_score: A [batch_size] vector, containing the best score of `decode_tags`.
    """
    sequence_length = tf.cast(sequence_length, dtype=tf.int32)

    def _single_seq_fn():
        # With a single time step there are no transitions to follow: take
        # the argmax tag and its softmax activation directly.
        tags = tf.cast(tf.argmax(potentials, axis=2), dtype=tf.int32)
        tag_scores = tf.reduce_max(tf.nn.softmax(potentials, axis=2), axis=2)
        top_score = tf.reshape(tf.reduce_max(potentials, axis=2), shape=[-1])
        return tags, tag_scores, top_score

    def _multi_seq_fn():
        # The first time step seeds the recurrence, the remaining steps are
        # fed through the forward pass.
        first_step = tf.squeeze(
            tf.slice(potentials, [0, 0, 0], [-1, 1, -1]), axis=[1]
        )
        remaining_steps = tf.slice(potentials, [0, 1, 0], [-1, -1, -1])

        num_transitions = tf.maximum(
            tf.constant(0, dtype=tf.int32), sequence_length - 1
        )

        rnn_output, final_scores = crf_decode_forward(
            remaining_steps, first_step, transition_params, num_transitions
        )

        # rnn_output is [batch-size, max-seq-length, num-tags * 2]; splitting
        # it in half on axis 2 yields the backpointers and the scores, each
        # of size [batch-size, max-seq-length, num-tags].
        backpointers, step_scores = tf.split(rnn_output, 2, axis=2)

        # Put both in backward order so that backtracking can start at the
        # last real step of every sequence.
        backpointers = tf.reverse_sequence(
            tf.cast(backpointers, dtype=tf.int32), num_transitions, seq_axis=1
        )
        step_scores = tf.reverse_sequence(step_scores, num_transitions, seq_axis=1)

        # Tag (and its softmax score) the backtracking starts from.
        last_tag = tf.expand_dims(
            tf.cast(tf.argmax(final_scores, axis=1), dtype=tf.int32), axis=-1
        )
        last_tag_score = tf.expand_dims(
            tf.reduce_max(tf.nn.softmax(final_scores, axis=1), axis=[1]), axis=-1
        )

        earlier_tags, earlier_scores = crf_decode_backward(
            backpointers, step_scores, last_tag
        )

        # Prepend the starting tag / score, then flip back into time order.
        decode_tags = tf.concat(
            [last_tag, tf.squeeze(earlier_tags, axis=[2])], axis=1
        )
        decode_tags = tf.reverse_sequence(decode_tags, sequence_length, seq_axis=1)

        decode_scores = tf.concat(
            [last_tag_score, tf.squeeze(earlier_scores, axis=[2])], axis=1
        )
        decode_scores = tf.reverse_sequence(decode_scores, sequence_length, seq_axis=1)

        best_score = tf.reduce_max(final_scores, axis=1)

        return decode_tags, decode_scores, best_score

    static_seq_len = potentials.shape[1]
    if static_seq_len is None:
        # Sequence length only known at run time: choose the path in-graph.
        return tf.cond(
            tf.equal(tf.shape(potentials)[1], 1), _single_seq_fn, _multi_seq_fn
        )

    # shape is statically known, so we just execute the appropriate code path
    if static_seq_len == 1:
        return _single_seq_fn()
    return _multi_seq_fn()
18 changes: 14 additions & 4 deletions rasa/utils/tensorflow/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import List, Optional, Text, Tuple, Callable, Union, Any
import tensorflow as tf
import tensorflow_addons as tfa
import rasa.utils.tensorflow.crf
from tensorflow.python.keras.utils import tf_utils
from tensorflow.python.keras import backend as K
from rasa.utils.tensorflow.constants import SOFTMAX, MARGIN, COSINE, INNER
Expand Down Expand Up @@ -460,7 +461,9 @@ def build(self, input_shape: tf.TensorShape) -> None:
self.built = True

# noinspection PyMethodOverriding
def call(self, logits: tf.Tensor, sequence_lengths: tf.Tensor) -> tf.Tensor:
def call(
self, logits: tf.Tensor, sequence_lengths: tf.Tensor
) -> Tuple[tf.Tensor, tf.Tensor]:
"""Decodes the highest scoring sequence of tags.
Arguments:
Expand All @@ -471,16 +474,23 @@ def call(self, logits: tf.Tensor, sequence_lengths: tf.Tensor) -> tf.Tensor:
Returns:
A [batch_size, max_seq_len] matrix, with dtype `tf.int32`.
Contains the highest scoring tag indices.
A [batch_size, max_seq_len] matrix, with dtype `tf.float32`.
Contains the confidence values of the highest scoring tag indices.
"""
pred_ids, _ = tfa.text.crf.crf_decode(
predicted_ids, scores, _ = rasa.utils.tensorflow.crf.crf_decode(
logits, self.transition_params, sequence_lengths
)
# set prediction index for padding to `0`
mask = tf.sequence_mask(
sequence_lengths, maxlen=tf.shape(pred_ids)[1], dtype=pred_ids.dtype
sequence_lengths,
maxlen=tf.shape(predicted_ids)[1],
dtype=predicted_ids.dtype,
)

return pred_ids * mask
confidence_values = scores * tf.cast(mask, tf.float32)
predicted_ids = predicted_ids * mask

return predicted_ids, confidence_values

def loss(
self, logits: tf.Tensor, tag_indices: tf.Tensor, sequence_lengths: tf.Tensor
Expand Down
15 changes: 15 additions & 0 deletions tests/nlu/test_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,21 @@ def test_determine_token_labels_with_extractors():
["CRFEntityExtractor"],
0.87,
),
(
Token("pizza", 4),
[
{
"start": 4,
"end": 9,
"value": "pizza",
"entity": "food",
"confidence_entity": 0.87,
"extractor": "DIETClassifier",
}
],
["DIETClassifier"],
0.87,
),
],
)
def test_get_entity_confidences(
Expand Down

0 comments on commit 923db75

Please sign in to comment.