Skip to content

Commit

Permalink
Merge pull request #4899 from RasaHQ/add-most-confused-intents-to-report
Browse files Browse the repository at this point in the history
add function to add frequently confused intents to the intent report
  • Loading branch information
amn41 committed Dec 12, 2019
2 parents 7e8c2ae + a566f66 commit 9b937d8
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 5 deletions.
4 changes: 4 additions & 0 deletions changelog/4899.improvement.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
The ``intent_report.json`` created by ``rasa test`` now creates an extra field
``confused_with`` for each intent. This is a dictionary containing the names of
the most common false positives when this intent should be predicted, and the
number of such false positives.
46 changes: 42 additions & 4 deletions rasa/nlu/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from tqdm import tqdm
from typing import (
Iterable,
Collection,
Iterator,
Tuple,
List,
Expand Down Expand Up @@ -398,6 +399,41 @@ def evaluate_response_selections(
}


def _add_confused_intents_to_report(
report: Dict[Text, Dict[Text, float]],
cnf_matrix: np.ndarray,
labels: Collection[Text],
) -> Dict[Text, Dict[Text, Union[Dict, float]]]:
"""Adds a field "confused_with" to the intents in the
intent evaluation report. The value is a dict of
{"false_positive_label": false_positive_count} pairs.
If there are no false positives in the confusion matrix,
the dict will be empty. Typically we include the two most
commonly false positive labels, three in the rare case that
the diagonal element in the confusion matrix is not one of the
three highest values in the row.
"""

# sort confusion matrix by false positives
indices = np.argsort(cnf_matrix, axis=1)
n_candidates = min(3, len(labels))

for label in labels:
# it is possible to predict intent 'None'
if report.get(label):
report[label]["confused_with"] = {}

for i, label in enumerate(labels):
for j in range(n_candidates):
label_idx = indices[i, -(1 + j)]
false_pos_label = labels[label_idx]
false_positives = int(cnf_matrix[i, label_idx])
if false_pos_label != label and false_positives > 0:
report[label]["confused_with"][false_pos_label] = false_positives

return report


def evaluate_intents(
intent_results: List[IntentEvaluationResult],
output_directory: Optional[Text],
Expand All @@ -416,6 +452,8 @@ def evaluate_intents(
Others are filtered out. Returns a dictionary of containing the
evaluation result.
"""
import sklearn.metrics
import sklearn.utils.multiclass

# remove empty intent targets
num_examples = len(intent_results)
Expand All @@ -431,10 +469,14 @@ def evaluate_intents(
intent_results, "intent_target", "intent_prediction"
)

cnf_matrix = sklearn.metrics.confusion_matrix(target_intents, predicted_intents)
labels = sklearn.utils.multiclass.unique_labels(target_intents, predicted_intents)

if output_directory:
report, precision, f1, accuracy = get_evaluation_metrics(
target_intents, predicted_intents, output_dict=True
)
report = _add_confused_intents_to_report(report, cnf_matrix, labels)

report_filename = os.path.join(output_directory, "intent_report.json")

Expand Down Expand Up @@ -463,16 +505,12 @@ def evaluate_intents(
collect_nlu_errors(intent_results, errors_filename)

if confmat_filename:
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
import matplotlib.pyplot as plt

if output_directory:
confmat_filename = os.path.join(output_directory, confmat_filename)
intent_hist_filename = os.path.join(output_directory, intent_hist_filename)

cnf_matrix = confusion_matrix(target_intents, predicted_intents)
labels = unique_labels(target_intents, predicted_intents)
plot_confusion_matrix(
cnf_matrix,
classes=labels,
Expand Down
70 changes: 69 additions & 1 deletion tests/nlu/base/test_evaluation.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from typing import Text

import asyncio
import logging

import pytest
from _pytest.tmpdir import TempdirFactory

import rasa.utils.io
from rasa.test import compare_nlu_models
Expand Down Expand Up @@ -306,7 +309,13 @@ def test_intent_evaluation_report(tmpdir_factory):

report = json.loads(rasa.utils.io.read_file(report_filename))

greet_results = {"precision": 1.0, "recall": 1.0, "f1-score": 1.0, "support": 1}
greet_results = {
"precision": 1.0,
"recall": 1.0,
"f1-score": 1.0,
"support": 1,
"confused_with": {},
}

prediction = {
"text": "hello",
Expand All @@ -320,6 +329,65 @@ def test_intent_evaluation_report(tmpdir_factory):
assert result["predictions"][0] == prediction


def test_intent_evaluation_report_large(tmpdir_factory: TempdirFactory):
    """Checks `confused_with` entries in the report for a multi-intent dataset."""
    path = tmpdir_factory.mktemp("evaluation")
    report_folder = path / "reports"
    report_filename = report_folder / "intent_report.json"

    rasa.utils.io.create_directory(str(report_folder))

    def result(target: Text, prediction: Text) -> IntentEvaluationResult:
        # synthetic example: empty message text, full confidence
        return IntentEvaluationResult(target, prediction, "", 1.0)

    # A: all correct; B: mostly correct, confused with C; C: confused with
    # D and E once each; D: mostly correct, confused with B; E: never correct
    intent_results = (
        [result("A", "A")] * 10
        + [result("B", "B")] * 7
        + [result("B", "C")] * 3
        + [result("C", "C")] * 3
        + [result("C", "D")]
        + [result("C", "E")]
        + [result("D", "D")] * 29
        + [result("D", "B")] * 3
        + [result("E", "C")] * 5
        + [result("E", "")] * 5
    )

    evaluate_intents(
        intent_results,
        report_folder,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
    )

    report = json.loads(rasa.utils.io.read_file(str(report_filename)))

    expected_a = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 10,
        "confused_with": {},
    }

    expected_e = {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 10,
        "confused_with": {"C": 5, "": 5},
    }

    expected_c_confusions = {
        "D": 1,
        "E": 1,
    }

    # 5 intents + the '' prediction + sklearn's aggregate rows
    assert len(report.keys()) == 8
    assert report["A"] == expected_a
    assert report["E"] == expected_e
    assert report["C"]["confused_with"] == expected_c_confusions


def test_response_evaluation_report(tmpdir_factory):
path = tmpdir_factory.mktemp("evaluation").strpath
report_folder = os.path.join(path, "reports")
Expand Down

0 comments on commit 9b937d8

Please sign in to comment.