Skip to content
This repository has been archived by the owner on Jun 3, 2024. It is now read-only.

nlu_client: use classifier weights to ignore ambiguous results #218

Merged
merged 1 commit into from
Mar 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 52 additions & 4 deletions idunn/geocoder/nlu_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import httpx
import logging
import re
from typing import Optional
from unidecode import unidecode

from idunn.api.places_list import MAX_HEIGHT, MAX_WIDTH
Expand Down Expand Up @@ -48,6 +49,10 @@ def reason(self):


class NLU_Helper: # pylint: disable = invalid-name
CLASSIF_MIN_UNK_IGNORED = float(settings["NLU_CLASSIFIER_MIN_UNK_IGNORED"])
CLASSIF_CATEGORY_MIN_WEIGHT = float(settings["NLU_CLASSIFIER_CATEGORY_MIN_WEIGHT"])
CLASSIF_MAX_WEIGHT_RATIO = float(settings["NLU_CLASSIFIER_MAX_WEIGHT_RATIO"])

def __init__(self):
    """Create the async HTTP client this helper sends its requests with."""
    # Certificate verification is driven by the project settings; the short
    # timeout is the value passed to httpx for every request of this client.
    verify_https = settings["VERIFY_HTTPS"]
    self.client = httpx.AsyncClient(timeout=0.3, verify=verify_https)

Expand All @@ -56,12 +61,55 @@ async def post_nlu_classifier(self, text):
classifier_domain = settings["NLU_CLASSIFIER_DOMAIN"]
response_classifier = await self.client.post(
classifier_url,
json={"text": text, "domain": classifier_domain, "language": "fr", "count": 1},
json={
"text": text,
"domain": classifier_domain,
"language": "fr",
"count": 10,
},
)
response_classifier.raise_for_status()
return response_classifier

async def nlu_classifier(self, text):
@classmethod
def nlu_classifier_handle_response(cls, response) -> Optional[Category]:
    """
    Pick the single unambiguous category out of a raw classifier response.

    `response["intention"]` is read as a sequence of `(weight, name)` pairs.
    The result is `None` when any of the following holds:

    - the "unk" entry weighs at least `CLASSIF_MIN_UNK_IGNORED`;
    - no category other than "unk" is present;
    - the best category weighs less than `CLASSIF_CATEGORY_MIN_WEIGHT`;
    - the runner-up weighs more than `CLASSIF_MAX_WEIGHT_RATIO` times the
      best weight;
    - the best name is not a member of `Category`.

    >>> NLU_Helper.nlu_classifier_handle_response({
    ...     "intention": [(0.95, "restaurant"), (0.01, "cinema")],
    ... }).value
    'restaurant'
    >>> NLU_Helper.nlu_classifier_handle_response({
    ...     "intention": [(0.02, "restaurant"), (0.01, "cinema")],
    ... }) is None
    True
    """
    weight_by_name = {}
    for weight, name in response["intention"]:
        weight_by_name[name] = weight

    # "unk" is always removed so it never competes with real categories.
    unk_weight = weight_by_name.pop("unk", 0)
    if unk_weight >= cls.CLASSIF_MIN_UNK_IGNORED:
        # The classifier itself considers the query likely unclassified.
        return None

    if not weight_by_name:
        # Nothing but "unk" (or nothing at all) was returned.
        return None

    top_name = max(weight_by_name, key=weight_by_name.get)
    top_weight = weight_by_name.pop(top_name)

    if top_weight < cls.CLASSIF_CATEGORY_MIN_WEIGHT:
        # Even the best candidate is not convincing enough.
        return None

    if weight_by_name:
        runner_up_weight = max(weight_by_name.values())
        if runner_up_weight > cls.CLASSIF_MAX_WEIGHT_RATIO * top_weight:
            # Another category is too close to the best one: ambiguous.
            return None

    return Category.__members__.get(top_name)

async def nlu_classifier(self, text) -> Optional[Category]:
try:
response_classifier = await classifier_circuit_breaker.call_async(
self.post_nlu_classifier, text
Expand All @@ -70,10 +118,10 @@ async def nlu_classifier(self, text):
logger.error("Request to NLU classifier failed", exc_info=True)
return None

return response_classifier.json()["intention"][0][1]
return self.nlu_classifier_handle_response(response_classifier.json())

@staticmethod
def regex_classifier(text, is_brand=False):
def regex_classifier(text, is_brand=False) -> Optional[Category]:
"""Match text with a category, using 'regex'
>>> NLU_Helper.regex_classifier("restau").value
'restaurant'
Expand Down
5 changes: 5 additions & 0 deletions idunn/utils/default_settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ NLU_TAGGER_LOWERCASE: True
NLU_CLASSIFIER_URL:
NLU_CLASSIFIER_DOMAIN: "poi"

# Classifier parameters
NLU_CLASSIFIER_MIN_UNK_IGNORED: 0.1 # Minimum weight of "unk" for the query to be considered unclassified
NLU_CLASSIFIER_CATEGORY_MIN_WEIGHT: 0.15 # Minimum weight required for a category
NLU_CLASSIFIER_MAX_WEIGHT_RATIO: 0.6 # Maximum allowed ratio of the second best weight to the best one

NLU_BREAKER_TIMEOUT: 120 # timeout period in seconds
NLU_BREAKER_MAXFAIL: 5 # consecutive failures before breaking

Expand Down
8 changes: 8 additions & 0 deletions tests/fixtures/autocomplete/classif_pharmacy.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,17 @@
"count": 1,
"domain": "poi",
"intention": [
[
1.0000003385357559e-05,
"health_other"
],
[
1.0000100135803223,
"pharmacy"
],
[
1.0000003385357559e-05,
"place_of_worship"
]
],
"text": "pharmacy",
Expand Down
11 changes: 11 additions & 0 deletions tests/test_nlu_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from idunn.geocoder.nlu_client import NLU_Helper


def test_classifier_handle_response():
    """Check which raw classifier responses yield a category and which do not."""
    classify = NLU_Helper.nlu_classifier_handle_response

    # Each case pairs the "intention" payload with the expected category
    # value, or None when the response must be rejected.
    cases = [
        # No candidate at all.
        ([], None),
        # Single candidate, expected to be rejected for its low weight.
        ([(0.10, "restaurant")], None),
        # Single strong candidate.
        ([(0.99, "restaurant")], "restaurant"),
        # Two strong candidates: ambiguous.
        ([(0.99, "restaurant"), (0.90, "pharmacy")], None),
        # Noticeable "unk" weight overrides a strong candidate.
        ([(0.99, "restaurant"), (0.20, "unk")], None),
        # Negligible "unk" weight is ignored.
        ([(0.99, "restaurant"), (0.01, "unk")], "restaurant"),
    ]

    for intentions, expected in cases:
        category = classify({"intention": intentions})
        if expected is None:
            assert category is None
        else:
            assert category.value == expected