In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import time
import json

import pandas as pd

from sklearn.model_selection import train_test_split

from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials

# Charger les variables d'environnement depuis le ficher .env
from dotenv import load_dotenv

load_dotenv()
LUIS_SUBSCRIPTION_KEY = os.getenv("LUIS_SUBSCRIPTION_KEY")
LUIS_ENDPOINT = os.getenv("LUIS_ENDPOINT")

### **Création de l'application LUIS**

In [2]:
# LUIS client
client = LUISAuthoringClient(LUIS_ENDPOINT, CognitiveServicesCredentials(LUIS_SUBSCRIPTION_KEY))

app_id = "bbba74ff-7ad3-4288-be62-dfbd1216893a"

In [14]:
# Create a LUIS app

app_id = client.apps.add({
            'name': "Book Chatbot",
            'initial_version_id':  "0.1",
            'description': "Flight booking app with LUIS Python",
            'culture': 'en-us',
        })

print("Created app {}".format(app_id))

Created app bbba74ff-7ad3-4288-be62-dfbd1216893a


In [3]:
luis_app = client.apps.get(app_id=app_id)

app_version = luis_app.active_version

In [5]:
entities = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']
prebuilt_entities= ['geographyV2', 'datetimeV2', 'number']
entities_map = {'or_city': 'geographyV2', 'dst_city': 'geographyV2', 'str_date': 'datetimeV2', 'end_date': 'datetimeV2', 'budget': 'number'}

# Add prebuilt entities
for prebuilt in prebuilt_entities:
    prebuilt_entity_id = client.model.add_prebuilt(
        app_id=app_id,
        version_id=app_version,
        prebuilt_extractor_names=[prebuilt],
    )
    print(f"{prebuilt} prebuilt entity created with id {prebuilt_entity_id}")

# Create entities and entity features into the model
for entity in entities:
    entity_id = client.model.add_entity(app_id, app_version, name=entity)
    print(f"{entity} entity created with id {entity_id}")

    fature_id = client.features.add_entity_feature(
        app_id=app_id,
        version_id=app_version,
        entity_id=entity_id,
        feature_relation_create_object={
            "model_name": entities_map[entity],
        },
    )
    print(f"{entities_map[entity]} feature created with id {fature_id} in entity {entity}")

# Create intent into the model
intent_name = "BookFlight"

intent_id = client.model.add_intent(app_id, app_version, name=intent_name)
print(f"{intent_name} intent created with id {intent_id}")

geographyV2 prebuilt entity created with id [<azure.cognitiveservices.language.luis.authoring.models._models_py3.PrebuiltEntityExtractor object at 0x000001E3F450B910>]
datetimeV2 prebuilt entity created with id [<azure.cognitiveservices.language.luis.authoring.models._models_py3.PrebuiltEntityExtractor object at 0x000001E3F450BE50>]
number prebuilt entity created with id [<azure.cognitiveservices.language.luis.authoring.models._models_py3.PrebuiltEntityExtractor object at 0x000001E3F450BCD0>]
or_city entity created with id fb808849-303f-491a-b214-395cb1e0df36
geographyV2 feature created with id {'additional_properties': {}, 'code': 'Success', 'message': 'Operation Successful'} in entity or_city
dst_city entity created with id 0a06ba1c-de24-429c-be24-efb8458fbf47
geographyV2 feature created with id {'additional_properties': {}, 'code': 'Success', 'message': 'Operation Successful'} in entity dst_city
str_date entity created with id 3c296d8f-21db-488d-8c8d-eb3ec596d558
datetimeV2 feature 

### **Entraînement du modèle et publication de l'app**

In [6]:
data = pd.read_json('data/frames.json')
turns_list = data['turns'].to_list()

In [7]:
list_keys_kept = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']

def get_key_value(arg):
    if "key" in arg and "val" in arg:
        key = arg['key']
        value = arg['val']

        if value != '-1':
            return (key, value)

    return (None, None)


def get_entity_labels(text, key, value):
    
    text = text.lower()
    value = value.lower()

    return {
        'entity_name': key,
        'start_char_index': text.index(value),
        'end_char_index': text.index(value) + len(value)
    }

In [8]:
utterances = []

for turn in turns_list:
    for message in turn:
        # Seulement les messages provenant de l'utilisateur
        if message['author'] == 'user':
            text = message['text']
            entity_labels = []

            for act in message['labels']['acts']:
                for arg in act['args']:

                    key, value = get_key_value(arg)

                    if key not in list_keys_kept or key == None or value == None:
                        continue

                    entity_labels.append(get_entity_labels(text, key, value))
            
            if entity_labels:
                utterances.append({
                    'text': text,
                    'intent_name': 'BookFlight',
                    'entity_labels': entity_labels
                })

In [9]:
train_data, test_data = train_test_split(utterances, test_size=0.2, shuffle=True)

In [10]:
len(train_data)

3500

In [11]:
for i in range(0, len(train_data), 100):
    j = i + 100
    if j > len(train_data):
        j = len(train_data)

    client.examples.batch(
                app_id,
                app_version,
                train_data[i:j]
            )

In [12]:
# Train the model
print("Start training the app...")

client.train.train_version(app_id, app_version)
waiting = True

while waiting:
    info = client.train.get_status(app_id, app_version)

    # get_status returns a list of training statuses, one for each model. Loop through them and make sure all are done.
    waiting = any(map(lambda x: 'Queued' == x.details.status or 'InProgress' == x.details.status, info))
    if waiting:
        print ("Waiting 10 seconds for training to complete...")
        time.sleep(10)
    else: 
        print("The app is trained !")
        waiting = False

Start training the app...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to

In [13]:
# Publish the app
print("Start publishing the app...")

client.apps.update_settings(app_id, is_public=True)
publish_result = client.apps.publish(app_id, app_version, is_staging=False)

endpoint = publish_result.endpoint_url + \
            "?subscription-key=" + LUIS_SUBSCRIPTION_KEY + "&q="

print("The app is published.")

Start publishing the app...
The app is published.


### **Test de l'application**

In [17]:
runtimeCredentials = CognitiveServicesCredentials(LUIS_SUBSCRIPTION_KEY)
clientRuntime = LUISRuntimeClient(endpoint='https://westeurope.api.cognitive.microsoft.com', credentials=runtimeCredentials)


request = { "query" : "I want to go to Paris with 250 dollars, leaving tomorrow" }

predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", request)

In [18]:
print("Top intent: {}".format(predictionResponse.prediction.top_intent))
print("Intents: ")

for intent in predictionResponse.prediction.intents:
    print("\t{}".format (json.dumps (intent)))
print("Entities: {}".format (predictionResponse.prediction.entities))

Top intent: BookFlight
Intents: 
	"BookFlight"
Entities: {'dst_city': ['Paris'], 'geographyV2': [{'value': 'Paris', 'type': 'city'}], 'budget': ['250'], 'number': [250], 'str_date': ['tomorrow'], 'datetimeV2': [{'type': 'date', 'values': [{'timex': '2022-09-16', 'resolution': [{'value': '2022-09-16'}]}]}]}


In [19]:
test_data[0]

{'text': '5400 for the 3 of us',
 'intent_name': 'BookFlight',
 'entity_labels': [{'entity_name': 'budget',
   'start_char_index': 0,
   'end_char_index': 4}]}

In [20]:
def predict(clientRuntime, app_id, query):

    request = { "query" : query }
    response = clientRuntime.prediction.get_slot_prediction(app_id, "Production", request)

    entities = response.prediction.entities
    entities = {k:v[0] for k, v in entities.items()}

    return entities

In [21]:
def format_true_data(data):
    y_true = {}
    text = data['text']

    for entity in data['entity_labels']:
        y_true[entity['entity_name']] = text[entity['start_char_index']:entity['end_char_index']]

    return y_true

In [22]:
def accuracy_score(y_pred, y_true):
    score = 0
    for k, v in y_true.items():
        if k in y_pred:
            if v == y_pred[k]:
                score += 1
            elif v in y_pred[k]:
                score += 0.5

    accuracy = score * 100 /len(y_true)

    return accuracy



In [23]:
def evaluate(test_data):
    scores = []

    for data in test_data:
        y_pred = predict(clientRuntime, app_id, data['text'])
        y_true = format_true_data(data)
        scores.append(accuracy_score(y_pred, y_true))

    mean_score = sum(scores) / len(scores)

    return scores, mean_score

In [24]:
scores, mean_score = evaluate(test_data[0:100])

In [25]:
mean_score

71.04166666666666