In [41]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

import os
import time
import json

import pandas as pd

from sklearn.model_selection import train_test_split

from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials

# Charger les variables d'environnement depuis le ficher .env
from dotenv import load_dotenv

load_dotenv()
LUIS_SUBSCRIPTION_KEY = os.getenv("LUIS_SUBSCRIPTION_KEY")
LUIS_ENDPOINT = os.getenv("LUIS_ENDPOINT")

### **Création de l'application LUIS**

In [8]:
# LUIS client
client = LUISAuthoringClient(LUIS_ENDPOINT, CognitiveServicesCredentials(LUIS_SUBSCRIPTION_KEY))

app_id = "e06fb540-29d5-4a98-b460-ff27258f15b0"

In [9]:
# Create a LUIS app

#app_id = client.apps.add({
#            'name': "Book Chatbot",
#            'initial_version_id':  "0.1",
#            'description': "Flight booking app with LUIS Python",
#            'culture': 'en-us',
#        })

print("Created app {}".format(app_id))

Created app e06fb540-29d5-4a98-b460-ff27258f15b0


In [10]:
luis_app = client.apps.get(app_id=app_id)

app_version = luis_app.active_version

In [11]:
entities = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']

# Create entities into the model
for entity in entities:
    entity_id = client.model.add_entity(app_id, app_version, name=entity)
    print(f"{entity} entity created with id {entity_id}")

# Create intent into the model
intent_name = "BookFlight"

intent_id = client.model.add_intent(app_id, app_version, name=intent_name)
print(f"{intent_name} intent created with id {intent_id}")

or_city entity created with id 988c0336-2a9c-4226-aeba-e84d7c0c5661
dst_city entity created with id 5b1029ae-2c77-4a01-b5a2-27589bf085b2
str_date entity created with id aa816a6f-584d-4c02-aace-fd3b9fd0eb31
end_date entity created with id 077435d8-f2c5-4384-b667-5990fd4811b6
budget entity created with id a4262255-68e4-4608-bbe2-5e99152174de
BookFlight intent created with id 73f39c7a-7175-48b8-8cf6-2c06ee2a2b9b


### **Entraînement du modèle et publication de l'app**

In [14]:
data = pd.read_json('data/frames.json')
turns_list = data['turns'].to_list()

In [13]:
list_keys_kept = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']

def get_key_value(arg):
    if "key" in arg and "val" in arg:
        key = arg['key']
        value = arg['val']

        if value != '-1':
            return (key, value)

    return (None, None)


def get_entity_labels(text, key, value):
    
    text = text.lower()
    value = value.lower()

    return {
        'entity_name': key,
        'start_char_index': text.index(value),
        'end_char_index': text.index(value) + len(value)
    }

In [15]:
utterances = []

for turn in turns_list:
    for message in turn:
        # Seulement les messages provenant de l'utilisateur
        if message['author'] == 'user':
            text = message['text']
            entity_labels = []

            for act in message['labels']['acts']:
                for arg in act['args']:

                    key, value = get_key_value(arg)

                    if key not in list_keys_kept or key == None or value == None:
                        continue

                    entity_labels.append(get_entity_labels(text, key, value))
            
            if entity_labels:
                utterances.append({
                    'text': text,
                    'intent_name': 'BookFlight',
                    'entity_labels': entity_labels
                })

In [24]:
train_data, test_data = train_test_split(utterances, test_size=0.2, shuffle=True)

In [29]:
len(train_data)

3500

In [None]:
for i in range(0, len(train_data), 100):
    j = i + 100
    if j > len(train_data):
        j = len(train_data)

    client.examples.batch(
                app_id,
                app_version,
                train_data[i:j]
            )

In [34]:
# Train the model
print("Start training the app...")

client.train.train_version(app_id, app_version)
waiting = True

while waiting:
    info = client.train.get_status(app_id, app_version)

    # get_status returns a list of training statuses, one for each model. Loop through them and make sure all are done.
    waiting = any(map(lambda x: 'Queued' == x.details.status or 'InProgress' == x.details.status, info))
    if waiting:
        print ("Waiting 10 seconds for training to complete...")
        time.sleep(10)
    else: 
        print("The app is trained !")
        waiting = False

Start training the app...
The app is trained !


In [67]:
# Publish the app
print("Start publishing the app...")

client.apps.update_settings(app_id, is_public=True)
publish_result = client.apps.publish(app_id, app_version, is_staging=False)

endpoint = publish_result.endpoint_url + \
            "?subscription-key=" + LUIS_SUBSCRIPTION_KEY + "&q="

print("The app is published.")

Start publishing the app...


### **Test de l'application**

In [48]:
runtimeCredentials = CognitiveServicesCredentials(LUIS_SUBSCRIPTION_KEY)
clientRuntime = LUISRuntimeClient(endpoint='https://westeurope.api.cognitive.microsoft.com', credentials=runtimeCredentials)


request = { "query" : "I want to go to Paris with 250 dollars" }

predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", request)

In [49]:
print("Top intent: {}".format(predictionResponse.prediction.top_intent))
print("Intents: ")

for intent in predictionResponse.prediction.intents:
    print("\t{}".format (json.dumps (intent)))
print("Entities: {}".format (predictionResponse.prediction.entities))

Top intent: BookFlight
Intents: 
	"BookFlight"
Entities: {'dst_city': ['Paris'], 'budget': ['250 dollars']}


In [52]:
test_data[0]

{'text': 'i have 1600',
 'intent_name': 'BookFlight',
 'entity_labels': [{'entity_name': 'budget',
   'start_char_index': 7,
   'end_char_index': 11}]}

In [50]:
def predict(clientRuntime, app_id, query):

    request = { "query" : query }
    response = clientRuntime.prediction.get_slot_prediction(app_id, "Production", request)

    entities = response.prediction.entities
    entities = {k:v[0] for k, v in entities.items()}

    return entities

In [62]:
def format_true_data(data):
    y_true = {}
    text = data['text']

    for entity in data['entity_labels']:
        y_true[entity['entity_name']] = text[entity['start_char_index']:entity['end_char_index']]

    return y_true

In [54]:
def accuracy_score(y_pred, y_true):
    score = 0
    for k, v in y_true.items():
        if k in y_pred:
            if v == y_pred[k]:
                score += 1
            elif v in y_pred[k]:
                score += 0.5

    accuracy = score * 100 /len(y_true)

    return accuracy



In [55]:
def evaluate(test_data):
    scores = []

    for data in test_data:
        y_pred = predict(clientRuntime, app_id, data['text'])
        y_true = format_true_data(data)
        scores.append(accuracy_score(y_pred, y_true))

    mean_score = sum(scores) / len(scores)

    return scores, mean_score

In [64]:
scores, mean_score = evaluate(test_data[0:100])

In [66]:
mean_score

70.45833333333334