### Import données et librairies

In [20]:
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
from functools import reduce

from sklearn.model_selection import train_test_split
from dotenv import load_dotenv
import os
import pandas as pd
import json, time

In [2]:
data = pd.read_json("../1-Données/frames.json")

### Création LUIS

In [22]:
load_dotenv("../../.env")
authoringKey = os.environ['authoringKey']
authoringEndpoint = os.environ['authoringEndpoint']
predictionKey = os.environ['predictionKey']
predictionEndpoint = os.environ['predictionEndpoint']

In [7]:
# We use a UUID to avoid name collisions.
appName = "Language Detection"
versionId = "1.0"
intentName = "OrderTrip"

In [8]:
client = LUISAuthoringClient(authoringEndpoint, CognitiveServicesCredentials(authoringKey))
client

<azure.cognitiveservices.language.luis.authoring._luis_authoring_client.LUISAuthoringClient at 0x1d9ce396ca0>

In [9]:
define app basics
appDefinition = ApplicationCreateObject (name=appName, initial_version_id=versionId, culture='en-us')

create app
app_id = client.apps.add(appDefinition)

get app id - necessary for all other changes
print("Created LUIS app with ID {}".format(app_id))

# app_id = "9cf1e5a9-564c-42ab-aed9-00fae5aa1513"
# client.apps.get(app_id=app_id)

<azure.cognitiveservices.language.luis.authoring.models._models_py3.ApplicationInfoResponse at 0x1d9d2dbec10>

In [19]:
client.model.add_intent(app_id, versionId, intentName)

'd04800f7-12e0-47f3-a739-6f808f655b10'

In [4]:
ls_entities = [
    "or_city",
    "dst_city",
    "str_date",
    "end_date",
    "budget"
]

for ent in ls_entities:
    client.model.add_entity(app_id, versionId, name=ent)

### Entraînement LUIS

In [16]:
# Formatage des données d'entrée pour LUIS
def get_turn_entities(data, index, ls_entities):
    luis_data = []
    conversation = data["turns"][index][0]
    
    json_part = {}
    txt = conversation["text"].lower()
    json_part["text"] = txt
    json_part["intentName"] = intentName
    # Nous n'utiliserons que ce qu'ont
    # écrit les utilisateurs
    if conversation["author"] == "user":
        for act in conversation["labels"]["acts"]:
            entities = []
            for arg in act["args"]:
                if arg["key"] in ls_entities:
                    entity = {}
                    key = arg["key"].lower()
                    if "val" in arg.keys():
                        val = arg["val"].lower()
                        if val != "-1":
                            startCharIndex = txt.index(val)
                            endCharIndex = startCharIndex + len(val)
                            entity["entityName"] = key
                            entity["startCharIndex"] = startCharIndex
                            entity["endCharIndex"] = endCharIndex
                            entities.append(entity)
            json_part["entityLabels"] = entities

    if (len(json_part)>0):
            if "entityLabels" in json_part.keys():
                if len(json_part["entityLabels"])>0:
                    luis_data.append(json_part)
    return luis_data

def convert_data(data, ls_entities):
    luis_data = []
    for i in range(data.shape[0]):
        json_part = get_turn_entities(data, i, ls_entities)
        if len(json_part)>0:
            for j in range(len(json_part)):
                luis_data.append(json_part[j])
    return luis_data


# pour convertir les données pour LUIS :
luis_data = convert_data(data, ls_entities)

In [18]:
luis_data_train, luis_data_test = train_test_split(luis_data, test_size=0.2, random_state=42)

In [38]:
# On ajoute les requêtes par paquet de 100
for i in range(0, len(luis_data_train), 100):
    client.examples.batch(
        app_id=app_id,
        version_id=versionId,
        example_label_object_array=luis_data_train[i : i + 100],
        )


In [39]:
client.train.train_version(app_id, versionId)
waiting = True
while waiting:
    info = client.train.get_status(app_id, versionId)

    # get_status returns a list of training statuses, one for each model. Loop through them and make sure all are done.
    waiting = any(map(lambda x: 'Queued' == x.details.status or 'InProgress' == x.details.status, info))
    if waiting:
        print ("Waiting 10 seconds for training to complete...")
        time.sleep(10)
    else: 
        print ("trained")
        waiting = False

Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
trained


In [40]:
# Publication
client.apps.update_settings(app_id, is_public=True)

responseEndpointInfo = client.apps.publish(app_id, versionId, is_staging=False)

### Prédictions LUIS

In [5]:
runtimeCredentials = CognitiveServicesCredentials(predictionKey)
clientRuntime = LUISRuntimeClient(endpoint=predictionEndpoint, credentials=runtimeCredentials)

In [40]:
text = "i have 11 days for vacation this year! i only have $400 to spend tho. i leave from santiago. what kind of packages are there?"
predictionRequest = { "query" : text }

predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
predictionResponse

<azure.cognitiveservices.language.luis.runtime.models._models_py3.PredictionResponse at 0x1d994420d60>

In [41]:
predictionResponse.prediction.intents

{'OrderTrip': <azure.cognitiveservices.language.luis.runtime.models._models_py3.Intent at 0x1d9942a89d0>}

In [42]:
predictionResponse.prediction.entities

{'budget': ['$400'], 'or_city': ['santiago.']}

In [46]:
# Prédictions de notre jeu de test
list_text = []
list_real_entity_budget = []
list_real_entity_or_city = []
list_real_entity_dst_city = []
list_real_entity_str_date = []
list_real_entity_end_date = []
list_predicted_entity_budget = []
list_predicted_entity_or_city = []
list_predicted_entity_dst_city = []
list_predicted_entity_str_date = []
list_predicted_entity_end_date = []

# Boucle sur données de test
for sample in luis_data_test:
    # Texte
    text = sample["text"]
    list_text.append(text)
    
    # Entités
    entities = sample['entityLabels']
    
    # Entités attendues    
    or_city = False
    dst_city = False
    str_date = False
    end_date = False
    budget = False
    for ent in entities:
        entity = ent['entityName']
        if entity == "or_city":
            list_real_entity_or_city.append(text[ent['startCharIndex']:ent['endCharIndex']])
            or_city = True
        if entity == "dst_city":
            list_real_entity_dst_city.append(text[ent['startCharIndex']:ent['endCharIndex']])
            dst_city =  True
        if entity == "str_date":
            list_real_entity_str_date.append(text[ent['startCharIndex']:ent['endCharIndex']])
            str_date = True
        if entity == "end_date":
            list_real_entity_end_date.append(text[ent['startCharIndex']:ent['endCharIndex']])
            end_date =  True
        if entity == "budget":
            list_real_entity_budget.append(text[ent['startCharIndex']:ent['endCharIndex']])
            budget =  True
    if not or_city:
        list_real_entity_or_city.append(0)
    if not dst_city:
        list_real_entity_dst_city.append(0)
    if not str_date:
        list_real_entity_str_date.append(0)
    if not end_date:
        list_real_entity_end_date.append(0)
    if not budget:
        list_real_entity_budget.append(0)   
    
    # Prédiction
    predictionRequest = { "query" : text }
    predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
    response = predictionResponse.prediction.entities
    
    # Entités prédites  
    for ent in ls_entities:        
        if ent == "or_city":
            try:
                list_predicted_entity_or_city.append(response[ent][0].lower())
            except KeyError:
                list_predicted_entity_or_city.append(0)
        if ent == "dst_city":
            try:
                list_predicted_entity_dst_city.append(response[ent][0].lower())
            except KeyError:
                list_predicted_entity_dst_city.append(0)
        if ent == "str_date":
            try:
                list_predicted_entity_str_date.append(response[ent][0].lower())
            except KeyError:
                list_predicted_entity_str_date.append(0)
        if ent == "end_date":
            try:
                list_predicted_entity_end_date.append(response[ent][0].lower())
            except KeyError:
                list_predicted_entity_end_date.append(0)
        if ent == "budget":
            try:
                list_predicted_entity_budget.append(response[ent][0].lower())
            except KeyError:
                list_predicted_entity_budget.append(0)

# Construction du dataframe et enregistrement des résultats dans un fichier csv
df_results = pd.DataFrame(list(zip(list_text, list_real_entity_or_city, list_real_entity_dst_city,
                                   list_real_entity_str_date, list_real_entity_end_date,
                                   list_real_entity_budget, list_predicted_entity_or_city,
                                  list_predicted_entity_dst_city, list_predicted_entity_str_date,
                                   list_predicted_entity_end_date, list_predicted_entity_budget)),
               columns =['text', 'real_or_city ', 'real_dst_city', 'real_str_date', 'real_end_date', 'real_budget',
                         'predicted_or_city ', 'predicted_dst_city', 'predicted_str_date', 'predicted_end_date', 'predicted_budget'])
df_results.to_csv('luis_results.csv', index=False, sep=";")

In [10]:
# Chargement des résultats
df_results = pd.read_csv('luis_results.csv', sep=";")

In [7]:
# Rappel
def entity_recall(df, num_col):
    vpfn = 0
    vp = 0
    for i in range(len(df)):
        if df.iloc[i,num_col] != 0:
            vpfn = vpfn + 1
            if df.iloc[i,num_col] ==  df.iloc[i,num_col+5]:
                vp = vp + 1
    return (vp/vpfn)

In [16]:
# Calcul des rappels de chanque entité
print("Rappels :")
rc_or_city = entity_recall(df_results, 1)
print("Ville de départ :"+str(round(rc_or_city,2)))
rc_dst_city = entity_recall(df_results, 2)
print("Ville de destination :"+str(round(rc_dst_city,2)))
rc_str_date = entity_recall(df_results, 3)
print("Date aller souhaitée du vol :"+str(round(rc_or_city,2)))
rc_end_date = entity_recall(df_results, 4)
print("Date retour souhaitée du vol :"+str(round(rc_end_date,2)))
rc_budget = entity_recall(df_results, 5)
print("Budget maximum pour le prix total des billets :"+str(round(rc_budget,2)))

Rappels :
Ville de départ :0.77
Ville de destination :0.67
Date aller souhaitée du vol :0.77
Date retour souhaitée du vol :0.9
Budget maximum pour le prix total des billets :0.91
