In [1]:
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.authoring.models import ApplicationCreateObject
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
from functools import reduce
import numpy as np
import pandas as pd
import json, time, uuid, os 

In [37]:
# authoringKey = '58c164d9218d49fbbded211388bb06ad'
# authoringEndpoint = 'https://luisoc-authoring.cognitiveservices.azure.com/'
# predictionKey = '10cf4c925d3640e39afc9c76af3488c8'
# predictionEndpoint = 'https://luis-oc.cognitiveservices.azure.com/'
# LUIS_SLOT_NAME = 'Production'
# LUIS_APP_ID = '92cd3bfb-bb53-4af9-8f74-2c307d7828e4'

authoringKey ='58c164d9218d49fbbded211388bb06ad'
authoringEndpoint = 'https://luisoc-authoring.cognitiveservices.azure.com/'
predictionKey = '10cf4c925d3640e39afc9c76af3488c8'
predictionEndpoint = 'https://luis-oc.cognitiveservices.azure.com/'
LUIS_SLOT_NAME = 'Production'
LUIS_APP_ID = '4a9683a7-525a-4df5-aa8d-e4f9f088ce51'

appName = "FlightBooking2_" + str(uuid.uuid4())
versionId = "0.1"

client = LUISAuthoringClient(authoringEndpoint, CognitiveServicesCredentials(authoringKey))
clientRuntime = LUISRuntimeClient(predictionEndpoint, CognitiveServicesCredentials(predictionKey))

#Define app basics 
appDefinition = ApplicationCreateObject(name=appName, initial_version_id=versionId, culture='fr-fr')


In [None]:
request = "I want to travel from paris to Berlin the 12/12/2023 to 15/12/2023 for 500 euros"



In [8]:
def create_app(client):
    
    #Create app 
    app_id = client.apps.add(appDefinition)

    #Get app id -necessary for all other changes 
    print("Created LUIS app with ID {}".format(app_id))
    return app_id
    
    

def add_intents(client, app_id, app_version):
    """Creation of intents.
    Parameters: app_id (LUIS App ID), app_version(LUIS App version)
    Outputs: intents' ID 
    """
    
    #Create intents list
    intents_list = ["BookFlight"] 
    
    for intent in intents_list:
        intentID = client.model.add_intent(app_id, app_version, intent)
        print("{} ID {} added.".format(intent, intentID))
        
        

def add_entities(client, app_id, app_version):
    
    #Define and add machine-learned entity to app 
    origine_ID = client.model.add_entity(app_id, app_version, name="or_city")
    print("Entity {} {} added.".format("or_city", origine_ID))

    destination_ID = client.model.add_entity(app_id, app_version, name='dst_city')
    print("Entity {} {} added.".format('dst_city', destination_ID))

    start_date_ID = client.model.add_entity(app_id, app_version, name='str_date')
    print("Entity {} {} added.".format('str_date', start_date_ID))

    end_date_ID = client.model.add_entity(app_id, app_version, name='end_date')
    print("Entity {} {} added.".format('end_date', end_date_ID))

    budget_ID = client.model.add_entity(app_id, app_version, name='budget')
    print("Entity {} {} added.".format('budget', budget_ID))

    # # Add prebuilt entities
    # datetime_ID = client.model.add_prebuilt(app_id, app_version, prebuilt_extractor_names=['datetimeV2'])
    # print("Prebuit Entity {} {} added.".format('datetime', datetime_ID))
    

def create_train_utterance(intent, utterance, *labels):    
    """LUIS expects a specifif data format for TRAIN data:
        this function creates these formatted utterances.
            Parameters:
                intent: the intent for which the utterances are associated
                utterance: a batch of utterances
                labels: 
                    - key/value pair for entities 
                    - key/value pair for char start index (startCharIndex)
                    - key value pair for char end index (endCharIndex)
            Outputs:
                the formatted data, including :
                    - text, intentName, 
                    - a list of dictionary of entityLabels and char's indexes
    """

    text = utterance.lower()

    def label(name, value):
        value= value.lower()
        start = text.index(value)

        return dict(
                    entityName=name,
                    startCharIndex=start,
                    endCharIndex=start + len(value))

    return dict(text=text, intentName=intent,
                entityLabels=[label(n,v) for (n,v) in labels])



def convert_as_utterance(data, intentCall='BookFlight', df='Train'):
    """Call data structure for 'Train' or 'Test'.
            Parameters: 
                dataframe, name of intent, train/test indication
            Outputs:
                my_data : transformed data
    """
    
    utterances_data= []
    
    #Exclude entities with nan values from the list
    nan_list = ["nan", "Nan", "", np.nan, None]
    
    #Iterate over the rows
    for index, row in data.iterrows(): #data.iterrows() is series where index are entities' names
        
        #Create a list of entities'tuple (key, value)
        entities = []
        if row.or_city not in nan_list:
            entities.append(("or_city", row.or_city))
        if row.dst_city not in nan_list:
            entities.append(('dst_city', row.dst_city))
        if row.str_date not in nan_list:
            entities.append(('str_date', row.str_date))
        if row.end_date not in nan_list:
            entities.append(('end_date', row.end_date))
        if row.budget not in nan_list:
            entities.append(('budget', row.budget))
            
        #Call the function to create the LUIS data in correct format 
        if df == 'Train':
            output = create_train_utterance(intentCall, row.text, *entities)
        if df == 'Test':
            output = create_test_utterance(intentCall, row.text, *entities)
            
        #Save outputs in utterances_data
        utterances_data.append(output)
        
    print('Length Utterances data', len(utterances_data))
    
    #Format to JSON and save 
    content = json.dumps(utterances_data)
    my_data = json.loads(content)
    
    return my_data
        
        
def add_utterances_to_luis(client, app_id, app_version, my_data):
    #Collect utterances data 
    utterances = my_data
    
    #Add the utterances in batch (max_batch_size=100 utterances for one call)
    client.examples.batch(app_id, app_version, utterances)
        


def train_app(client, app_id, app_version):
    """Train the LUIS app when all the utterances are uploaded.
            Parameters:
                app_id : LUIS App ID
                app_version : Luis App version
            Outputs:
                None (the trained status can be checked on LUIS portal)
    """
    client.train.train_version(app_id, app_version)
    waiting = True
    
    while waiting:
        info = client.train.get_status(app_id, app_version)
        
        # get_status returns a list of training statuses, one for each model
        # Loop through them and make sure all are done
        waiting = any(
            map(
                lambda x: 'Queued' == x.details.status or 'InProgress' == x.details.status, info))
        
        if waiting:
            print ("Waiting 10 seconds for training to complete...")
            time.sleep(10)

        else:
            print('Trained')
            waiting = False
                
        
        
def publish_app(client, app_id, app_version):
    """Publish LUIS application.
            Parameters:
                app_id : LUIS App ID
                app_version : Luis App version
            Outputs:
                None
                (On LUIS portal : endpoint URL )
    """
    # Mark the app as public so we can query it using any prediction endpoint
    client.apps.update_settings(app_id, is_public=True)
    
    responseEndpointInfo = client.apps.publish(
        app_id, 
        app_version,
        is_staging=False)

    print('Application published. Endpoint URL: ', 
          responseEndpointInfo.endpoint_url)
        
        
        
        
def create_test_utterance(intent, utterance, *labels):
    """LUIS expects a specifif data format for TEST data:
        this function creates these formatted utterances.
            Parameters:
                intent: the intent for which the utterances are associated
                utterance: a batch of utterances
                labels: 
                    - key/value pair for entities 
                    - key/value pair for char start position (startPos)
                    - key value pair for char end position (endPos)
            Outputs:
                the formatted data, including :
                    - text, intent, 
                    - a list of dictionary of entities and char's positions
    """

    text = utterance.lower()

    def label(name, value):
        value = value.lower()
        start = text.index(value)
        return dict(entity=name, startPos=start,
                    endPos=start + len(value))

    return dict(text=text, intent=intent,
                entities=[label(n, v) for (n, v) in labels])        

    
    
def predict(clientRuntime):
    """ Test LUIS prediction capabilities
    """
    request = {
        'query':'book a flight from Tunis to Toronto between 22 October 2021 to 5 November 2021, for a budget of $3500'
        }
    
    # The slot name parameter must be specified (staging or production)
    # For version 0.2.0, use "resolve" method
    # For version 0.7.0, use "get_slot_prediction" method
    response = clientRuntime.prediction.resolve(LUIS_APP_ID, query=request)

    text = response.query
    top_intent = response.top_scoring_intent.intent
    all_entities = response.entities
    
    return text, top_intent, all_entities    

In [4]:
app_id = create_app(client)

Created LUIS app with ID 4a9683a7-525a-4df5-aa8d-e4f9f088ce51


In [10]:
app_id = '4a9683a7-525a-4df5-aa8d-e4f9f088ce51'

In [309]:
predictionRequest = {'query':'book a flight for 10 days from 10 September 2022 from Paris to Brazil with a budget of 4000€' }

# predictionResponse = clientRuntime.prediction.resolve(LUIS_APP_ID, predictionRequest)
predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
predictionResponse

In [36]:
for i in predictionResponse.entities:
    print(i.deserialize)

<bound method Model.deserialize of <class 'azure.cognitiveservices.language.luis.runtime.models.entity_model_py3.EntityModel'>>
<bound method Model.deserialize of <class 'azure.cognitiveservices.language.luis.runtime.models.entity_model_py3.EntityModel'>>


In [6]:
add_intents(client, app_id, versionId)

BookFlight ID 8c448052-a758-48ac-affe-9c5789be4764 added.


In [7]:
#Add entities 
add_entities(client, app_id, versionId)

Entity or_city 998a0f9b-b86e-455b-abb6-b8d139ee23a0 added.
Entity dst_city 988d30fb-09c6-45f1-b042-0dc9fe1f1d60 added.
Entity str_date 549dd178-cac0-4ec9-a073-24ab8f015a8b added.
Entity end_date 0edf0247-6490-4151-ae0b-9bebe92b6c79 added.
Entity budget 667e29b6-ba6c-4e07-9d26-ca52177b22d6 added.


In [8]:
train_df = os.path.join("./luis_app/data/train_df.csv")
test_df = os.path.join("./luis_app/data/test_df.csv")

In [9]:
train_df = pd.read_csv(train_df)
test_df = pd.read_csv(test_df)

In [10]:
train_df.shape, test_df.shape

((900, 6), (200, 6))

In [11]:
train_df.head()

Unnamed: 0,text,or_city,dst_city,str_date,end_date,budget
0,I am a world-renowned pastry chef and I need t...,,St. Petersburg,,,
1,IM IN TIJUANA FIND ME A FLIGHT TO CURITIBA AUG...,TIJUANA,CURITIBA,AUG 27,SEPT 4,
2,Hi im from punta cana looking to go on a trip,punta cana,,,,
3,Where can this guy go for 2400??? I live in Me...,Mexico City,Milan,,,2400.0
4,Are there 5 star hotels in Kyoto?,,Kyoto,,,


In [12]:
bookFlight_utterance = convert_as_utterance(train_df, intentCall='BookFlight', df='Train') #Convert data as utterances

Length Utterances data 900


In [13]:
with open('luis_app/data/my_train.json', 'w+') as f:
    json.dump(bookFlight_utterance, f) #Save the data

In [14]:
bookFlight_utterance[0]

{'text': 'i am a world-renowned pastry chef and i need to travel to st. petersburg - stat!',
 'intentName': 'BookFlight',
 'entityLabels': [{'entityName': 'dst_city',
   'startCharIndex': 58,
   'endCharIndex': 72}]}

In [15]:
client.examples.add(app_id, versionId, bookFlight_utterance[0])

<azure.cognitiveservices.language.luis.authoring.models._models_py3.LabelExampleResponse at 0x2615c919550>

In [16]:
#Add utterances 
for i in range(0, len(bookFlight_utterance), 100):
    j = i + 100
    if j > len(bookFlight_utterance):
        j = len(bookFlight_utterance)
    add_utterances_to_luis(client, app_id, versionId, bookFlight_utterance[i:j])

In [17]:
#Train LUIS app 
train_app(client, app_id, versionId)

Waiting 10 seconds for training to complete...
Waiting 10 seconds for training to complete...
Trained


In [18]:
#Create TEST data
test_utterance = convert_as_utterance(test_df, intentCall="BookFlight", df='Test')
with open('luis_app/data/my_test.json', 'w+') as f:
    json.dump(test_utterance, f) # Save the data

Length Utterances data 200


In [19]:
client.apps.update_settings(app_id, is_public=True)

<azure.cognitiveservices.language.luis.authoring.models._models_py3.OperationStatus at 0x2615c905c40>

In [20]:
responseEndpointInfo = client.apps.publish(app_id, versionId, is_staging=False)

In [21]:
#Publish app
publish_app(client, app_id, versionId)

Application published. Endpoint URL:  https://westeurope.api.cognitive.microsoft.com/luis/v2.0/apps/4a9683a7-525a-4df5-aa8d-e4f9f088ce51


In [308]:
# Production == slot name
predictionRequest = { 'query':'book a flight for 10 days from 10 September 2022 from Paris to Brazil with a budget of 4000€' }

predictionResponse = clientRuntime.prediction.get_slot_prediction(app_id, "Production", predictionRequest)
print("Top intent: {}".format(predictionResponse.prediction.top_intent))
print("Intents: ")

for intent in predictionResponse.prediction.intents:
    print("\t{}".format (json.dumps (intent)))
print("Entities: {}".format (predictionResponse.prediction.entities))

# Test scores entities

In [39]:
authoringKey ='58c164d9218d49fbbded211388bb06ad'
authoringEndpoint = 'https://luisoc-authoring.cognitiveservices.azure.com/'
predictionKey = '10cf4c925d3640e39afc9c76af3488c8'
predictionEndpoint = 'https://luis-oc.cognitiveservices.azure.com/'
LUIS_SLOT_NAME = 'Production'
LUIS_APP_ID = '4a9683a7-525a-4df5-aa8d-e4f9f088ce51'

appName = "FlightBooking2_" + str(uuid.uuid4())
versionId = "0.1"

client = LUISAuthoringClient(authoringEndpoint, CognitiveServicesCredentials(authoringKey))
clientRuntime = LUISRuntimeClient(predictionEndpoint, CognitiveServicesCredentials(predictionKey))

#Define app basics 
appDefinition = ApplicationCreateObject(name=appName, initial_version_id=versionId, culture='fr-fr')


In [193]:
test_df = os.path.join("./luis_app/data/test_df.csv")
test_df = pd.read_csv(test_df)

In [292]:
def score(request, df):
    """ For each request, detect all entites and their best score"""
    response = clientRuntime.prediction.resolve(app_id, query=request)

    list_entities = []
    dict_entities = {}
    # columns = ["or_city", "dst_city", "str_date", "end_date", "budget"]
    # df_score = pd.DataFrame(columns=columns)

    for entity in response.entities:
        entity_type = entity.type
        score = entity.additional_properties.get("score")

        if entity_type in dict_entities.keys():
            old_score = dict_entities.get(entity_type)
            top_score = max(old_score, score)
            dict_entities[entity_type] = top_score

        else:
            dict_entities[entity.type] = score

    df_score = pd.concat([df, pd.DataFrame([dict_entities])], ignore_index =True)

    return df_score

In [293]:
columns = ["or_city", "dst_city", "str_date", "end_date", "budget"]
df_score = pd.DataFrame(columns=columns)
df_score

Unnamed: 0,or_city,dst_city,str_date,end_date,budget


In [294]:
for i in range(0, len(test_df["text"].iloc[:])):
    df_score = score(test_df["text"].iloc[i], df_score)

In [296]:
df_score

Unnamed: 0,or_city,dst_city,str_date,end_date,budget
0,0.662568,0.705345,0.974984,0.84059,
1,0.768433,,0.893045,,
2,0.952819,0.503639,,,
3,0.999516,0.999948,,,
4,0.594178,,0.999207,0.998143,
...,...,...,...,...,...
195,0.999851,,,,0.998493
196,0.817052,0.995417,,,
197,,0.979467,,,
198,0.975755,0.99819,,,


In [299]:
mean_df = df_score[columns].mean()

In [307]:
for i in mean_df.index:
    print("Mean score for {} entity: {}".format(i, mean_df.loc[i]))

Mean score for or_city entity: 0.9412346803676472
Mean score for dst_city entity: 0.9525263956250005
Mean score for str_date entity: 0.9467749378688524
Mean score for end_date entity: 0.9036219343589741
Mean score for budget entity: 0.8815136381818183
