In [47]:
import datetime
import json
import os
import time
import warnings
from pprint import pprint

import pandas as pd
from azure.cognitiveservices.language.luis.authoring import LUISAuthoringClient
from azure.cognitiveservices.language.luis.runtime import LUISRuntimeClient
from msrest.authentication import CognitiveServicesCredentials
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

import p10_functions as pf

In [18]:
# Project root. It can be overridden with the P10_ROOT_DIR environment variable
# so the notebook is not tied to one machine; the original location remains the
# default.
root_dir = os.environ.get('P10_ROOT_DIR', 'D:/OpenClassrooms/projet_10/')
# A later cell builds a path by plain string concatenation
# (root_dir + 'scripts/'), so make sure the root ends with a separator.
if not root_dir.endswith(('/', '\\')):
    root_dir += '/'

# JSON file read with pandas further down to build the LUIS utterances.
data_file = os.path.join(root_dir, 'data', 'frames', 'frames.json')

# Azure region of the authoring resource and the matching regional endpoint.
region_authoring = 'westeurope'
region_url = f'https://{region_authoring}.api.cognitive.microsoft.com'

# Pause (in seconds) taken after each LUIS API call
# (presumably to avoid request throttling -- confirm against the service quota).
sleep_time = 0.25

In [19]:
# Authoring and prediction resources.
#
# SECURITY: subscription keys must never be stored in the notebook. They are
# read from the environment instead:
#   LUIS_AUTHORING_KEY   -> key of the authoring resource
#   LUIS_PREDICTION_KEY  -> key of the prediction resource
# Keys that were committed in earlier revisions of this notebook must be
# treated as compromised and regenerated in the Azure portal.
# Endpoints are not secret; they keep their previous values as defaults and
# can be overridden the same way.

AUTHORING_ENDPOINT = os.environ.get(
    'LUIS_AUTHORING_ENDPOINT',
    'https://oc-10-luis-author-2.cognitiveservices.azure.com/'
)
AUTHORING_KEY = os.environ.get('LUIS_AUTHORING_KEY', '')

PREDICTION_ENDPOINT = os.environ.get(
    'LUIS_PREDICTION_ENDPOINT',
    'https://oc-10-luis-pred-2.cognitiveservices.azure.com/'
)
PREDICTION_KEY = os.environ.get('LUIS_PREDICTION_KEY', '')

# Tell the reader right away when a key is missing instead of failing later
# with an opaque authentication error.
for _var_name, _key in (
    ('LUIS_AUTHORING_KEY', AUTHORING_KEY),
    ('LUIS_PREDICTION_KEY', PREDICTION_KEY),
):
    if not _key:
        warnings.warn(
            f'{_var_name} is not set: LUIS calls that need it will fail '
            'until the variable is exported and the kernel restarted.'
        )


# Existing apps info: previously created LUIS apps that can be reused instead
# of creating a new one (selected by index in a later cell).
# NOTE(review): the last two entries are identical. The duplicate is kept
# because the app-selection cell uses index 4; remove it together with that
# index if the list is cleaned up.
existing_apps = [
    {
        'app_name': 'luis_2023-02-24_18:26:21', 
        'app_id': 'e7bd1685-fe52-42a9-860a-970bbd6c9eee', 
    }, 
    {
        'app_name': 'luis_2023-03-03_11:07:58', 
        'app_id': '0abd15c6-f743-4ca3-9775-1adf328dfc24', 
    }, 
    {
        'app_name': 'luis_2023-03-03_18:15:50', 
        'app_id': '26c13481-467f-49d7-8a55-e72a7f51fdcd', 
    }, 
    {
        'app_name': 'luis_2023-03-03_18:27:05', 
        'app_id': 'c78c19a0-1730-441c-9d70-b93e292192b1', 
    }, 
    {
        'app_name': 'luis_2023-03-03_18:27:05', 
        'app_id': 'c78c19a0-1730-441c-9d70-b93e292192b1', 
    }, 
]

In [20]:
# Authoring client: wraps the authoring key in a credentials object and binds
# it to the authoring endpoint.
authoring_credentials = CognitiveServicesCredentials(AUTHORING_KEY)
auth_client = LUISAuthoringClient(AUTHORING_ENDPOINT, authoring_credentials)

In [21]:
# Index into `existing_apps`; only consulted when no new app is created.
app_idx = 4

# True  -> create a brand-new LUIS app
# False -> reuse the existing app selected by `app_idx`
create_new_luis = False

In [22]:
# The version identifier is the same whether the app is created or reused.
version_id = '0.1'

if not create_new_luis:
    # Reuse an existing app: look up its name and id from `existing_apps`.
    selected_app = existing_apps[app_idx]
    app_name, app_id = selected_app['app_name'], selected_app['app_id']
else:
    # Create a new LUIS app whose name carries the creation timestamp.
    creation_stamp = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
    app_name = 'luis_' + creation_stamp
    new_app_definition = {
        'name': app_name,
        'initial_version_id': version_id,
        'culture': 'en-us',
    }
    app_id = auth_client.apps.add(new_app_definition)

print(
    f'app_name: {app_name}',
    f'app_id: {app_id}',
    f'version_id: {version_id}',
    sep='\n',
)

time.sleep(sleep_time)

app_name: luis_2023-03-03_18:27:05
app_id: c78c19a0-1730-441c-9d70-b93e292192b1
version_id: 0.1


## Add information into the model

### Entity

In [23]:
# Entities the model must extract from an utterance.
entities = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']

# Register each entity on the app version, one API call per entity.
for entity_name in entities:
    entity_id = auth_client.model.add_entity(app_id, version_id, name=entity_name)
    print(f'"{entity_name}" created with id {entity_id}.')

    time.sleep(sleep_time)

"or_city" created with id eae8be4f-7b2e-4336-a22f-82ea6c334f36.
"dst_city" created with id 7173f80e-a945-4be5-9076-85da158ebd07.
"str_date" created with id 415549ec-84f6-4eb9-ad07-84c8f51f7c92.
"end_date" created with id 4ee0f0d4-685e-41f0-aac7-c53f0c37d88e.
"budget" created with id e84a2520-34b0-44e9-bda0-e3fe5b09ac51.


### Intent

In [24]:
# Single intent handled by the app; it is registered on the app version.
intent_name = "BookFlight"
intent_id = auth_client.model.add_intent(app_id, version_id, intent_name)

print(f'{intent_name} intent created with id {intent_id}')

time.sleep(sleep_time)

BookFlight intent created with id 875ef2aa-5b1d-408b-ae76-7ca8158f1523


### Utterance (train and test)

In [25]:
# Entity names handed to the conversion helper (same list as the one used to
# create the entities on the app).
entities = ['or_city', 'dst_city', 'str_date', 'end_date', 'budget']

# Raw dataset, loaded as a DataFrame.
data = pd.read_json(data_file)
print(data.shape)

# Conversion is delegated to the project helper
# (presumably yields LUIS labelled examples -- see p10_functions.convert_data).
luis_data = pf.convert_data(data, entities)
print(len(luis_data))

(1369, 5)
4135


In [26]:
# Number of examples requested for the validation set.
val_set_size = 500

# Split performed by the project helper; sizes are printed train first.
train_set, val_set = pf.create_train_test_sets(val_set_size, luis_data)
print(len(train_set), len(val_set), sep='\n')

3635
500


### Upload train utterances

In [27]:
# Upload the training utterances (train_set) in batches of `batch_size`.
#
# Changes compared with the previous version of this cell:
#   * the final count no longer relies on the loop variable `j`, which was
#     undefined (NameError) when train_set was empty;
#   * per-example errors reported by the batch endpoint are counted and shown
#     instead of being silently ignored.
batch_size = 100
n_examples = len(train_set)
n_failed = 0

for i in range(0, n_examples, batch_size):
    # Exclusive end of the current batch, clamped to the end of the set.
    j = min(i + batch_size, n_examples)

    batch_result = auth_client.examples.batch(
        app_id, 
        version_id, 
        train_set[i:j], 
        {'enableNestedChildren':True}
    )

    # Each returned item is expected to carry a `has_error` flag (see the SDK
    # reference for BatchLabelExample); read it defensively.
    n_failed += sum(
        1 for item in (batch_result or []) if getattr(item, 'has_error', False)
    )

    time.sleep(sleep_time)

    print(f'batch: {i}-{j-1}')
print(f'\n{n_examples - n_failed} example utterances added.')
if n_failed:
    print(f'{n_failed} example utterances rejected by LUIS.')

batch: 0-99
batch: 100-199
batch: 200-299
batch: 300-399
batch: 400-499
batch: 500-599
batch: 600-699
batch: 700-799
batch: 800-899
batch: 900-999
batch: 1000-1099
batch: 1100-1199
batch: 1200-1299
batch: 1300-1399
batch: 1400-1499
batch: 1500-1599
batch: 1600-1699
batch: 1700-1799
batch: 1800-1899
batch: 1900-1999
batch: 2000-2099
batch: 2100-2199
batch: 2200-2299
batch: 2300-2399
batch: 2400-2499
batch: 2500-2599
batch: 2600-2699
batch: 2700-2799
batch: 2800-2899
batch: 2900-2999
batch: 3000-3099
batch: 3100-3199
batch: 3200-3299
batch: 3300-3399
batch: 3400-3499
batch: 3500-3599
batch: 3600-3634

3635 example utterances added.


## Train model with utterances

In [28]:
# Request training of the app version, then poll until every model reports a
# finished state.
async_training = auth_client.train.train_version(app_id, version_id)
# 'UpToDate' right away means there is nothing to (re)train.
is_trained = async_training.status == 'UpToDate'

trained_status = ['UpToDate', 'Success']
while not is_trained:
    time.sleep(5)  # seconds between two status checks
    status = auth_client.train.get_status(app_id, version_id)

    # A model in the 'Fail' state never reaches a trained status; without this
    # check the loop would poll forever.
    failed_models = [m for m in status if m.details.status == 'Fail']
    if failed_models:
        reasons = '; '.join(
            f"{getattr(m, 'model_id', '?')}: "
            f"{getattr(m.details, 'failure_reason', None)}"
            for m in failed_models
        )
        raise RuntimeError(f'LUIS training failed ({reasons}).')

    is_trained = all(m.details.status in trained_status for m in status)
print('LUIS trained.')

time.sleep(sleep_time)

LUIS trained.


## Publish model on staging slot

In [36]:
# Slot name passed to the runtime client when predicting the validation set.
slot_name = 'staging'

In [29]:
# Publish the trained version to the staging slot (is_staging=True).
publish_result = auth_client.apps.publish(app_id, version_id, is_staging=True)

time.sleep(sleep_time)

## Validation set

In [37]:
# FIRST, GO TO THE LUIS PORTAL AND LINK THE LUIS APP TO THE PREDICTION RESOURCE

In [38]:
# Runtime (prediction) client: authenticated with the prediction resource key
# and bound to the prediction endpoint.
runtimeCredentials = CognitiveServicesCredentials(PREDICTION_KEY)
clientRuntime = LUISRuntimeClient(
    endpoint=PREDICTION_ENDPOINT,
    credentials=runtimeCredentials,
)

In [39]:
# Query the published slot once per validation utterance and keep every
# response as a plain dict.

results = []
for utterance in val_set:
    response = clientRuntime.prediction.get_slot_prediction(
        app_id,
        slot_name,
        {'query': utterance['text']},
        show_all_intents=True,
        verbose=True,
        log=False,
    )
    # Pause between two consecutive requests.
    time.sleep(sleep_time)
    results.append(response.as_dict())

In [40]:
# Persist the predictions as JSON text; the file name embeds the number of
# validation utterances.
results_filename = f'results_{len(val_set)}.txt'
results_path = root_dir + 'scripts/' + results_filename

with open(results_path, 'w') as f:
    json.dump(results, f)

print(results_path)

D:/OpenClassrooms/projet_10/scripts/results_500.txt


In [48]:
# Reload the saved predictions.
# The file contains JSON (it is written with the json module), so it is parsed
# with json.load. The previous eval() executed the file content as Python
# code, which is unsafe and also fails on JSON literals such as
# true / false / null.
with open(results_path) as f:
    results = json.load(f)

print(len(results), '\n')
pprint(results[0])

500 

{'prediction': {'entities': {'$instance': {'dst_city': [{'length': 15,
                                                         'modelType': 'Entity '
                                                                      'Extractor',
                                                         'modelTypeId': 1,
                                                         'recognitionSources': ['model'],
                                                         'score': 0.9406944,
                                                         'startIndex': 129,
                                                         'text': 'rio de '
                                                                 'janeiro.',
                                                         'type': 'dst_city'}],
                                           'end_date': [{'length': 13,
                                                         'modelType': 'Entity '
                                                            

In [42]:
# Ground-truth intent of each validation utterance.
val_list = [example['intent'] for example in val_set]
print(val_list[:5])

# Top intent predicted by LUIS for each utterance, in the same order.
pred_list = [response['prediction']['top_intent'] for response in results]
print(pred_list[:5])

['BookFlight', 'BookFlight', 'BookFlight', 'BookFlight', 'BookFlight']
['BookFlight', 'BookFlight', 'BookFlight', 'BookFlight', 'BookFlight']


In [45]:
# Confusion matrix of true vs. predicted intents. Rows are true labels and
# columns are predicted labels, both ordered as in `labels`.
# `confusion_matrix` is imported in the notebook's top import cell.
matrix = confusion_matrix(val_list, pred_list, labels=['BookFlight', 'None'])
matrix

array([[499,   1],
       [  0,   0]], dtype=int64)

In [None]:
# Weight of recall relative to precision in the F-score (1 gives the F1 score).
beta = 1

# `precision_recall_fscore_support` is imported in the notebook's top import
# cell.
precision, recall, f_score, support = precision_recall_fscore_support(
    val_list, 
    pred_list, 
    labels=['BookFlight', 'None'], 
    beta=beta,
    # A label with no true samples (as 'None' in the recorded run, support 0)
    # has an undefined recall. Report it as 0 explicitly: same value as
    # before, without the undefined-metric warning.
    zero_division=0,
)

# One value per label, in the order given by `labels`.
print(precision)
print(recall)
print(f_score)
print(support)

[1. 0.]
[0.998 0.   ]
[0.998999 0.      ]
[500   0]


  _warn_prf(average, modifier, msg_start, len(result))


## Production setup

In [50]:
# From here on, runtime queries target the production slot.
slot_name = 'production'

In [51]:
# Publish the same version to the production slot (is_staging=False).
publish_result = auth_client.apps.publish(app_id, version_id, is_staging=False)

time.sleep(sleep_time)

In [52]:
# Switch the app's `is_public` setting on.
res = auth_client.apps.update_settings(
    app_id,
    is_public=True,
)

time.sleep(sleep_time)

## Production test

In [57]:
# Sample utterance sent to the runtime client in the queries below.
text_to_pred = 'I want to book a flight from paris to london. I can spend 500 euros.'

#### Simple query

In [53]:
# Minimal request: only the query text, every option left at its default.
predictionRequest = dict(query=text_to_pred)

predictionResponse = clientRuntime.prediction.get_slot_prediction(
    app_id, slot_name, predictionRequest
)
print(predictionResponse)

time.sleep(sleep_time)

{'additional_properties': {}, 'query': 'I want to book a flight from paris to london. I can spend 500 euros.', 'prediction': <azure.cognitiveservices.language.luis.runtime.models._models_py3.Prediction object at 0x000002A79BD0F700>}


In [54]:
# Show the winning intent, then the extracted entities.
prediction = predictionResponse.prediction
print(f'Top intent: {prediction.top_intent}', '\n')

print('Entities:')
pprint(prediction.entities)

Top intent: BookFlight 

Entities:
{'budget': ['500 euros.'], 'dst_city': ['london.'], 'or_city': ['paris']}


#### Verbose query

In [55]:
# Same query with the detailed options switched on.
predictionRequest = dict(query=text_to_pred)

verbose_options = dict(
    show_all_intents=True,  # shows score of all intents
    verbose=True,           # more detail on each entity
    log=False,              # to save queries
)
predictionResponse = clientRuntime.prediction.get_slot_prediction(
    app_id, slot_name, predictionRequest, **verbose_options
)
print(predictionResponse)

time.sleep(sleep_time)

{'additional_properties': {}, 'query': 'I want to book a flight from paris to london. I can spend 500 euros.', 'prediction': <azure.cognitiveservices.language.luis.runtime.models._models_py3.Prediction object at 0x000002A79BD0FE20>}


In [56]:
# Show the winning intent, the score of every intent, then the per-entity
# details stored under the '$instance' key.
prediction = predictionResponse.prediction
print(f'Top intent: {prediction.top_intent}', '\n')

print('Intents:')
for intent, intent_info in prediction.intents.items():
    print(f'  {intent}: {intent_info.score}')
print('\n')

print('Entities:', '\n')
pprint(prediction.entities['$instance'])

Top intent: BookFlight 

Intents:
  BookFlight: 0.999949
  None: 0.00046988076


Entities: 

{'budget': [{'length': 10,
             'modelType': 'Entity Extractor',
             'modelTypeId': 1,
             'recognitionSources': ['model'],
             'score': 0.9933378,
             'startIndex': 58,
             'text': '500 euros.',
             'type': 'budget'}],
 'dst_city': [{'length': 7,
               'modelType': 'Entity Extractor',
               'modelTypeId': 1,
               'recognitionSources': ['model'],
               'score': 0.92934126,
               'startIndex': 38,
               'text': 'london.',
               'type': 'dst_city'}],
 'or_city': [{'length': 5,
              'modelType': 'Entity Extractor',
              'modelTypeId': 1,
              'recognitionSources': ['model'],
              'score': 0.9801513,
              'startIndex': 29,
              'text': 'paris',
              'type': 'or_city'}]}
