In [49]:
%load_ext autoreload
%autoreload 2

from notebook import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [99]:
# # Copie les notebooks et supprime les sorties.
# copy_and_clean_notebooks()

[NbConvertApp] Converting notebook analyse_insatisfactions.ipynb to notebook
[NbConvertApp] Writing 11338 bytes to analyse_insatisfactions_no_out.ipynb
[NbConvertApp] Converting notebook create_luis_model.ipynb to notebook
[NbConvertApp] Writing 33548 bytes to create_luis_model_no_out.ipynb
[NbConvertApp] Converting notebook update_luis_model.ipynb to notebook
[NbConvertApp] Writing 23123 bytes to update_luis_model_no_out.ipynb


# Création d'une nouvelle branche

# Chargement des ressources

## Chargement du workspace

In [10]:
# On charge l’espace de travail Azure Machine Learning existant
ws = Workspace.from_config()

## Chargement du magasin des données

In [73]:
# On charge le magasin de données par défaut
datastore = ws.get_default_datastore()

## Chargement des paramètres de LUIS

In [2]:
env = LUISEnv("../P10_02_luis/.env")

In [4]:
with open("../P10_02_luis/params.json") as f:
    params = json.load(f)

In [6]:
model_version = str(params["model"]["versionId"])
ds_name = params["dataset"]["name"]
ds_version = params["dataset"]["version"]

# Mise à jour du jeu de données

## Chargement des nouvelles utterances

### Intent `book_flight`

In [34]:
# On colle les nouveaux textes à labelliser
book_flight_texts = [
    "Book me a flight from London to Paris tomorrow. I have only 100€."
]

len(book_flight_texts)

1

### Intent `None`

In [35]:
# On colle les nouveaux textes à labelliser
none_texts = [
    "Hey !!!"
]

len(none_texts)

1

## Chargement du précédent jeu de données

In [11]:
with tempfile.TemporaryDirectory() as tmp_dir_name:
    dataset = Dataset.get_by_name(ws, **params["dataset"])
    dataset.download(target_path=tmp_dir_name, overwrite=False)

    # On charge le jeu d'entrainement
    file_path = os.path.join(tmp_dir_name, "utterances_train.json")
    with open(file_path) as f:
        utterances_train = json.load(f)

    # On charge le jeu de test
    file_path = os.path.join(tmp_dir_name, "utterances_test.json")
    with open(file_path) as f:
        utterances_test = json.load(f)

In [31]:
# Extraction des textes du jeu de données pour l'intent "book_flight"
old_texts = get_texts_from_dataset(
    utterances_train,
    utterances_test,
    "book_flight"
)

# Extraction des textes du jeu de données pour l'intent "None"
old_texts += get_texts_from_dataset(
    utterances_train,
    utterances_test,
    "None"
)

## Suppression des doublons

### Intent `book_flight`

In [36]:
# On supprime les doublons pour l'intent "book_flight"
book_flight_texts = [i for i in book_flight_texts if i not in old_texts]

len(book_flight_texts)

1

### Intent `None`

In [37]:
# On supprime les doublons pour l'intent "None"
none_texts = [i for i in none_texts if i not in old_texts]

len(none_texts)

1

## Transformation des données

### Intent `book_flight`

In [41]:
# On convertit les textes au format LUIS
new_utterances = texts_to_luis_utterances(book_flight_texts, "book_flight")

[{'text': 'Book me a flight from London to Paris tomorrow. I have only 100€.',
  'intent': 'book_flight',
  'entities': []}]

### Intent `None`

In [42]:
# On convertit les textes au format LUIS
new_utterances += texts_to_luis_utterances(none_texts, "None")

## Labellisation des utterances

### Création d'un modèle LUIS pour la labellisation

In [44]:
labellisation_app_version = "labellisation"

In [45]:
create_new_version(env, labellisation_app_version, params["model"], new_utterances)

### Labellisation manuelle

### Téléchargement des utterances labellisées

In [None]:
new_utterances = get_utterances(env, labellisation_app_version)

In [None]:
new_utterances

### Suppression du modèle LUIS

In [54]:
delete(env, labellisation_app_version)

## Split des données

In [63]:
# On va prendre 70% des données pour le jeu d'entrainement
train_nb = int(len(new_utterances) * 0.7)

1

In [61]:
# On mélanges les utterances
random.shuffle(new_utterances)

In [64]:
# On crée le jeu d'entrainement
new_utterances_train = new_utterances[:train_nb]

In [69]:
# On crée le jeu de test
new_utterances_test = new_utterances[train_nb:]

## Ajout des précédentes utterances

In [68]:
utterances_train += new_utterances_train

In [70]:
utterances_test["LabeledTestSetUtterances"] += new_utterances_test

## Enregistrement des datasets

In [74]:
with tempfile.TemporaryDirectory() as tmp_dir_name:
    # On enregistre les données
    file_path = os.path.join(tmp_dir_name, "utterances_train.json")
    with open(file_path, "w") as f:
        json.dump(utterances_train, f)
        
    file_path = os.path.join(tmp_dir_name, "utterances_test.json")
    with open(file_path, "w") as f:
        json.dump(utterances_test, f)
    
    # On upload tous les fichiers dans le datastore
    ds = Dataset.File.upload_directory(
        tmp_dir_name,
        target=(datastore, "utterances/" + datetime.now().strftime("%Y_%m_%d")),
        overwrite=True,
        show_progress=True
    )

Validating arguments.
Arguments validated.
Uploading file to utterances/2021_12_27
Uploading an estimated of 2 files
Uploading /tmp/tmp918owj52/utterances_test.json
Uploaded /tmp/tmp918owj52/utterances_test.json, 1 files out of an estimated total of 2
Uploading /tmp/tmp918owj52/utterances_train.json
Uploaded /tmp/tmp918owj52/utterances_train.json, 2 files out of an estimated total of 2
Uploaded 2 files
Creating new dataset


In [75]:
ds = ds.register(
    workspace=ws,
    name="utterances",
    description="Train and test utterances",
    create_new_version=True
)

In [76]:
ds.version

4

# Enregistrement des paramètres de LUIS sur Github

## Mise à jour des paramètres de LUIS

In [None]:
new_params = params.copy()

In [106]:
tmp = params.copy()
tmp["model"]["utterances"] = []

In [108]:
print(json.dumps(tmp["model"], indent=2))

{
  "luis_schema_version": "7.0.0",
  "intents": [
    {
      "name": "book_flight",
      "features": []
    },
    {
      "name": "None",
      "features": []
    }
  ],
  "entities": [
    {
      "name": "budget",
      "children": [],
      "roles": [],
      "features": [
        {
          "modelName": "number",
          "isRequired": true
        }
      ]
    },
    {
      "name": "from_city",
      "children": [],
      "roles": [],
      "features": [
        {
          "featureName": "from_phrase_list",
          "isRequired": false
        },
        {
          "modelName": "geographyV2",
          "isRequired": true
        }
      ]
    },
    {
      "name": "from_dt",
      "children": [],
      "roles": [],
      "features": []
    },
    {
      "name": "to_city",
      "children": [],
      "roles": [],
      "features": [
        {
          "featureName": "to_phrase_list",
          "isRequired": false
        },
        {
          "modelName": "geographyV

In [86]:
# new_model_version = float(model_version) + 0.1
# new_model_version = f"{new_model_version:0.1f}"

# new_params["model"]["versionId"] = new_model_version

In [87]:
new_params["dataset"]["version"] = ds.version

In [84]:
file_path = os.path.join("../P10_02_luis", "params.json")
with open(file_path, "w") as f:
    json.dump(new_params, f)

## Evaluation du nouveau modèle

In [88]:
tmp_app_version = "tmp"

In [89]:
create_new_version(env, tmp_app_version, new_params["model"], utterances_train)

In [92]:
train(env, tmp_app_version)

In [93]:
deploy(env, tmp_app_version, "staging")

In [95]:
res = evaluate(env, is_staging=True, utterances=utterances_test)
res

Unnamed: 0,model_name,model_type,precision,recall,f_score
0,book_flight,Intent Classifier,0.97,1.0,0.99
1,,Intent Classifier,1.0,0.98,0.99
2,from_dt,Entity Extractor,0.89,1.0,0.94
3,to_dt,Entity Extractor,0.98,0.92,0.95
4,budget,Entity Extractor,0.5,0.93,0.65
5,from_city,Entity Extractor,0.54,0.95,0.69
6,to_city,Entity Extractor,0.69,1.0,0.81


In [98]:
print("## Evaluation sur le jeu de test\n")

print(res.to_html(float_format=lambda x: f"{x:0.2f}"))

## Evaluation sur le jeu de test

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>model_name</th>
      <th>model_type</th>
      <th>precision</th>
      <th>recall</th>
      <th>f_score</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>book_flight</td>
      <td>Intent Classifier</td>
      <td>0.97</td>
      <td>1.00</td>
      <td>0.99</td>
    </tr>
    <tr>
      <th>1</th>
      <td>None</td>
      <td>Intent Classifier</td>
      <td>1.00</td>
      <td>0.98</td>
      <td>0.99</td>
    </tr>
    <tr>
      <th>2</th>
      <td>from_dt</td>
      <td>Entity Extractor</td>
      <td>0.89</td>
      <td>1.00</td>
      <td>0.94</td>
    </tr>
    <tr>
      <th>3</th>
      <td>to_dt</td>
      <td>Entity Extractor</td>
      <td>0.98</td>
      <td>0.92</td>
      <td>0.95</td>
    </tr>
    <tr>
      <th>4</th>
      <td>budget</td>
      <td>Entity Extractor</td>
      <td>0.50</td>
      <td

In [97]:
 delete(env, tmp_app_version)