<img src="https://user.oc-static.com/upload/2019/10/22/15717382019055_Capture%20d%E2%80%99e%CC%81cran%202019-10-22%20a%CC%80%2011.50.29.png">



# Déploiement d'un modèle 

Dans cette partie, nous déploierons le modèle LSTM bidirectionnel contenant la couche d'embedding Keras sur les services de Microsoft Azure.

In [1]:
# Import des bibliothèques nécessaires
import numpy as np
import pandas as pd
from gensim.corpora import Dictionary
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.layers import LSTM, Dense, Embedding, Bidirectional, Dropout
from tensorflow.keras.models import Sequential
import os
import joblib

from utils import load_data
import matplotlib.pyplot as plt
import glob
import numpy as np
from sklearn.linear_model import LogisticRegression
from azureml.core import Workspace


# Working folder created up front.
# The previous form, os.path.join(os.getcwd(), "/tmp/qs_data"), silently dropped
# the working directory because its second argument is absolute; the path is
# written out directly so the real location is obvious to the reader.
data_folder = "/tmp/qs_data"
os.makedirs(data_folder, exist_ok=True)

In [2]:
# Load the dataset; the rest of the notebook reads its `text` and `label` columns.
# NOTE(review): this path is absolute ('/utile/...') whereas every other path in
# the notebook is relative ('./utile/...') -- confirm it is intentional.
df_ = pd.read_csv('/utile/data.csv')

# Build the token dictionary: every text is lower-cased and split on whitespace.
tokenized_texts = [text.lower().split() for text in df_.text]
dct = Dictionary(tokenized_texts)

def return_index(X, token2id=None):
    """Convert a text into the list of ids of its known tokens.

    The text is lower-cased and split on whitespace; tokens that are absent
    from the mapping are skipped, so the result may be shorter than the
    number of words in ``X``.

    Parameters
    ----------
    X : str
        Text to encode.
    token2id : mapping of str to int, optional
        Token-to-id mapping to use. When omitted, ``dct.token2id`` is read
        from the notebook's global dictionary at call time. Passing it
        explicitly keeps the function usable even after the global name
        ``dct`` has been rebound to something else.

    Returns
    -------
    list of int
        Ids of the known tokens, in order of appearance.
    """
    if token2id is None:
        token2id = dct.token2id

    return [token2id[token] for token in X.lower().split() if token in token2id]


# Target labels
y = df_.label

# Encode every text as a list of token ids, then force a fixed length of 32:
# shorter sequences receive trailing zeros, longer ones lose their tail.
# NOTE(review): 0 is the padding value; if the dictionary also assigns id 0 to
# a real token, that token cannot be told apart from padding -- confirm.
encoded_texts = df_.text.apply(return_index)
X = sequence.pad_sequences(
    encoded_texts,
    maxlen=32,
    padding='post',
    truncating='post',
    value=0,
)

# Stratified split with a fixed seed: 33% of the rows are held out for validation
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    stratify=y,
    random_state=42,
)


# Bidirectional LSTM model factory
def my_LSTM5(len_dict, seq_len=32, embedding_dim=12, lstm_units=8):
    """Build the (uncompiled) bidirectional LSTM binary classifier.

    Architecture: Embedding -> BiLSTM (returns sequences) -> BiLSTM
    (returns last state) -> Dense(1, sigmoid).

    Parameters
    ----------
    len_dict : int
        Vocabulary size, used as the embedding ``input_dim``.
    seq_len : int, default 32
        Length of the input sequences; must match the ``maxlen`` used when
        padding the inputs.
    embedding_dim : int, default 12
        Size of the embedding vectors.
    lstm_units : int, default 8
        Number of units of each LSTM layer (per direction).

    Returns
    -------
    Sequential
        The model, not yet compiled.
    """
    return Sequential([
        Embedding(input_dim=len_dict, output_dim=embedding_dim, input_length=seq_len),
        # The first recurrent layer emits the full sequence so the second one can consume it
        Bidirectional(LSTM(units=lstm_units, return_sequences=True)),
        Bidirectional(LSTM(units=lstm_units, return_sequences=False)),
        Dense(units=1, activation='sigmoid'),
    ])

# Instantiate the network with the dictionary size as vocabulary size,
# then configure it for binary classification.
model5 = my_LSTM5(len_dict=len(dct))
model5.compile(
    loss='binary_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [3]:
import mlflow

# Load the Azure ML workspace
ws = Workspace.from_config()

# Name of the experiment the run is logged under
name = "model5-LSTM"

# Point MLflow at the workspace tracking server and enable autologging
tracking_uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(tracking_uri)
mlflow.set_experiment(name)
mlflow.autolog()


# Train the model inside a single MLflow run
with mlflow.start_run() as run:
    model5.fit(
        x=X_train,
        y=y_train,
        validation_data=(X_test, y_test),
        epochs=100,
        batch_size=64,
        verbose=0,
    )

# Save the model first, then the dictionary inside the model folder
model5.save('./utile/model')
joblib.dump(dct,"./utile/model/dct.pkl")

2022/02/10 20:08:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2022/02/10 20:08:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for tensorflow.
2022/02/10 20:08:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.
2022/02/10 20:08:08 INFO mlflow.pyspark.ml: No SparkSession detected. Autologging will log pyspark.ml models contained in the default allowlist. To specify a custom allowlist, initialize a SparkSession prior to calling mlflow.pyspark.ml.autolog() and specify the path to your allowlist file via the spark.mlflow.pysparkml.autolog.logModelAllowlistFile conf.
2022/02/10 20:08:08 INFO mlflow.tracking.fluent: Autologging successfully enabled for pyspark.ml.
Using TensorFlow backend.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./model/assets


['./model/dct.pkl']

In [4]:
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice, Webservice

# Register the saved model folder and the dictionary pickle in the Azure ML workspace.
# NOTE(review): the second call rebinds `dct`, which until now held the token
# dictionary; any later code that expects `dct.token2id` will no longer find it.
# NOTE(review): the LSTM model is tagged 'type': "tokenizer" -- confirm this is intended.
model = Model.register(
    workspace=ws,
    model_name="Model_deep",
    model_path="./utile/model",
    description="First Model save",
    tags={'area': "NLP", 'type': "tokenizer"},
)

dct = Model.register(
    workspace=ws,
    model_name="dct",
    model_path="./utile/model/dct.pkl",
    description="Dictionary of tokens",
    tags={'area': "NLP", 'type': "Dict"},
)

Registering model Model_deep
Registering model dct


In [5]:
# Deployment environment and inference configuration
source_directory = "./utile/dependances"

# Environment holding the pip packages added below
env_tf_sk = Environment('tensorflow-scikit')
for pip_package in ("azureml-core", "gensim", "joblib", "scikit-learn", "tensorflow"):
    env_tf_sk.python.conda_dependencies.add_pip_package(pip_package)


# NOTE(review): confirm how `entry_script` is resolved with respect to
# `source_directory`; both are given here as paths starting with "./utile".
inference_config = InferenceConfig(
    environment=env_tf_sk,
    source_directory=source_directory,
    entry_script="./utile/y/score.py",
)

In [9]:
# ACI deployment settings: 2 CPU cores, 4 GB of memory, authentication enabled
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=2,
    memory_gb=4,
    auth_enabled=True,
)

# Create the web service from the two registered artefacts
service = Model.deploy(
    workspace=ws,
    name="model5",
    models=[dct, model],
    deployment_config=deployment_config,
    inference_config=inference_config,
)

# Block until the deployment has finished, streaming its progress
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-02-10 20:16:26+00:00 Creating Container Registry if not exists.
2022-02-10 20:16:26+00:00 Registering the environment.
2022-02-10 20:16:27+00:00 Use the existing image.
2022-02-10 20:16:27+00:00 Generating deployment configuration.
2022-02-10 20:16:29+00:00 Submitting deployment to compute.
2022-02-10 20:16:33+00:00 Checking the status of deployment model5..
2022-02-10 20:19:22+00:00 Checking the status of inference endpoint model5.
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [10]:
# Show how to reach the service.
# The primary key is a secret: only its first and last four characters are
# printed so that the full value never ends up in the saved notebook output.
# Read `primary` directly in the kernel when the full key is needed.
primary, secondary = service.get_keys()
print(f"{primary[:4]}…{primary[-4:]} (masked)")
print(service.scoring_uri)

ddAF…ge41 (masked)
http://0b15f0b2-7c89-4e2d-8036-33a163069b86.westeurope.azurecontainer.io/score


In [16]:
# Call the deployed service with three sample texts
import json

request_body = {
    'data': ["bad I hate horible", "Hello world good hapy", "bad"],
    'method': "prediction_with_neutral",
}
input_payload = json.dumps(request_body)

output = service.run(input_payload)
print(output)

[[1.5396204844364547e-07], [1.5396204844364547e-07], [1.5396204844364547e-07]]


In [54]:
# Local test of the prediction helper.
# Import the single name that is used instead of `import *`, so that nothing
# defined earlier in the notebook can be silently overwritten.
from functions import get_model
get_model(['Good', 'Bad'])

array([1, 1], dtype=int32)

# Conclusion

À l'issue de notre analyse comparative des différentes propositions d'approche permettant de détecter le sentiment négatif associé à un tweet, nous pouvons déterminer quel modèle sera le plus adapté.

Pour la construction d'un modèle de A à Z, la solution de deep learning (LSTM bidirectionnel avec couche d'embedding) offre une maîtrise complète du modèle, avec des temps de calcul relativement importants pendant la phase d'entraînement, mais qui sont ensuite quasi inexistants dans la phase de prédiction, contrairement aux approches API sur étagère ou API sur-mesure simple. De plus, le déploiement du modèle complète les caractéristiques de cette approche, bien que cette démarche se révèle la plus technique à mettre en place.