# P7 : Détectez les Bad Buzz grâce au Deep Learning

## Enregistrement du modèle dans Azure

## Importation des librairies

In [1]:
import os 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences 
from tensorflow.keras.layers import Dense, Embedding, LSTM, Dropout, SimpleRNN
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.initializers import Constant

## Connexion à mon espace de travail

In [2]:
import azureml.core
from azureml.core import Workspace

# Check the version number of the core SDK.
print("Azure ML SDK Version: ", azureml.core.VERSION)

# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
# Show the workspace name, location and resource group, tab-separated.
print(ws.name, ws.location, ws.resource_group, sep='\t')

Azure ML SDK Version:  1.37.0
sept	southcentralus	djawed


## Créer une expérience

In [3]:
from azureml.core import Experiment

# Name of the experiment under which the training runs are submitted.
nom_exp = 'tweet_analyser_1'

# Experiment handle bound to the workspace loaded earlier in the notebook.
experience1 = Experiment(name=nom_exp, workspace=ws)

## Joindre à l'instance de calcul

In [4]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
import os

# Cluster settings; each one can be overridden through an environment variable.
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpu-cluster")
# Environment variables are always strings, so convert the node counts to int
# before they are handed to the provisioning configuration.
compute_min_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
compute_max_nodes = int(os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

# This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")


if compute_name in ws.compute_targets:
    # Reuse the target that already exists under this name in the workspace.
    compute_target = ws.compute_targets[compute_name]
    if compute_target and isinstance(compute_target, AmlCompute):
        print("found compute target: " + compute_name)
    else:
        # Do not stay silent when the name is taken by another kind of target.
        print("warning: compute target " + compute_name + " exists but is not an AmlCompute cluster")
else:
    print("creating new compute target...")
    provisioning_config = AmlCompute.provisioning_configuration(vm_size=vm_size,
                                                                min_nodes=compute_min_nodes,
                                                                max_nodes=compute_max_nodes)

    # create the cluster
    compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout.
    # if no min node count is provided it will use the scale settings for the cluster
    compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

    # For a more detailed view of current AmlCompute status, use get_status()
    print(compute_target.get_status().serialize())

creating new compute target...
InProgress.
SucceededProvisioning operation finished, operation "Succeeded"
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Resizing', 'allocationStateTransitionTime': '2022-02-02T08:38:43.151000+00:00', 'errors': None, 'creationTime': '2022-02-02T08:38:42.714998+00:00', 'modifiedTime': '2022-02-02T08:38:46.317622+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT1800S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


## Charger les données du datastore

In [5]:
from azureml.core import Dataset, Run
# Name under which the tweet dataset is registered in the workspace.
nom_dataset = 'df'
# Look the registered dataset up by name in the workspace.
twitter_data = Dataset.get_by_name(workspace=ws, name=nom_dataset)
# Materialize the dataset as a pandas DataFrame in the notebook.
df_tweet = twitter_data.to_pandas_dataframe()

## créer un répertoire

In [6]:
# Folder, located under the current working directory, that receives the
# training script written by the next cell.
script_folder = os.path.join(os.getcwd(), "tf-ker_tweet")
# exist_ok keeps the cell re-runnable when the folder is already there.
os.makedirs(name=script_folder, exist_ok=True)
script_folder

'/mnt/batch/tasks/shared/LS_root/mounts/clusters/krairij1/code/Users/Krairij/tf-ker_tweet'

## Script de formation

In [7]:
%%writefile $script_folder/train.py

import argparse
import os
import numpy as np
import pandas as pd
import glob

import tensorflow as tf
from azureml.core import Run
from azureml.core import Dataset, workspace
from tensorflow.keras.layers import Dense, Flatten, Embedding, MaxPooling1D, Dropout
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential

ws = Workspace.from_config()

# Obtenir des données de formation
nom_dataset = 'df'
twitter_data = Dataset.get_by_name(workspace=ws, name=nom_dataset)
df_tweet = twitter_data.to_pandas_dataframe()

# partionner app-test
data_train, data_test = train_test_split(df_tweet, train_size=0.7, stratify=df_tweet.label, random_state=0)
tk = Tokenizer(num_words=None)
# Création du dictionnaire à partir des documents de l'échantillon d'apprentissage
tk.fit_on_texts(data_train.tweet)
# taille du dictionnaire
dico_size = len(tk.word_counts)
# transformation des documents en séquences de tokens
seq_train = tk.texts_to_sequences(data_train.tweet)
max_length = np.max(np.array([len(doc) for doc in seq_train]))
# marge_length
marge_length = 5
pad_train = pad_sequences(seq_train, maxlen=max_length + marge_length, padding='post')
seq_test = tk.texts_to_sequences(data_test.tweet)
pad_test = pad_sequences(seq_test, maxlen= max_length + marge_length, padding='post')
# créer un modèle 
# perception multicouche
#==> output_dim précise la taille de l'espace de représentation dans lequelle seront projetés les termes
#==> input_dim = dico_size + 1 à cause de l'index des termes commence à la colonne 1
#(la colonne 0 existe mais n'est pas associée à un  terme)
model_KSO = Sequential()
model_KSO.add(Embedding(input_dim = dico_size +1, output_dim = 10, input_length= max_length + marge_length))
# 2 opérations de régularisation 
model_KSO.add(MaxPooling1D(pool_size=5, strides=2))
model_KSO.add(Dropout(0.7))
# reprise du réseau habituel
model_KSO.add(Flatten())
model_KSO.add(Dense(units=1, activation="sigmoid"))
# paramétre de l'algorithme d'apprentissage
model_KSO.compile(loss="binary_crossentropy", optimizer = "adam", metrics=['accuracy'])

# lancement - une partie du train est réservée pour la validation c'est à dire pour un suivi plus réaliste des performance
history_KSO = model_KSO.fit(pad_train, data_train.label, epochs=5)
model_KSO.evaluate(pad_test, data_test.label)

# Obtenir le contexte d’exécution de l’expérience
run = Run.get_context()
# Enregistrer certaines sorties dans les journaux
para = history_KSO.params
loss = history_KSO.history['loss']
acc = history_KSO.history['accuracy']
run.log_list('params', para)
run.log_list('accuracy', acc)
run.log_list('loss', loss)

os.makedirs('./outputs/model', exist_ok=True)
# Le fichier de note enregistré dans le dossier des sorties est automatiquement téléchargé dans l’enregistrement de l’expérience
model_KSO.save('model_KSO_tweet')

Writing /mnt/batch/tasks/shared/LS_root/mounts/clusters/krairij1/code/Users/Krairij/tf-ker_tweet/train.py


## Créer un script de configuration d'exécution

In [8]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment listing the packages required by the training script.
envir = Environment('twitter-0-env')
cd = CondaDependencies.create(
    pip_packages=[
        'azureml-dataset-runtime[pandas,fuse]',
        'azureml-defaults'
        ],
    conda_packages=[
        'pip',
        'joblib',
        'matplotlib',
        # train.py imports sklearn.model_selection.train_test_split
        'scikit-learn',
        'tensorflow',
        ])

envir.python.conda_dependencies = cd

# Register environment to re-use later
envir.register(workspace = ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211124.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "twitter-0-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"


## Créer un ScriptRunConfig 

In [9]:
from azureml.core import ScriptRunConfig

# Run configuration tying together the script folder, the entry script
# (train.py), the compute target and the environment defined above.
src = ScriptRunConfig(source_directory=script_folder,
                      script='train.py', 
                      compute_target=compute_target,
                      environment=envir)

## Soumettre le travail
Lancez l’expérience en soumettant l’objet ScriptRunConfig. Et vous pouvez naviguer vers le portail Azure pour surveiller l’exécution.

In [10]:
# Submit the run configuration to the experiment; the returned run object
# is displayed as the cell output.
run = experience1.submit(config=src)
run

Experiment,Id,Type,Status,Details Page,Docs Page
tweet_analyser_1,tweet_analyser_1_1643791180_aca4300b,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


## Analyse du moniteur

In [11]:
from azureml.widgets import RunDetails
# Display the monitoring widget for the submitted run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

## Obtenir les résultats du journal une fois l’opération terminée
L'entraînement du modèle a lieu en arrière-plan. Vous pouvez utiliser wait_for_completion pour bloquer et attendre que l'entraînement du modèle soit terminé avant d’exécuter plus de code.

In [12]:
# Block until the run has finished, streaming its logs into the cell output.
run.wait_for_completion(show_output=True) 

RunId: tweet_analyser_1_1643791180_aca4300b
Web View: https://ml.azure.com/runs/tweet_analyser_1_1643791180_aca4300b?wsid=/subscriptions/cc471f5d-ca48-413b-bb86-0bb26a8001ac/resourcegroups/djawed/workspaces/sept&tid=b1581410-09bc-49d2-a39c-17c8aa1ad3b0

Streaming azureml-logs/20_image_build_log.txt

2022/02/02 08:39:49 Downloading source code...
2022/02/02 08:39:50 Finished downloading source code
2022/02/02 08:39:51 Creating Docker network: acb_default_network, driver: 'bridge'
2022/02/02 08:39:51 Successfully set up Docker network: acb_default_network
2022/02/02 08:39:51 Setting up Docker configuration...
2022/02/02 08:39:52 Successfully set up Docker configuration
2022/02/02 08:39:52 Logging in to registry: tsept.azurecr.io
2022/02/02 08:39:52 Successfully logged into tsept.azurecr.io
2022/02/02 08:39:52 Executing step ID: acb_step_0. Timeout(sec): 5400, Working directory: '', Network: 'acb_default_network'
2022/02/02 08:39:52 Scanning for dependencies...
2022/02/02 08:39:53 Success

ExperimentExecutionException: ExperimentExecutionException:
	Message: The output streaming for the run interrupted.
But the run is still executing on the compute target. 
Details for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "The output streaming for the run interrupted.\nBut the run is still executing on the compute target. \nDetails for canceling the run can be found here: https://aka.ms/aml-docs-cancel-run"
    }
}

## Afficher les indicateurs de formation

In [13]:
# Print the metrics recorded for the run (an empty dict when nothing has been logged yet).
print(run.get_metrics())

{}


## Fichiers associés à la dernière exécution

In [None]:
# Print the names of the files attached to the run.
print(run.get_file_names())

## Enregistrer le modèle

In [None]:
# Register the run's 'outputs/model_KSO_tweet' files as the model 'enr_model_tweet'.
# NOTE(review): this path must match where train.py saves the model — confirm.
model = run.register_model(model_name='enr_model_tweet', model_path='outputs/model_KSO_tweet')
# Show the name, id and version of the registered model, tab-separated.
print(model.name, model.id, model.version, sep='\t')

## Fonction d'appel

In [None]:
%%writefile score.py
import json
import os
import numpy as np
import tensorflow as tf
def init():
    global model
    print('in init ======================================================>')
    
    tf.keras.models.load_model('outputs/model_KSO_tweet')

def run(raw_data):
    print('in run ======================================================>')
    data = np.array(json.loads(raw_data)['data'])
    # faire des prévisions
    y_hat = model.predict(data)
    # vous pouvez renvoyer n’importe quel type de données tant qu’il est JSON-serializable
    return y_hat.tolist()

## Créer un fichier de configuration

In [None]:
from azureml.core.webservice import AciWebservice

# ACI deployment settings: 1 CPU core, 4 GB of memory, plus tags and a description.
aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=4, 
                                               tags={"data": "tweet",  "method" : "cnn"}, 
                                               description='Prèdire le sentiment un tweet avec cnn')

# Fetch the environment registered under 'twitter-0-env' from the workspace.
# `Environment` is not imported in this cell; it relies on an earlier cell's import.
myenv = Environment.get(workspace=ws, name='twitter-0-env')
myenv

## Déploiement dans l’ACI
Durée estimée : environ 2 à 5 minutes

Configurez l’image et déployez. Le code suivant passe par ces étapes :

>- Créer un objet d’environnement contenant les dépendances nécessaires au modèle en utilisant le fichier d’environnement (myenv.yml)
>- Créer la configuration d’inférence nécessaire pour déployer le modèle en tant que service Web en utilisant :
>- Le fichier de notation (score.py)
>- L’objet environnement créé à l’étape précédente
>- Déployer le modèle sur le conteneur ACI.
>- Obtenir le point de terminaison (endpoint) HTTP du service web

In [None]:
%%time
import uuid
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core.model import Model

# Handle on the model registered as 'enr_model_tweet' in the workspace.
model = Model(ws, 'enr_model_tweet')

# Inference configuration: scoring script plus the environment fetched earlier.
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)

# Service name with a 4-character random suffix taken from a UUID.
service_name = 'tweet-service' + str(uuid.uuid4())[:4]
service = Model.deploy(workspace=ws, 
                       name=service_name, 
                       models=[model], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

# Block until the deployment has finished, showing its progress.
service.wait_for_deployment(show_output=True)

## Obtenir le point de terminaison

In [None]:
# Print the scoring URI of the deployed web service.
print(service.scoring_uri)