# OC - Parcours Ingénieur IA - Projet 7 - BERKAN Asli Ceren

## Modèle sur mesure

In [1]:
import os # pour récupérer les données dans env
from dotenv import load_dotenv # pour sécuriser la clé, le lien et la location

# Récupérer la clé et l'endpoint
load_dotenv()

# Définition des variables
SUSCRIPTION_ID = os.getenv("SUSCRIPTION_ID")
RESOURCE_GROUP = os.getenv("RESOURCE_GROUP")
WORKSPACE_NAME = os.getenv("WORKSPACE_NAME")

In [2]:
# Connexion à l'espace de travail
from azureml.core import Workspace
ws = Workspace(subscription_id=SUSCRIPTION_ID,
               resource_group=RESOURCE_GROUP,
               workspace_name=WORKSPACE_NAME)

# write the details of the workspace to a configuration file to the notebook library
ws.write_config()

In [3]:
%%writefile source_dir/echo_score.py
import json
import numpy as np
import os
from tensorflow import keras
from keras import backend as K
from azureml.core.model import Model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from nltk.stem import WordNetLemmatizer
import re
nltk.download('stopwords')
nltk.download('wordnet')

# définition des métriques
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

# Prétraitement
def preprocess(text):
    stop_words = stopwords.words("english")
    lemmatizer = WordNetLemmatizer()
    text_cleaning_re = "@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+|[-+]?\d*\.\d+|\d+"
    # Remove link,user and special characters
    text = re.sub(text_cleaning_re, ' ', str(text).lower()).strip()
    tokens = []
    for token in text.split():
        if token not in stop_words:
            tokens.append(lemmatizer.lemmatize(token))
    return " ".join(tokens)

def init():
    global model
    global tokenizer
    tokenizer_path = Model.get_model_path(model_name='tokenizer')
    with open(tokenizer_path, 'rb') as handle:
        tokenizer = pickle.load(handle)
        
    model_path = Model.get_model_path(model_name='model_keras_h5')
    model = keras.models.load_model(model_path,
                                     custom_objects={'f1_m':f1_m,
                                                     'precision_m':precision_m,
                                                     'recall_m':recall_m})

def run(raw_data):
    try:
        # Retourner prédictions du model
        data = np.array(json.loads(raw_data)['data'])
        data_prepocess = []
        for s in data :
            data_prepocess.append(preprocess(s))
        data_tok = pad_sequences(tokenizer.texts_to_sequences(data_prepocess), maxlen=300)
        predictions = model.predict(data_tok)
        sentiment =[]
        for s in predictions :
            if s>0.5 :
                sentiment.append('POSITIVE')
            else :
                sentiment.append('NEGATIVE')
        return sentiment
    except Exception as e:
        error = str(e)
        return error

Overwriting source_dir/echo_score.py


In [4]:
from azureml.core import Environment
from azureml.core.model import InferenceConfig

# Préparation de l'environnement
myenv = Environment(name="project_environment")
python_packages = ['nltk', 'numpy', 'tensorflow']
for package in python_packages:
    myenv.python.conda_dependencies.add_pip_package(package)

In [5]:
from azureml.core.webservice import AciWebservice

# Configuration de l'ACI
aciconfig = AciWebservice.deploy_configuration(cpu_cores=1, 
                                               memory_gb=1, 
                                               tags={"data": "sentiment140",  "method" : "Keras"}, 
                                               description='Predict sentiment with Keras')

In [7]:
from azureml.core.model import Model

# Inscrire le modèle final dans Azure
model = Model.register(model_path="modele_final.h5",
                        model_name='model_keras_h5',
                        workspace=ws)

# Inscrire le tokenizer final dans Azure
tokenizer = Model.register(model_path="tokenizer.pkl",
                        model_name='tokenizer',
                        workspace=ws)

Registering model model_keras_h5
Registering model tokenizer


In [8]:
%%time
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model
    
myenv.register(workspace=ws)
inference_config = InferenceConfig(entry_script="source_dir/echo_score.py", environment=myenv)

service = Model.deploy(workspace=ws, 
                       name='myservice', 
                       models=[model, tokenizer], 
                       inference_config=inference_config, 
                       deployment_config=aciconfig)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-12-08 21:07:38+01:00 Creating Container Registry if not exists.
2021-12-08 21:07:38+01:00 Registering the environment.
2021-12-08 21:07:41+01:00 Use the existing image.
2021-12-08 21:07:42+01:00 Generating deployment configuration.
2021-12-08 21:07:43+01:00 Submitting deployment to compute.
2021-12-08 21:07:47+01:00 Checking the status of deployment myservice..
2021-12-08 21:09:34+01:00 Checking the status of inference endpoint myservice.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Wall time: 2min 3s


In [9]:
import json
import pandas as pd

df_test = pd.read_csv('Data/Pretreated_data/df_test.csv', sep=',', encoding='utf-8', index_col=0)

# Préparation des données textuelles
raw_text = df_test.text.to_list()
raw_text = json.dumps({"data": raw_text})

In [10]:
import numpy as np

# Envoyer les données au service
y_hat = service.run(input_data=raw_text)
y_hat

['NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',
 'NEGATIVE',
 'NEGATIVE',
 'POSITIVE',
 'POSITIVE',
 'NEGATIVE',
 'POSITIVE',
 'NEGATIVE',