### Créer et gérer un modèle de machine learning avec MLFlow

Mise en œuvre de MLflow Tracking


In [1]:
!pip install mlflow




[notice] A new release of pip is available: 23.3.1 -> 23.3.2
[notice] To update, run: python.exe -m pip install --upgrade pip




#### Data

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [3]:
# Load the iris dataset bundled with scikit-learn. The features are read
# later through `dataset.data` and the labels through `dataset.target`.
dataset = datasets.load_iris()
# NOTE(review): a bare `dataset` renders the whole object, including the full
# feature array, in the cell output; consider showing a short slice instead.
dataset

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

#### Split data

In [4]:
# Fixed seed, reused below for the split and for the seeded models.
seed = 123

# Hold out 20 % of the samples for testing; the rest is used for training.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.2,
    random_state=seed,
)


In [5]:
# Dimensions (rows, columns) of the training feature matrix.
X_train.shape

(120, 4)

In [6]:
# Dimensions (rows, columns) of the held-out test feature matrix.
X_test.shape

(30, 4)

In [7]:
# Number of training labels; should match the row count of X_train.
y_train.shape

(120,)

In [8]:
# Number of test labels; should match the row count of X_test.
y_test.shape

(30,)

#### Entraînement du modèle et enregistrement des métriques avec MLflow

In [22]:
from sklearn.linear_model import LogisticRegression
import mlflow
# Experiment
mlflow.set_experiment("Reg_Logistic_Models") # experiment name (groups one or more runs)
# Run definition: autologging is switched on first so that the fit below is
# tracked without any explicit logging call.
mlflow.sklearn.autolog()
with mlflow.start_run(run_name = "reg_logistic_default"):
    # Baseline model with default hyper-parameters. The solver stops at its
    # iteration limit here (see the warning in the cell output); the next run
    # raises max_iter.
    clf_def = LogisticRegression()
    clf_def.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [23]:
# Second run

# Same classifier with a higher iteration limit, tracked under its own run name.
with mlflow.start_run(run_name = "reg_logistic_max_iter_200"):
    clf2 = LogisticRegression(max_iter=200)
    clf2.fit(X_train, y_train)

#### Visualisation dans MLflow avec ngrok

##### Installation de ngrok

In [24]:
# Uncomment the line below if pyngrok is not installed yet.
#!pip install pyngrok

##### Fonction d'affichage

In [25]:
import os
from getpass import getpass

from pyngrok import ngrok

# Terminate any tunnel left open by a previous execution of this cell.
ngrok.kill()

# Authentication token from https://dashboard.ngrok.com/auth
# It is read from the NGROK_AUTH_TOKEN environment variable, with a hidden
# prompt as fallback, so the secret is never stored in the notebook. Any token
# that was ever written in clear text in a shared notebook should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPS tunnel to the local HTTP server on port 5000
# (http://localhost:5000), where the MLflow UI is expected to listen.
ngrok_tunnel = ngrok.connect(addr = "5000", proto = "http", bind_tls = True)
print("MLFLOW Tracking UI : " , ngrok_tunnel.public_url)

t=2023-12-21T11:53:00+0100 lvl=warn msg="can't bind default web address, trying alternatives" obj=web addr=127.0.0.1:4040


MLFLOW Tracking UI :  https://149e-176-171-148-102.ngrok-free.app


In [26]:
# Start the MLflow UI server. The shell command runs in the foreground and
# keeps the cell busy until it is interrupted (the `^C` in the output).
# NOTE(review): the ngrok tunnel opened earlier points at port 5000 — presumably
# the port this server listens on by default; confirm.
!mlflow ui

^C


### Prédiction (consommation du modèle généré avec MLflow)

In [32]:
import numpy as np
data_new_flower = np.array([[5.2, 3.9, 4.5, 1.8]])
data_new_flower

array([[5.2, 3.9, 4.5, 1.8]])

In [None]:
import mlflow

# NOTE(review): this is a placeholder, not a usable model URI. Replace it with
# the URI of a logged model (presumably of the form
# 'runs:/<run_id>/<artifact_path>', as shown in the MLflow UI) before running.
logged_model = 'runs: ......y_model'


# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
# `data_new_flower` is the single-sample array built in the previous cell.
prediction = loaded_model.predict(pd.DataFrame(data_new_flower))
prediction

#### Création d'une nouvelle expérience avec des arbres de décision

In [36]:

# Switch to a separate experiment so the decision-tree runs are grouped apart
# from the logistic-regression runs.
mlflow.set_experiment("Decision_Tree_Models")
from sklearn.tree import DecisionTreeClassifier
mlflow.sklearn.autolog()
with mlflow.start_run(run_name = "dt_defaut"): # common ML practice: start with a default model (to be tuned afterwards)
    # `seed` is the value defined in the split cell; it fixes the tree's randomness.
    dt = DecisionTreeClassifier(random_state = seed)
    dt.fit(X_train, y_train)

In [37]:
# Second run
# Same seeded tree, limited to a depth of 3, tracked under its own run name.
with mlflow.start_run(run_name = "dt_max_depth3"):
    dt1 = DecisionTreeClassifier(random_state = seed, max_depth = 3)
    dt1.fit(X_train, y_train)

##### Visualisation des résultats

In [38]:
import os
from getpass import getpass

from pyngrok import ngrok

# Terminate any tunnel left open by a previous execution.
ngrok.kill()

# Authentication token from https://dashboard.ngrok.com/auth
# It is read from the NGROK_AUTH_TOKEN environment variable, with a hidden
# prompt as fallback, so the secret is never stored in the notebook. Any token
# that was ever written in clear text in a shared notebook should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPS tunnel to the local HTTP server on port 5000
# (http://localhost:5000), where the MLflow UI is expected to listen.
ngrok_tunnel = ngrok.connect(addr = "5000", proto = "http", bind_tls = True)
print("MLFLOW Tracking UI : " , ngrok_tunnel.public_url)

t=2023-12-21T12:20:16+0100 lvl=warn msg="can't bind default web address, trying alternatives" obj=web addr=127.0.0.1:4040


MLFLOW Tracking UI :  https://775d-2001-861-32c5-f7a0-758d-78cc-9b9c-8b9.ngrok-free.app


In [None]:
# Start the MLflow UI server again to browse the runs recorded so far.
# The shell command runs in the foreground: the cell stays busy until the
# server is interrupted.
!mlflow ui