<a href="https://colab.research.google.com/github/JosueAfouda/MLflow/blob/main/first_model_with_MLflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# MLflow is imported by the tracking cells further down, so it must be installed first.
# On a fresh environment (e.g. Colab), uncomment the next line and run this cell:
#%pip install mlflow

In [2]:
import pandas as pd

# Location of the raw mushrooms CSV (hosted on GitHub).
url = 'https://raw.githubusercontent.com/RiccardoAncarani/mushrooms-machine-learning/refs/heads/master/mushrooms.csv'

# Download and parse the file, then preview the first five rows.
dataset = pd.read_csv(url)
dataset.head(5)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g


In [None]:
from sklearn.model_selection import train_test_split

# Target vector: the 'class' column.
y = dataset['class']

# Feature matrix: every other column, one-hot encoded because the raw values
# are categorical strings. drop_first=True drops the first level of each
# variable so the dummy columns are not perfectly collinear.
X = pd.get_dummies(dataset.drop(columns=['class']), drop_first=True)

In [4]:
# Fixed seed so the split (and the seeded models below) are reproducible.
seed = 123

# Hold out 40% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=seed)

# Sanity-check the sizes of the four pieces, in the same order as before.
for part in (X_train, y_train, X_test, y_test):
    print(part.shape)

(4874, 22)
(4874,)
(3250, 22)
(3250,)


In [5]:
# Inspect the training features (pandas abbreviates the display of a large frame).
X_train

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
6812,x,s,e,f,f,f,c,n,b,t,...,s,w,w,p,w,o,e,w,v,p
2277,x,f,n,t,n,f,c,b,w,t,...,s,w,p,p,w,o,p,n,y,d
254,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,s,g
6229,f,y,n,f,y,f,c,n,b,t,...,s,p,w,p,w,o,e,w,v,d
5303,x,f,y,f,f,f,c,b,g,e,...,k,p,b,p,w,o,l,h,v,p
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4060,x,y,y,f,f,f,c,b,h,e,...,k,b,b,p,w,o,l,h,v,g
1346,f,y,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,n,v,g
3454,x,f,w,f,c,f,w,n,u,e,...,s,w,w,p,w,o,p,n,s,d
7533,x,s,n,f,n,f,c,b,w,e,...,y,n,n,p,w,t,p,w,y,p


In [6]:
# Inspect the training labels (the 'class' column, aligned with X_train by index).
y_train

6812    p
2277    e
254     e
6229    p
5303    p
       ..
4060    p
1346    p
3454    p
7533    e
3582    e
Name: class, Length: 4874, dtype: object

In [7]:
from sklearn.linear_model import LogisticRegression
# Baseline: fit a logistic regression without any MLflow tracking.
# NOTE(review): the ValueError stored as this cell's output ("could not convert
# string to float: 'x'") appears to come from a run where X still held raw string
# categories (the saved X_train output shows 22 string columns). The preprocessing
# cell now one-hot encodes X, so restart the kernel and run all cells to refresh
# the stale outputs.
clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)

ValueError: could not convert string to float: 'x'

En ajoutant quelques lignes de code au script précédent, on peut déjà enregistrer une première expérience avec MLflow.

In [None]:
from sklearn.linear_model import LogisticRegression
import mlflow
# Select the experiment to log into; one experiment can hold several runs.
mlflow.set_experiment("Reg_Logistic_Models")

# Enable MLflow's scikit-learn integration (MLflow Tracking): this call is what
# gets the metrics and metadata of the runs below recorded automatically.
mlflow.sklearn.autolog()

# First run: logistic regression with default hyper-parameters.
with mlflow.start_run(run_name="reg_logistic_defaut"):
    clf = LogisticRegression().fit(X_train, y_train)

In [None]:
# Second run inside the same experiment, this time with max_iter raised to 200.
with mlflow.start_run(run_name="reg_logistic_max_iter_200"):
    clf2 = LogisticRegression(max_iter=200).fit(X_train, y_train)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Switch to a second experiment dedicated to decision-tree models.
mlflow.set_experiment("Decision_Tree_Models")
mlflow.sklearn.autolog()

# First tree run: default hyper-parameters, seeded for reproducibility.
with mlflow.start_run(run_name="decision_tree_defaut"):
    dt1 = DecisionTreeClassifier(random_state=seed).fit(X_train, y_train)

In [None]:
# Second tree run in the current experiment: depth limited to 3.
with mlflow.start_run(run_name="decision_tree_max_depth3"):
    dt2 = DecisionTreeClassifier(max_depth=3, random_state=seed).fit(X_train, y_train)

In [None]:
import numpy as np
# Build one sample to score with a logged model.
# The models were trained on the one-hot-encoded mushroom features, so the input
# must carry exactly those columns. The previous hand-written row of four numeric
# values did not match that feature matrix and could not be scored. Taking a row
# from the held-out split guarantees matching columns and dtypes; the double
# brackets keep it a one-row DataFrame rather than a Series.
new_data = X_test.iloc[[0]]
new_data

In [None]:
# Check that the input is two-dimensional: one row per sample, one column per feature.
new_data.shape

In [None]:
# Find the most recent successful run of the chosen experiment.
experiment_name = "Reg_Logistic_Models"  # or "Decision_Tree_Models"

# get_experiment_by_name returns None for an unknown name; fail with a clear
# message instead of an AttributeError on the next line.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} was not found; run the training cells first."
    )

# Only consider FINISHED runs: a run whose fit() raised is recorded too, but has
# no model artifact to load. Newest first, and only one row is needed.
runs_df = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="attributes.status = 'FINISHED'",
    order_by=["start_time DESC"],
    max_results=1,
)
if runs_df.empty:
    raise RuntimeError(
        f"No finished run found in experiment {experiment_name!r}; run the training cells first."
    )

run_id = runs_df.iloc[0].run_id  # run_id of the latest finished run

In [None]:
# URI of the model artifact stored under the selected run.
logged_model = f"runs:/{run_id}/model"

# Load it through MLflow's generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(model_uri=logged_model)

# Score the sample and show the result.
prediction = loaded_model.predict(new_data)
print("Prédiction :", prediction)

In [None]:
pip install pyngrok

In [None]:
import os
from getpass import getpass

from pyngrok import ngrok

# Terminate any tunnels left open by a previous execution of this cell.
ngrok.kill()

# Setting the authtoken (optional).
# Get your authtoken from https://dashboard.ngrok.com/auth
# SECURITY: never hard-code the token in the notebook. It is read from the
# NGROK_AUTH_TOKEN environment variable, with a hidden interactive prompt as a
# fallback. The token that was previously committed in this cell should be
# revoked in the ngrok dashboard.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Open an HTTPS tunnel to http://localhost:5000, the port the tracking UI is
# expected to be served on.
ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
print("MLflow Tracking UI:", ngrok_tunnel.public_url)

In [None]:
# Start the MLflow tracking UI from the shell.
# NOTE(review): the ngrok tunnel forwards to port 5000, presumably the port
# `mlflow ui` listens on by default; confirm. The command keeps running until
# the cell is interrupted.
!mlflow ui

In [None]:
# NOTE(review): same command as the previous cell; this looks like a leftover
# duplicate. Consider removing one of the two cells.
!mlflow ui