<a href="https://colab.research.google.com/github/JosueAfouda/MLflow/blob/main/first_model_with_MLflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#pip install mlflow

In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
dataset = datasets.load_iris()
seed = 123
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size = 0.4,
    random_state = seed
)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(90, 4)
(90,)
(60, 4)
(60,)


In [3]:
X_train

array([[4.5, 2.3, 1.3, 0.3],
       [6.7, 3.3, 5.7, 2.1],
       [5.7, 3. , 4.2, 1.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.8, 3.4, 1.6, 0.2],
       [6.3, 2.9, 5.6, 1.8],
       [6.4, 2.9, 4.3, 1.3],
       [7.7, 2.8, 6.7, 2. ],
       [7.7, 2.6, 6.9, 2.3],
       [5.1, 3.8, 1.9, 0.4],
       [6.4, 3.2, 4.5, 1.5],
       [5.5, 2.4, 3.7, 1. ],
       [5.9, 3. , 5.1, 1.8],
       [4.8, 3. , 1.4, 0.1],
       [7.2, 3. , 5.8, 1.6],
       [6.3, 3.3, 4.7, 1.6],
       [7. , 3.2, 4.7, 1.4],
       [5. , 3. , 1.6, 0.2],
       [6.4, 2.8, 5.6, 2.1],
       [6.3, 2.5, 5. , 1.9],
       [5. , 3.5, 1.6, 0.6],
       [4.9, 3. , 1.4, 0.2],
       [6.1, 2.8, 4. , 1.3],
       [6.5, 2.8, 4.6, 1.5],
       [6.3, 3.3, 6. , 2.5],
       [5.8, 4. , 1.2, 0.2],
       [4.6, 3.4, 1.4, 0.3],
       [5.5, 2.4, 3.8, 1.1],
       [5. , 3.4, 1.6, 0.4],
       [5.9, 3.2, 4.8, 1.8],
       [6.9, 3.1, 5.4, 2.1],
       [4.8, 3.1, 1.6, 0.2],
       [6.7, 2.5, 5.8, 1.8],
       [5.7, 4.4, 1.5, 0.4],
       [5.7, 3

In [4]:
y_train

array([0, 2, 1, 0, 0, 2, 1, 2, 2, 0, 1, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 0,
       1, 1, 2, 0, 0, 1, 0, 1, 2, 0, 2, 0, 0, 1, 0, 0, 1, 2, 1, 1, 1, 0,
       0, 1, 2, 0, 0, 1, 1, 1, 2, 1, 1, 1, 2, 0, 0, 1, 2, 2, 2, 2, 0, 1,
       0, 1, 1, 0, 1, 2, 1, 2, 2, 0, 1, 0, 2, 2, 1, 1, 2, 2, 1, 0, 1, 1,
       2, 2])

In [5]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression()
clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


En ajoutant quelques lignes de code au script précédent, on peut déjà enregistrer une première expérience avec MLflow.

In [6]:
from sklearn.linear_model import LogisticRegression
import mlflow
# Définition d'une expérience (Une expérience peut contenir plusieurs *runs*)
mlflow.set_experiment("Mushrooms-Classification")
mlflow.sklearn.autolog() # Ici on fait appel à l'API MLflow qui intégre les algos de Scikit-Learn.
                        # C'est grâce à ce code qu'on enregistre toutes les métriques et métadonnées de cette expérience (Module MLflow Tracking)
with mlflow.start_run(run_name = 'reg_logistic_defaut'):
  clf = LogisticRegression()
  clf.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
# Création d'un second run dans la même expérience
with mlflow.start_run(run_name = 'reg_logistic_max_iter_200'):
  clf2 = LogisticRegression(max_iter=200)
  clf2.fit(X_train, y_train)

In [8]:
# Création d'une seconde expérience
mlflow.set_experiment("Mushrooms-Classification")
mlflow.sklearn.autolog()
from sklearn.tree import DecisionTreeClassifier
with mlflow.start_run(run_name = 'decision_tree_defaut'):
  dt1 = DecisionTreeClassifier(random_state=seed)
  dt1.fit(X_train, y_train)

In [9]:
with mlflow.start_run(run_name = 'decision_tree_max_depth3'):
  dt2 = DecisionTreeClassifier(random_state=seed, max_depth=3)
  dt2.fit(X_train, y_train)

In [10]:
import numpy as np
new_data = np.array([[6.5, 3.2, 5.5, 1.9]])
new_data

array([[6.5, 3.2, 5.5, 1.9]])

In [11]:
new_data.shape

(1, 4)

In [12]:
# Récupérer le dernier run_id de l'expérience spécifiée
experiment_name = "Mushrooms-Classification"  # Ou "Decision_Tree_Models"
experiment = mlflow.get_experiment_by_name(experiment_name)
runs_df = mlflow.search_runs(experiment_ids=[experiment.experiment_id], order_by=["start_time DESC"])
run_id = runs_df.iloc[0].run_id  # Récupère le run_id de la dernière exécution

In [13]:
# Prédictions
logged_model = f"runs:/{run_id}/model"

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Predict on a Pandas DataFrame.
import pandas as pd
prediction = loaded_model.predict(new_data)
prediction

array([2])

In [14]:
import mlflow
from sklearn.metrics import accuracy_score, precision_score, recall_score
import mlflow.sklearn

# Définir l'URI de suivi pour utiliser l'adresse locale
mlflow.set_tracking_uri("http://192.168.1.131:5001/")

# Ensuite, configurez votre expérience et commencez à enregistrer les runs comme d'habitude
mlflow.set_experiment("Mushrooms-Classification")

with mlflow.start_run(run_name="run_with_local_server"):
    # Entraîner votre modèle sur les données d'entraînement
    clf.fit(X_train, y_train)

    # Prédire sur les données de test
    y_pred = clf.predict(X_test)

    # Enregistrer des paramètres (ex. : hyperparamètres)
    mlflow.log_param("model_type", "Logistic Regression")
    
    # Calculer et enregistrer des métriques
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="weighted")
    recall = recall_score(y_test, y_pred, average="weighted")

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)

    # Enregistrer le modèle dans MLflow pour qu'il soit réutilisable
    mlflow.sklearn.log_model(clf, "model")

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
2024/11/06 16:31:16 INFO mlflow.tracking._tracking_service.client: 🏃 View run run_with_local_server at: http://192.168.1.131:5001/#/experiments/28/runs/074b214cb2e14fed91225e2a24eb82b3.
2024/11/06 16:31:16 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://192.168.1.131:5001/#/experiments/28.


In [15]:
!mlflow server --host 192.168.1.131 --port 5001


Traceback (most recent call last):
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\site-packages\waitress\__main__.py", line 3, in <module>
    run()  # pragma nocover
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\site-packages\waitress\runner.py", line 298, in run
    _serve(app, **kw)
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\site-packages\waitress\__init__.py", line 13, in serve
    server = _server(app, **kw)
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib\site-packages\waitress\server.py", line 78, in create_server
    last_serv = TcpWSGIServer(
  File "C:\Users\tdessolin\AppData\Local\miniconda3\envs\mlflow\lib

In [16]:
#pip install pyngrok

In [17]:
# from pyngrok import ngrok
# # Terminate open tunnels if exist
# ngrok.kill()

# # Setting the authtoken (optional)
# # Get your authtoken from https://dashboard.ngrok.com/auth
# NGROK_AUTH_TOKEN = "2oTOaExNPQbTC7FhUxazhzFytpP_JgynyueMaZk9qaSJvnCv" 
# ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# # Open an HTTPs tunnel on port 5000 for http://localhost:5000
# ngrok_tunnel = ngrok.connect(addr="5000", proto="http", bind_tls=True)
# print("MLflow Tracking UI:", ngrok_tunnel.public_url)

In [18]:
#!mlflow ui