# Mettre en place MLflow

In [25]:
# Install the following librairies (it is better to create a venv (or conda) virtual environment first and install these librairies in it)
!pip install mlflow
!pip install --upgrade jinja2
!pip install --upgrade Flask
!pip install setuptools



In [26]:

# Start an MLflow tracking server locally on 127.0.0.1:5000.
# NOTE(review): the command is run through `!` and the recorded output of this
# cell is `^C`, so the cell presumably blocks until it is interrupted —
# consider launching the server from a separate terminal instead.
!mlflow server --host 127.0.0.1 --port 5000


^C


Initier une nouvelle Expérience.

Démarrer des Exécutions au sein d'une Expérience.

Documenter les paramètres, les métriques et les balises pour vos Exécutions.

Enregistrer des artefacts liés aux exécutions, tels que des modèles, des tableaux, des graphiques, et plus encore.


In [5]:
from mlflow import MlflowClient
from pprint import pprint
from sklearn.ensemble import RandomForestRegressor


In [6]:
# To connect to the tracking server, use the URI that was assigned to the
# server when it was started.

client = MlflowClient(tracking_uri="http://127.0.0.1:5000")

# The client allows programmatic interaction with the MLflow tracking server.

Nous avons maintenant une interface client vers le serveur de suivi qui peut à la fois envoyer des données au serveur de suivi et en récupérer.



In [7]:
# Query the tracking server for its experiments (default search arguments)
# and print the returned list.
all_experiments = client.search_experiments()

print(all_experiments)


[<Experiment: artifact_location='mlflow-artifacts:/286315274138648484', creation_time=1725693195773, experiment_id='286315274138648484', last_update_time=1725693195773, lifecycle_stage='active', name='LogisticRegression_Models', tags={}>, <Experiment: artifact_location='mlflow-artifacts:/892581525100417762', creation_time=1725647809507, experiment_id='892581525100417762', last_update_time=1725647809507, lifecycle_stage='active', name='Apple_Models', tags={'mlflow.note.content': 'This is the grocery forecasting project. This '
                        'experiment contains the produce models for apples.',
 'project_name': 'grocery-forecasting',
 'project_quarter': 'Q3-2023',
 'store_dept': 'produce',
 'team': 'stores-ml'}>, <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1725646407256, experiment_id='0', last_update_time=1725646407256, lifecycle_stage='active', name='Default', tags={}>]


### Créer une expérience

In [8]:
# Description of the experiment; it is attached to the experiment through the
# "mlflow.note.content" tag so that it appears in the user interface.
experiment_description = "".join(
    [
        "Ceci est un projet de pr√©diction de d√©faut de paiement des clients. ",
        "Cette exp√©rience contient les mod√®les pour la pr√©diction de d√©faut de cr√©dit.",
    ]
)

# Searchable tags that characterise the runs belonging to this experiment.
experiment_tags = dict(
    project_name="credit-default-prediction",
    business_unit="risk-management",
    team="data-science",
    project_quarter="Q3-2023",
)
# This key contains dots, so it cannot be passed as a keyword argument above.
experiment_tags["mlflow.note.content"] = experiment_description

# Create the experiment under a unique name, or reuse it when it already
# exists. create_experiment raises when the name is already taken, which made
# this cell fail on every re-run against the same tracking server.
# credit_default_experiment holds the experiment id in both branches.
_existing_experiment = client.get_experiment_by_name("Credit_Default_Models")
if _existing_experiment is None:
    credit_default_experiment = client.create_experiment(
        name="Credit_Default_Models", tags=experiment_tags
    )
else:
    credit_default_experiment = _existing_experiment.experiment_id

In [9]:

# Use search_experiments() to look up experiments by their project_name tag.
# Note: credit_default_experiment is rebound here to the value returned by
# search_experiments(); the first match is indexed below.

credit_default_experiment = client.search_experiments(
    filter_string="tags.`project_name` = 'credit-default-prediction'"
)

# Print the attributes of the first matching experiment as a dict.
print(vars(credit_default_experiment[0]))



{'_experiment_id': '544091467554629469', '_name': 'Credit_Default_Models', '_artifact_location': 'mlflow-artifacts:/544091467554629469', '_lifecycle_stage': 'active', '_tags': {'business_unit': 'risk-management', 'mlflow.note.content': 'Ceci est un projet de pr√©diction de d√©faut de paiement des clients. Cette exp√©rience contient les mod√®les pour la pr√©diction de d√©faut de cr√©dit.', 'project_name': 'credit-default-prediction', 'project_quarter': 'Q3-2023', 'team': 'data-science'}, '_creation_time': 1725784122891, '_last_update_time': 1725784122891}


### Dataset

In [10]:
import pandas as pd

# Load the loan dataset from a CSV file (path relative to the working directory).
data = pd.read_csv("Loan_Data(1).csv")



In [11]:

# Preview the first rows of the loaded dataset.
data.head()

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


### Logging our first runs with MLflow

In [12]:
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [13]:
# This function call sets the global tracking URI for the current session.
# It is a convenient way to configure the tracking server URI without creating a separate client instance.

mlflow.set_tracking_uri("http://127.0.0.1:5000")


In [15]:
# Set the active experiment to "Client_Default_Prediction" and keep the value
# returned by mlflow.set_experiment.
customer_default_experiment = mlflow.set_experiment("Client_Default_Prediction")

# Define a run name for this iteration of training.
# If this is not set, a unique name will be auto-generated for your run.
# NOTE(review): run_name is not referenced by the cells visible in this
# notebook — confirm whether it is still needed.
run_name = "default_rf_test"

# Define an artifact path that the model will be saved to.
# NOTE(review): artifact_path is not referenced by the cells visible in this
# notebook either; the training cells pass their own literal artifact paths.
artifact_path = "rf_default_model"



### Modèle

In [27]:
from sklearn.linear_model import LogisticRegression
import pandas as pd
import numpy as np
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report

# Point MLflow at the local tracking server.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Create or select the experiment that groups the logistic-regression runs.
mlflow.set_experiment("LogisticRegression_Models")

# Hyper-parameter combinations to evaluate. Each dict is passed verbatim to
# LogisticRegression(**params) and logged with mlflow.log_params.
params_list = [
    {"C": 0.01, "solver": 'liblinear', "random_state": 42, "class_weight": 'balanced'},
    {"C": 0.1, "solver": 'liblinear', "random_state": 42, "class_weight": 'balanced'},
    {"C": 1.0, "solver": 'liblinear', "random_state": 42, "class_weight": 'balanced'},
    {"C": 10.0, "solver": 'liblinear', "random_state": 42, "class_weight": 'balanced'},
    {"C": 100.0, "solver": 'liblinear', "random_state": 42, "class_weight": 'balanced'},
    {"C": 1.0, "solver": 'lbfgs', "random_state": 42, "class_weight": 'balanced', "max_iter": 100},
    {"C": 1.0, "solver": 'lbfgs', "random_state": 42, "class_weight": 'balanced', "max_iter": 200},
    {"C": 1.0, "solver": 'saga', "random_state": 42, "class_weight": 'balanced', "max_iter": 100},
    {"C": 1.0, "solver": 'saga', "random_state": 42, "class_weight": 'balanced', "max_iter": 200},
    {"C": 0.5, "solver": 'saga', "random_state": 42, "class_weight": 'balanced', "penalty": 'l1'},
    {"C": 0.5, "solver": 'saga', "random_state": 42, "class_weight": 'balanced', "penalty": 'l2'},
    {"C": 0.01, "solver": 'lbfgs', "random_state": 42, "class_weight": 'balanced', "max_iter": 1000},
]

# Features are every column except the customer identifier and the target.
X = data.drop(columns=["customer_id", "default"])
y = data["default"]

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Only a handful of rows are logged as the model's input example. Passing the
# whole validation frame would store every validation row inside each logged
# model artifact.
input_example = X_val.head(5)

# NOTE(review): the recorded output of the lbfgs runs with max_iter 100 and 200
# contains scikit-learn's "ITERATIONS REACHED LIMIT" warning, so those fits did
# not converge — consider scaling the features or raising max_iter.

# Train and log one MLflow run per parameter combination.
for i, params in enumerate(params_list):
    with mlflow.start_run(run_name=f"logistic_regression_run_{i+1}"):
        # Fit the logistic regression with the current parameters.
        lr_model = LogisticRegression(**params)
        lr_model.fit(X_train, y_train)

        # Predict on the validation split.
        y_pred = lr_model.predict(X_val)

        # Classification metrics computed on the validation split.
        accuracy = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred)
        precision = precision_score(y_val, y_pred)
        recall = recall_score(y_val, y_pred)

        metrics = {"accuracy": accuracy, "f1_score": f1, "precision": precision, "recall": recall}

        # Record the parameters and metrics of this run.
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)

        # Log the fitted model as a run artifact.
        mlflow.sklearn.log_model(
            sk_model=lr_model,
            input_example=input_example,
            artifact_path="logistic_regression_model",
        )

        # Print the classification report for this run.
        print(f"Run {i+1} completed with parameters: {params}")
        print(classification_report(y_val, y_pred))


2024/09/08 11:11:27 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_1 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/89d7fadeb5ca4611931072a1c1295893.
2024/09/08 11:11:27 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 1 completed with parameters: {'C': 0.01, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:11:29 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_2 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/2048d65d33e04e839cf471cc4bbeb47f.
2024/09/08 11:11:29 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 2 completed with parameters: {'C': 0.1, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:11:32 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_3 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/5644f6235894494cb34166b0a1bcc628.
2024/09/08 11:11:32 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 3 completed with parameters: {'C': 1.0, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:11:35 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_4 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/2b1cd40a447c45e88d0b1b0a84042e32.
2024/09/08 11:11:35 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 4 completed with parameters: {'C': 10.0, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:11:38 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_5 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/a831d5a14215403e92bc321f5adddd57.
2024/09/08 11:11:38 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 5 completed with parameters: {'C': 100.0, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
2024/09/08 11:11:43 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_6 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/73c980e456424026acfd69e87661592a.
2024/09/08 11:11:43 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 6 completed with parameters: {'C': 1.0, 'solver': 'lbfgs', 'random_state': 42, 'class_weight': 'balanced', 'max_iter': 100}
              precision    recall  f1-score   support

           0       1.00      0.99      0.99      1652
           1       0.95      0.98      0.96       348

    accuracy                           0.99      2000
   macro avg       0.97      0.99      0.98      2000
weighted avg       0.99      0.99      0.99      2000



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
2024/09/08 11:11:48 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_7 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/a735153917464d398d884720e48ee497.
2024/09/08 11:11:48 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 7 completed with parameters: {'C': 1.0, 'solver': 'lbfgs', 'random_state': 42, 'class_weight': 'balanced', 'max_iter': 200}
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1652
           1       0.99      0.99      0.99       348

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000



2024/09/08 11:11:53 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_8 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/65bc7bc18ba24f40a4e0b1a7e234ee00.
2024/09/08 11:11:53 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 8 completed with parameters: {'C': 1.0, 'solver': 'saga', 'random_state': 42, 'class_weight': 'balanced', 'max_iter': 100}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:11:58 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_9 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/45eb59bd12ac4eb294b28493ad10e5b6.
2024/09/08 11:11:58 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 9 completed with parameters: {'C': 1.0, 'solver': 'saga', 'random_state': 42, 'class_weight': 'balanced', 'max_iter': 200}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:12:03 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_10 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/a07c280d492c4e85bd20afb08b7bf334.
2024/09/08 11:12:03 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 10 completed with parameters: {'C': 0.5, 'solver': 'saga', 'random_state': 42, 'class_weight': 'balanced', 'penalty': 'l1'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:12:09 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_11 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/a22926bb243e4b2c84420acb91b5b78f.
2024/09/08 11:12:09 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 11 completed with parameters: {'C': 0.5, 'solver': 'saga', 'random_state': 42, 'class_weight': 'balanced', 'penalty': 'l2'}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.91      0.98      0.94       348

    accuracy                           0.98      2000
   macro avg       0.95      0.98      0.96      2000
weighted avg       0.98      0.98      0.98      2000



2024/09/08 11:12:14 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_12 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/fadf18eb20bd4ea29a775ab3efd157ae.
2024/09/08 11:12:14 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run 12 completed with parameters: {'C': 0.01, 'solver': 'lbfgs', 'random_state': 42, 'class_weight': 'balanced', 'max_iter': 1000}
              precision    recall  f1-score   support

           0       1.00      0.98      0.99      1652
           1       0.92      0.99      0.95       348

    accuracy                           0.98      2000
   macro avg       0.96      0.99      0.97      2000
weighted avg       0.98      0.98      0.98      2000



In [28]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pickle

# Split the data into features (X) and target (y); the customer identifier and
# the target column are removed from the features.
X = data.drop(columns=["customer_id", "default"])
y = data["default"]

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Select (or create) the experiment that groups the random-forest runs.
mlflow.set_experiment("RandomForest_Models")

# Hyper-parameter combinations to evaluate. Each dict is passed verbatim to
# RandomForestClassifier(**params) and logged with mlflow.log_params.
params_list = [
    {"n_estimators": 50, "max_depth": 4, "min_samples_split": 5, "min_samples_leaf": 2, "bootstrap": True, "oob_score": True, "random_state": 888, "class_weight": 'balanced'},
    {"n_estimators": 100, "max_depth": 6, "min_samples_split": 10, "min_samples_leaf": 4, "bootstrap": True, "oob_score": True, "random_state": 888, "class_weight": 'balanced'},
    {"n_estimators": 200, "max_depth": 8, "min_samples_split": 15, "min_samples_leaf": 6, "bootstrap": True, "oob_score": True, "random_state": 888, "class_weight": 'balanced'},
    {"n_estimators": 100, "max_depth": None, "min_samples_split": 10, "min_samples_leaf": 2, "bootstrap": True, "oob_score": True, "random_state": 888, "class_weight": 'balanced'},
    {"n_estimators": 150, "max_depth": 10, "min_samples_split": 8, "min_samples_leaf": 3, "bootstrap": True, "oob_score": True, "random_state": 888, "class_weight": 'balanced'},
    {"n_estimators": 200, "max_depth": None, "min_samples_split": 5, "min_samples_leaf": 1, "bootstrap": False, "oob_score": False, "random_state": 888, "class_weight": 'balanced'},
]

# Only a handful of rows are logged as the model's input example. Passing the
# whole validation frame would store every validation row inside each logged
# model artifact.
input_example = X_val.head(5)

# Best model seen so far and its validation F1. Previously the variable was
# overwritten on every iteration, so the "best" model was simply the last one.
best_rf_model = None
best_f1 = float("-inf")

# Train and log one MLflow run per parameter combination.
for params in params_list:
    with mlflow.start_run(run_name=f"random_forest_run_{params['n_estimators']}_{params['max_depth']}"):
        # Fit the random forest with the current parameters.
        rf = RandomForestClassifier(**params)
        rf.fit(X_train, y_train)

        # Predict on the validation split.
        y_pred = rf.predict(X_val)

        # Classification metrics computed on the validation split.
        accuracy = accuracy_score(y_val, y_pred)
        f1 = f1_score(y_val, y_pred)
        precision = precision_score(y_val, y_pred)
        recall = recall_score(y_val, y_pred)

        metrics = {"accuracy": accuracy, "f1_score": f1, "precision": precision, "recall": recall}

        # Record the parameters and metrics of this run.
        mlflow.log_params(params)
        mlflow.log_metrics(metrics)

        # Log the fitted model as a run artifact for later use.
        mlflow.sklearn.log_model(
            sk_model=rf, input_example=input_example, artifact_path="random_forest_model"
        )

        print(f"Run completed with parameters: {params}")
        print(f"Accuracy: {accuracy:.4f}, F1-Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

        # Keep this model only when it beats the best validation F1 so far
        # (ties keep the earlier model).
        if f1 > best_f1:
            best_f1 = f1
            best_rf_model = rf

# Persist the best model (highest validation F1) with pickle.
# NOTE(review): the file name has no "." before "pkl"; it is left unchanged in
# case other code loads this exact path — confirm and rename if it is a typo.
if best_rf_model is not None:
    with open("random_forest_model2pkl", "wb") as f:
        pickle.dump(best_rf_model, f)

2024/09/08 11:27:31 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_50_4 at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/6a5414f281fa4e58853633ab80b04bb7.
2024/09/08 11:27:31 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 50, 'max_depth': 4, 'min_samples_split': 5, 'min_samples_leaf': 2, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9725, F1-Score: 0.9262, Precision: 0.8690, Recall: 0.9914


2024/09/08 11:27:34 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_100_6 at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/59e10e1497664757b68d66c277e0252b.
2024/09/08 11:27:34 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 100, 'max_depth': 6, 'min_samples_split': 10, 'min_samples_leaf': 4, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9915, F1-Score: 0.9759, Precision: 0.9636, Recall: 0.9885


2024/09/08 11:27:37 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_200_8 at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/5c9154598590432f8fddc5ef94f357dc.
2024/09/08 11:27:37 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 200, 'max_depth': 8, 'min_samples_split': 15, 'min_samples_leaf': 6, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9890, F1-Score: 0.9689, Precision: 0.9528, Recall: 0.9856


2024/09/08 11:27:40 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_100_None at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/af454d45f1bb44f782714035c0f595d8.
2024/09/08 11:27:40 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 100, 'max_depth': None, 'min_samples_split': 10, 'min_samples_leaf': 2, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9940, F1-Score: 0.9828, Precision: 0.9828, Recall: 0.9828


2024/09/08 11:27:43 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_150_10 at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/aa42fcd8a960450495394ea589b0536a.
2024/09/08 11:27:43 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 150, 'max_depth': 10, 'min_samples_split': 8, 'min_samples_leaf': 3, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9925, F1-Score: 0.9785, Precision: 0.9744, Recall: 0.9828


2024/09/08 11:27:46 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_200_None at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/c943fb900a42473c89d60cde69e6b326.
2024/09/08 11:27:46 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 200, 'max_depth': None, 'min_samples_split': 5, 'min_samples_leaf': 1, 'bootstrap': False, 'oob_score': False, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9955, F1-Score: 0.9870, Precision: 0.9913, Recall: 0.9828


#### Modèle pour faire le versioning comme demandé

In [30]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pickle

# Split the data into features (X) and target (y); the customer identifier and
# the target column are removed from the features.
X = data.drop(columns=["customer_id", "default"])
y = data["default"]

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Select (or create) the experiment that groups the random-forest runs.
mlflow.set_experiment("RandomForest_Models")

# Single parameter set for the RandomForestClassifier; passed verbatim to the
# estimator and logged with mlflow.log_params.
params = {
    "n_estimators": 100,
    "max_depth": 5,
    "min_samples_split": 20,
    "min_samples_leaf": 10,
    "bootstrap": True,
    "oob_score": True,
    "random_state": 888,
    "class_weight": 'balanced'
}

# Only a handful of rows are logged as the model's input example. Passing the
# whole validation frame would store every validation row inside the logged
# model artifact.
input_example = X_val.head(5)

# Train the model and log it in one MLflow run.
with mlflow.start_run(run_name=f"random_forest_run_{params['n_estimators']}_{params['max_depth']}"):
    # Fit the random forest with the parameters above.
    rf = RandomForestClassifier(**params)
    rf.fit(X_train, y_train)

    # Predict on the validation split.
    y_pred = rf.predict(X_val)

    # Classification metrics computed on the validation split.
    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)

    metrics = {"accuracy": accuracy, "f1_score": f1, "precision": precision, "recall": recall}

    # Record the parameters and metrics of this run.
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    # Log the fitted model as a run artifact for later use.
    mlflow.sklearn.log_model(
        sk_model=rf, input_example=input_example, artifact_path="random_forest_model"
    )

    print(f"Run completed with parameters: {params}")
    print(f"Accuracy: {accuracy:.4f}, F1-Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")




2024/09/08 11:41:35 INFO mlflow.tracking._tracking_service.client: üèÉ View run random_forest_run_100_5 at: http://127.0.0.1:5000/#/experiments/276980095504201702/runs/5a8973b247484af595035f94c1da3908.
2024/09/08 11:41:35 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/276980095504201702.


Run completed with parameters: {'n_estimators': 100, 'max_depth': 5, 'min_samples_split': 20, 'min_samples_leaf': 10, 'bootstrap': True, 'oob_score': True, 'random_state': 888, 'class_weight': 'balanced'}
Accuracy: 0.9825, F1-Score: 0.9517, Precision: 0.9151, Recall: 0.9914


In [31]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import pickle

# Split the data into features (X) and target (y); the customer identifier and
# the target column are removed from the features.
X = data.drop(columns=["customer_id", "default"])
y = data["default"]

# Hold out 20% of the rows for validation (fixed seed for reproducibility).
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Select (or create) the experiment that groups the logistic-regression runs.
mlflow.set_experiment("LogisticRegression_Models")

# Single parameter set for LogisticRegression; passed verbatim to the
# estimator and logged with mlflow.log_params.
params = {
    "C": 1.0,
    "solver": 'liblinear',
    "random_state": 42,
    "class_weight": 'balanced'
}

# Only a handful of rows are logged as the model's input example. Passing the
# whole validation frame would store every validation row inside the logged
# model artifact.
input_example = X_val.head(5)

# Train the model and log it in one MLflow run.
with mlflow.start_run(run_name=f"logistic_regression_run_{params['C']}"):
    # Fit the logistic regression with the parameters above.
    lr = LogisticRegression(**params)
    lr.fit(X_train, y_train)

    # Predict on the validation split.
    y_pred = lr.predict(X_val)

    # Classification metrics computed on the validation split.
    accuracy = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred)
    recall = recall_score(y_val, y_pred)

    metrics = {"accuracy": accuracy, "f1_score": f1, "precision": precision, "recall": recall}

    # Record the parameters and metrics of this run.
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    # Log the fitted model as a run artifact for later use.
    mlflow.sklearn.log_model(
        sk_model=lr, input_example=input_example, artifact_path="logistic_regression_model"
    )

    print(f"Run completed with parameters: {params}")
    print(f"Accuracy: {accuracy:.4f}, F1-Score: {f1:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}")

# Persist the fitted model with pickle.
with open("logistic_regression_model.pkl", "wb") as f:
    pickle.dump(lr, f)


2024/09/08 11:43:44 INFO mlflow.tracking._tracking_service.client: üèÉ View run logistic_regression_run_1.0 at: http://127.0.0.1:5000/#/experiments/286315274138648484/runs/133483d3214746fbb16a0d31f8f85a7e.
2024/09/08 11:43:44 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:5000/#/experiments/286315274138648484.


Run completed with parameters: {'C': 1.0, 'solver': 'liblinear', 'random_state': 42, 'class_weight': 'balanced'}
Accuracy: 0.9790, F1-Score: 0.9420, Precision: 0.9069, Recall: 0.9799
