## Prise en main MLflow

In [1]:
import os
import mlflow
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split


# Métriques & courbes
from sklearn.metrics import (accuracy_score, f1_score)

# import des modèles
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Optional: start a local MLflow tracking server / UI from a terminal with one of:
# mlflow server --host 127.0.0.1 --port 5000
# mlflow server --backend-store-uri mlruns --default-artifact-root mlruns --host 127.0.0.1 --port 5000

In [2]:
# Raw string: the Windows path contains backslashes, and sequences such as
# "\F", "\p" or "\d" are invalid escapes in a normal string literal
# (SyntaxWarning on recent Python versions). The resulting value is unchanged.
data_path = r"E:\Formation_Data_Analystic\projet_MLOps_GAI\Projet_MLOps\data\datasetfinal.csv"
target_col = "default"

# Load the dataset and separate the features from the target column.
df = pd.read_csv(data_path)
X = df.drop(target_col, axis=1)
y = df[target_col]

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Point MLflow at the local file-based tracking store (the project's mlruns directory).
mlflow.set_tracking_uri("file:///E:/Formation_Data_Analystic/projet_MLOps_GAI/Projet_MLOps/mlflow/mlruns")

### Random Forest

In [10]:

# Set the MLflow experiment under which this run is recorded.
mlflow.set_experiment("experiment_credit_default")

with mlflow.start_run() as run:
    # Train a depth-limited random forest with a fixed seed.
    model = RandomForestClassifier(
        random_state=42,
        max_depth=5,
        n_estimators=100,
    )
    model.fit(X=X_train, y=y_train)

    # Evaluate on the held-out test split.
    y_pred = model.predict(X=X_test)
    score = f1_score(y_true=y_test, y_pred=y_pred)

    # Record the estimator's complete parameter set and the test F1 score.
    params = model.get_params()
    mlflow.log_params(params=params)
    mlflow.log_metric(key="f1", value=score)

    # Print the run's artifact location, then store the fitted model under "model".
    print(mlflow.get_artifact_uri())
    mlflow.sklearn.log_model(model, "model")



file:///E:/Formation_Data_Analystic/projet_MLOps_GAI/Projet_MLOps/mlflow/mlruns/304591423748084014/0c9905eeb96d40b8b9c8b31443eea13b/artifacts




In [8]:
# Display the full parameter dict of the current `model` object.
model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 5,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'monotonic_cst': None,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}

### Modèle Decision Tree

In [None]:
# Train, evaluate and log a decision tree inside its own MLflow run.
mlflow.set_experiment("experiment_credit_default")
with mlflow.start_run() as run:
    # Keep the tuned hyperparameters in one dict so the values used to build
    # the model are exactly the ones logged to MLflow.
    params = {"max_depth": 5, "criterion": "gini"}
    model = DecisionTreeClassifier(**params, random_state=42)
    model.fit(X_train, y_train)

    # Score the model on the held-out test split.
    y_pred = model.predict(X_test)
    score = f1_score(y_test, y_pred)
    acc = accuracy_score(y_test, y_pred)

    # log_params takes a single dict; passing keyword arguments raises TypeError.
    mlflow.log_params(params)
    mlflow.log_metric("f1", score)
    # Log the computed accuracy value, not the accuracy_score function itself.
    mlflow.log_metric("accuracy", acc)

    # Print the run's artifact location, then store the fitted model under "model".
    print(mlflow.get_artifact_uri())
    mlflow.sklearn.log_model(model, "model")

### Modèle avec régression logistique

In [None]:
# Train, evaluate and log a logistic regression inside its own MLflow run.
mlflow.set_experiment("experiment_credit_default")
with mlflow.start_run() as run:
    # Keep the tuned hyperparameters in one dict so the values used to build
    # the model are exactly the ones logged to MLflow.
    params = {"C": 1.0, "solver": "lbfgs"}
    model = LogisticRegression(**params, random_state=42)
    model.fit(X_train, y_train)

    # Score the model on the held-out test split.
    y_pred = model.predict(X_test)
    f1_weight = f1_score(y_test, y_pred)
    acc = accuracy_score(y_test, y_pred)

    # log_params takes a single dict; passing keyword arguments raises TypeError.
    mlflow.log_params(params)
    # Log the computed metric values, not the sklearn metric functions.
    mlflow.log_metric("f1", f1_weight)
    mlflow.log_metric("accuracy", acc)

    # Print the run's artifact location, then store the fitted model under "model".
    print(mlflow.get_artifact_uri())
    mlflow.sklearn.log_model(model, "model")