In [2]:
# √âtape 1 : Importer les biblioth√®ques n√©cessaires
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report
import mlflow
import mlflow.sklearn
import seaborn as sns
import matplotlib.pyplot as plt

# √âtape 2 : Charger les donn√©es
data = pd.read_csv(r"C:\Users\marin\Downloads\Loan_Data.csv")

# Visualisation des premi√®res lignes du dataset
data.head()

# √âtape 3 : Pr√©traitement des donn√©es
X = data[['credit_lines_outstanding', 'loan_amt_outstanding', 'total_debt_outstanding', 'income', 'years_employed', 'fico_score']]
y = data['default']

# Division des donn√©es en ensembles d'entra√Ænement et de test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Normalisation des donn√©es
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# √âtape 4 : Initialiser le serveur MLflow (utilise un port diff√©rent)
mlflow.set_tracking_uri("http://127.0.0.1:6003")

# √âtape 5 : Entra√Æner et enregistrer un mod√®le de R√©gression Logistique
mlflow.set_experiment("prediction_defaut_credit_regression_logistique")

with mlflow.start_run(run_name="Mod√®le R√©gression Logistique"):
    log_reg = LogisticRegression(solver='lbfgs', max_iter=1000, random_state=42)
    log_reg.fit(X_train_scaled, y_train)
    
    y_pred = log_reg.predict(X_test_scaled)
    
    # √âvaluation du mod√®le
    accuracy = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("auc", auc)
    
    # Enregistrer le mod√®le dans MLflow
    mlflow.sklearn.log_model(log_reg, "modele_regression_logistique")

    # Afficher quelques m√©triques
    print(f"Accuracy: {accuracy}")
    print(f"AUC: {auc}")
    print(confusion_matrix(y_test, y_pred))
    print(classification_report(y_test, y_pred))

# √âtape 6 : Entra√Æner et enregistrer un mod√®le Random Forest
mlflow.set_experiment("prediction_defaut_credit_random_forest")

with mlflow.start_run(run_name="Mod√®le Random Forest"):
    rf = RandomForestClassifier(n_estimators=100, random_state=42)
    rf.fit(X_train_scaled, y_train)
    
    y_pred_rf = rf.predict(X_test_scaled)
    
    # √âvaluation du mod√®le
    accuracy_rf = accuracy_score(y_test, y_pred_rf)
    auc_rf = roc_auc_score(y_test, y_pred_rf)
    
    mlflow.log_metric("accuracy", accuracy_rf)
    mlflow.log_metric("auc", auc_rf)
    
    # Enregistrer le mod√®le dans MLflow
    mlflow.sklearn.log_model(rf, "modele_random_forest")
    
    # Afficher quelques m√©triques
    print(f"Accuracy: {accuracy_rf}")
    print(f"AUC: {auc_rf}")
    print(confusion_matrix(y_test, y_pred_rf))
    print(classification_report(y_test, y_pred_rf))


2024/09/16 21:32:00 INFO mlflow.tracking._tracking_service.client: üèÉ View run Mod√®le R√©gression Logistique at: http://127.0.0.1:6003/#/experiments/540196153384736126/runs/39da0c0dcad44e148a02727690a8979e.
2024/09/16 21:32:00 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:6003/#/experiments/540196153384736126.
2024/09/16 21:32:00 INFO mlflow.tracking.fluent: Experiment with name 'prediction_defaut_credit_random_forest' does not exist. Creating a new experiment.


Accuracy: 0.9963333333333333
AUC: 0.9912754008624999
[[2457    2]
 [   9  532]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2459
           1       1.00      0.98      0.99       541

    accuracy                           1.00      3000
   macro avg       1.00      0.99      0.99      3000
weighted avg       1.00      1.00      1.00      3000



2024/09/16 21:32:04 INFO mlflow.tracking._tracking_service.client: üèÉ View run Mod√®le Random Forest at: http://127.0.0.1:6003/#/experiments/212656686080867273/runs/61c44eb2ef934fbb85a1021843a55781.
2024/09/16 21:32:04 INFO mlflow.tracking._tracking_service.client: üß™ View experiment at: http://127.0.0.1:6003/#/experiments/212656686080867273.


Accuracy: 0.9963333333333333
AUC: 0.9919962805913469
[[2456    3]
 [   8  533]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      2459
           1       0.99      0.99      0.99       541

    accuracy                           1.00      3000
   macro avg       1.00      0.99      0.99      3000
weighted avg       1.00      1.00      1.00      3000

