In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.metrics import (
    classification_report, confusion_matrix,
    precision_score, recall_score
)
import joblib

# Locations of the persisted model and scaler artifacts.
# NOTE(review): joblib.load unpickles arbitrary Python objects — only load
# artifacts that come from a trusted source.
MODEL_PATH = "artifacts_diab_no_bias/model_lightgbm_diab_no_bias.pkl"
SCALER_PATH = "artifacts_diab_no_bias/scaler_diab_no_bias.pkl"

model = joblib.load(MODEL_PATH)
scaler = joblib.load(SCALER_PATH)

# Test-set CSV files generated in notebook 15.
X_test = pd.read_csv("X_test_chol.csv")
# Flatten the label file into a 1-D array.
y_test = pd.read_csv("y_test_chol.csv").to_numpy().ravel()

print("OK - Modèle et jeux test chargés.")


OK - Modèle et jeux test chargés.


In [2]:
# Cell 2 — Recompute the NUMERIC / BINARY column lists

# One-hot ethnicity indicator columns.
ethn = [
    "ethnicity_MexicanAmerican",
    "ethnicity_OtherHispanic",
    "ethnicity_NonHispanicWhite",
    "ethnicity_NonHispanicBlack",
    "ethnicity_NonHispanicAsian",
    "ethnicity_OtherRace",
]

# Candidate binary columns, kept only when actually present in X_test.
_binary_candidates = ["RIAGENDR", "smoker", *ethn]
_available = set(X_test.columns)
binary_cols = [name for name in _binary_candidates if name in _available]

# Every remaining column is treated as numeric, in X_test column order.
_binary_lookup = set(binary_cols)
num_cols = [name for name in X_test.columns if name not in _binary_lookup]


In [3]:
# Cell 3 — Rebuild X_test_scaled

# Transform only the columns listed in num_cols with the loaded scaler.
scaled_numeric = scaler.transform(X_test[num_cols])

# Work on a copy so X_test itself stays untouched; columns outside num_cols
# are carried over as-is.
X_test_scaled = X_test.copy()
X_test_scaled[num_cols] = scaled_numeric


In [4]:
# Cell 4 — Predicted probabilities

# predict_proba returns one column per class; keep column index 1.
class_probabilities = model.predict_proba(X_test_scaled)
probas = class_probabilities[:, 1]
print("Probabilités générées.")

Probabilités générées.


In [5]:
def evaluate_threshold(threshold, y_true, probas, verbose=True):
    """Binarise probabilities at ``threshold`` and measure precision / recall.

    Parameters
    ----------
    threshold : float
        Decision cut-off. A sample is predicted 1 when its probability is
        greater than or equal to this value, otherwise 0.
    y_true : array-like
        Ground-truth labels, compared against the 0/1 predictions.
    probas : array-like
        Predicted probabilities, one per sample. Lists, NumPy arrays and
        pandas Series are accepted.
    verbose : bool, default True
        When True (the original behaviour), print a banner, the
        classification report, precision, recall and the confusion matrix.
        Pass False to compute the metrics silently, e.g. in a long sweep.

    Returns
    -------
    tuple
        ``(precision, recall)`` as returned by ``precision_score`` and
        ``recall_score`` for the thresholded predictions.
    """
    # np.asarray makes the comparison work for plain lists too, which would
    # otherwise raise TypeError on ``list >= float``.
    y_pred = (np.asarray(probas) >= threshold).astype(int)

    # zero_division=0 keeps the same numeric result (0.0) as the default when
    # no sample is predicted positive, but without an UndefinedMetricWarning
    # at every high threshold.
    prec = precision_score(y_true, y_pred, zero_division=0)
    rec = recall_score(y_true, y_pred, zero_division=0)

    if verbose:
        print("="*60)
        print(f"SEUIL = {threshold:.3f}")
        print("="*60)

        print(classification_report(y_true, y_pred, digits=3, zero_division=0))

        print(f"Precision : {prec:.3f}")
        print(f"Recall    : {rec:.3f}")
        print("Confusion matrix :")
        print(confusion_matrix(y_true, y_pred))

    return prec, rec


In [6]:
# Sweep the decision threshold from 0.05 to 0.90 in steps of 0.05 (18 points).
# np.linspace fixes the number of points explicitly, whereas np.arange with a
# float step can gain or lose its last element depending on rounding.
# Rounding to 2 decimals removes floating-point drift from the threshold
# values stored in the table (e.g. 0.15000000000000002).
thresholds = np.round(np.linspace(0.05, 0.90, 18), 2)

# Collect one record per threshold, then build the DataFrame once.
rows = []
for th in thresholds:
    prec, rec = evaluate_threshold(th, y_test, probas)
    rows.append({"threshold": th, "precision": prec, "recall": rec})

df_thr = pd.DataFrame(rows, columns=["threshold", "precision", "recall"])
df_thr


SEUIL = 0.050
              precision    recall  f1-score   support

           0      0.998     0.361     0.531      1223
           1      0.165     0.994     0.283       155

    accuracy                          0.433      1378
   macro avg      0.581     0.677     0.407      1378
weighted avg      0.904     0.433     0.503      1378

Precision : 0.165
Recall    : 0.994
Confusion matrix :
[[442 781]
 [  1 154]]
SEUIL = 0.100
              precision    recall  f1-score   support

           0      0.998     0.426     0.597      1223
           1      0.180     0.994     0.305       155

    accuracy                          0.490      1378
   macro avg      0.589     0.710     0.451      1378
weighted avg      0.906     0.490     0.564      1378

Precision : 0.180
Recall    : 0.994
Confusion matrix :
[[521 702]
 [  1 154]]
SEUIL = 0.150
              precision    recall  f1-score   support

           0      0.995     0.465     0.634      1223
           1      0.189     0.981     0

Unnamed: 0,threshold,precision,recall
0,0.05,0.164706,0.993548
1,0.1,0.179907,0.993548
2,0.15,0.188586,0.980645
3,0.2,0.196636,0.980645
4,0.25,0.20433,0.974194
5,0.3,0.209302,0.929032
6,0.35,0.218944,0.909677
7,0.4,0.227953,0.883871
8,0.45,0.236462,0.845161
9,0.5,0.25813,0.819355
