In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.neighbors import KNeighborsClassifier
import joblib

# Chargement du dataset nettoyé
df = pd.read_csv("../data/heart_cleaned.csv")


In [5]:
# Séparation features / cible
X = df.drop("target", axis=1)
y = df["target"]

# Train/Test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

print("Taille train :", X_train.shape)
print("Taille test  :", X_test.shape)
# Entraînement des modèles
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "XGBoost": xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss'),
    "KNN": KNeighborsClassifier(n_neighbors=5)
}   

Taille train : (242, 22)
Taille test  : (61, 22)


In [6]:
clf_lr = LogisticRegression(max_iter=1000)
clf_lr.fit(X_train, y_train)
joblib.dump(clf_lr, "../backend/models/heart_lr.pkl")
print("Logistic Regression entraînée et sauvegardée.")
clf_rf = RandomForestClassifier(n_estimators=100, random_state=42)
clf_rf.fit(X_train, y_train)
joblib.dump(clf_rf, "../backend/models/heart_rf.pkl")
print("Random Forest entraînée et sauvegardée.")
clf_xgb = xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss')
clf_xgb.fit(X_train, y_train)
joblib.dump(clf_xgb, "../backend/models/heart_xgb.pkl")
print("XGBoost entraînée et sauvegardée.")
clf_knn = KNeighborsClassifier(n_neighbors=5)
clf_knn.fit(X_train, y_train)
joblib.dump(clf_knn, "../backend/models/heart_knn.pkl")         

Logistic Regression entraînée et sauvegardée.
Random Forest entraînée et sauvegardée.
XGBoost entraînée et sauvegardée.


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


['../backend/models/heart_knn.pkl']

In [7]:
from sklearn.metrics import accuracy_score

for name, model in [
    ("Logistic Regression", clf_lr),
    ("Random Forest"      , clf_rf),
    ("XGBoost"            , clf_xgb),
    ("KNN"                , clf_knn)
]:
    acc = accuracy_score(y_test, model.predict(X_test))
    print(f"{name:20s} accuracy : {acc:.3f}")


Logistic Regression  accuracy : 0.869
Random Forest        accuracy : 0.770
XGBoost              accuracy : 0.721
KNN                  accuracy : 0.787
