In [6]:
from sklearn.model_selection import cross_val_score
import pandas as pd

# Machine learning models
from sklearn.linear_model import LogisticRegression  # Logistic regression
from sklearn.tree import DecisionTreeClassifier      # Decision tree
from sklearn.ensemble import RandomForestClassifier  # Random forest
from sklearn.ensemble import GradientBoostingClassifier  # Gradient boosting
from sklearn.ensemble import AdaBoostClassifier  # AdaBoost (high-performing model)

# Evaluation metrics
from sklearn.metrics import (
    accuracy_score,      # Overall accuracy
    precision_score,     # Precision (TP / (TP + FP))
    recall_score,        # Recall (TP / (TP + FN))
    f1_score,            # F1 score (harmonic mean of precision and recall)
    roc_auc_score,       # ROC AUC (area under the ROC curve)
    confusion_matrix,    # Confusion matrix
    classification_report,  # Detailed report
    roc_curve           # ROC curve
)

# Hyperparameter optimization
# NOTE(review): cross_val_score is already imported on the first line of this
# cell; the repeated import below is redundant but harmless.
from sklearn.model_selection import GridSearchCV, cross_val_score  # Grid search and cross-validation


# NOTE(review): joblib is not used in the cells visible here - presumably kept
# for persisting a trained model later in the notebook; TODO confirm.
import joblib


In [8]:
# Read the prepared feature tables for the train and test splits.
X_train, X_test = (
    pd.read_csv(csv_name)
    for csv_name in ('X_train_prepared.csv', 'X_test_prepared.csv')
)

# Read the matching targets and flatten each one to a 1-D array.
# NOTE(review): assumes each target file holds a single column, so that the
# flattened length equals the number of feature rows - TODO confirm.
y_train, y_test = (
    pd.read_csv(csv_name).values.ravel()
    for csv_name in ('y_train_prepared.csv', 'y_test_prepared.csv')
)


In [9]:
# Banner announcing step 5 (model training and comparison).
print("\n\nðŸ¤– Ã‰TAPE 5 : ENTRAÃŽNEMENT ET COMPARAISON DE MODÃˆLES")
print(80 * "=")

# Candidate classifiers keyed by their display name. Every estimator receives
# the same random_state; the list order is the order in which the models are
# iterated afterwards.
models = dict([
    ('Logistic Regression', LogisticRegression(random_state=42, max_iter=1000)),
    ('Decision Tree', DecisionTreeClassifier(random_state=42, max_depth=10)),
    ('Random Forest', RandomForestClassifier(random_state=42, n_estimators=100)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=42, n_estimators=100)),
    ('AdaBoost', AdaBoostClassifier(random_state=42, n_estimators=100)),
])

# Container for the evaluation results; starts out empty.
results = dict()



ðŸ¤– Ã‰TAPE 5 : ENTRAÃŽNEMENT ET COMPARAISON DE MODÃˆLES


In [11]:
print("\nðŸ”„ EntraÃ®nement des modÃ¨les en cours...\n")

# For every candidate model: fit on the training split, score on the test
# split, store the metrics in `results`, then run a 5-fold cross-validation
# (ROC-AUC) on the training split and store its mean / standard deviation too.
# NOTE(review): the test-set metrics are best kept for final reporting; the
# cross-validation scores stored below are the ones computed without touching
# the test split, so they are the safer basis for choosing a model.
for name, model in models.items():
    print('=' * 60)
    print(f"ðŸ“Š ModÃ¨le : {name}")
    print('=' * 60)

    # Fit the estimator on the training split.
    model.fit(X_train, y_train)
    print("  âœ… EntraÃ®nement terminÃ©")

    # Predicted labels, plus the predicted probability taken from column 1 of
    # predict_proba.
    # NOTE(review): assumes a binary target whose positive class is the second
    # entry of model.classes_ - TODO confirm.
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Test-set metrics.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_proba)

    # Keep the fitted model, its metrics and its raw predictions.
    results[name] = {
        'model': model,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'roc_auc': roc_auc,
        'y_pred': y_pred,
        'y_pred_proba': y_pred_proba
    }

    # Report the test-set performance.
    print("\n  ðŸ“ˆ Performances sur l'ensemble de test :")
    print(f"     â€¢ Accuracy  : {accuracy:.4f} ({accuracy*100:.2f}%)")
    print(f"     â€¢ Precision : {precision:.4f}")
    print(f"     â€¢ Recall    : {recall:.4f}")
    print(f"     â€¢ F1-Score  : {f1:.4f}")
    print(f"     â€¢ ROC-AUC   : {roc_auc:.4f}")

    # 5-fold cross-validation on the training split, scored with ROC-AUC.
    cv_scores = cross_val_score(model, X_train, y_train,
                                cv=5, scoring='roc_auc')
    cv_mean = cv_scores.mean()
    cv_std = cv_scores.std()

    # Previously these statistics were only printed and then lost; store them
    # next to the test metrics so later cells can compare models on them.
    results[name]['cv_roc_auc_mean'] = cv_mean
    results[name]['cv_roc_auc_std'] = cv_std

    print("\n  ðŸ”„ Validation croisÃ©e (5-fold) - ROC-AUC :")
    print(f"     â€¢ Moyenne : {cv_mean:.4f}")
    print(f"     â€¢ Ã‰cart-type : {cv_std:.4f}")
    print()



ðŸ”„ EntraÃ®nement des modÃ¨les en cours...

ðŸ“Š ModÃ¨le : Logistic Regression
  âœ… EntraÃ®nement terminÃ©

  ðŸ“ˆ Performances sur l'ensemble de test :
     â€¢ Accuracy  : 0.8084 (80.84%)
     â€¢ Precision : 0.6745
     â€¢ Recall    : 0.5374
     â€¢ F1-Score  : 0.5982
     â€¢ ROC-AUC   : 0.8467

  ðŸ”„ Validation croisÃ©e (5-fold) - ROC-AUC :
     â€¢ Moyenne : 0.8489
     â€¢ Ã‰cart-type : 0.0132

ðŸ“Š ModÃ¨le : Decision Tree
  âœ… EntraÃ®nement terminÃ©

  ðŸ“ˆ Performances sur l'ensemble de test :
     â€¢ Accuracy  : 0.7715 (77.15%)
     â€¢ Precision : 0.5751
     â€¢ Recall    : 0.5321
     â€¢ F1-Score  : 0.5528
     â€¢ ROC-AUC   : 0.7553

  ðŸ”„ Validation croisÃ©e (5-fold) - ROC-AUC :
     â€¢ Moyenne : 0.7516
     â€¢ Ã‰cart-type : 0.0116

ðŸ“Š ModÃ¨le : Random Forest
  âœ… EntraÃ®nement terminÃ©

  ðŸ“ˆ Performances sur l'ensemble de test :
     â€¢ Accuracy  : 0.7885 (78.85%)
     â€¢ Precision : 0.6267
     â€¢ Recall    : 0.5027
     â€¢ F1-Score  : 0.5579
     



  âœ… EntraÃ®nement terminÃ©

  ðŸ“ˆ Performances sur l'ensemble de test :
     â€¢ Accuracy  : 0.8055 (80.55%)
     â€¢ Precision : 0.6678
     â€¢ Recall    : 0.5321
     â€¢ F1-Score  : 0.5923
     â€¢ ROC-AUC   : 0.8399





  ðŸ”„ Validation croisÃ©e (5-fold) - ROC-AUC :
     â€¢ Moyenne : 0.8415
     â€¢ Ã‰cart-type : 0.0160

