In [None]:
# Cellule 1 - Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import train_test_split

from src.data.data_loader import DataLoader
from src.features.feature_engineer import FeatureEngineer
from src.models.model_trainer import ModelTrainer

# Banner line marking the start of the model-training notebook in the output.
print("🤖 Entraînement des Modèles de Machine Learning")

In [None]:
# Cell 2 - Data preparation
# Load the raw dataset, then derive the engineered feature table from it.
loader = DataLoader()
df = loader.load_raw_data()

fe = FeatureEngineer()
df_engineered = fe.create_features(df)

# 'Class' is the target; every remaining column is used as a model input.
y = df_engineered['Class']
X = df_engineered.drop(columns='Class')

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Resample the training split only, so the test split keeps its original
# class distribution.
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train, y_train)

print("✅ Données prêtes pour l'entraînement")

In [None]:
# Cell 3 - Model training
# Train on the SMOTE-resampled training data; the untouched test split is
# passed in alongside it.
# NOTE(review): presumably train_models scores every candidate on
# X_test/y_test. If the best model is then chosen from those scores, the
# reported value is selected on the test set and will be optimistic; confirm
# in ModelTrainer.
trainer = ModelTrainer()
results = trainer.train_models(X_train_res, y_train_res, X_test, y_test)

# get_best_model() is unpacked as a (model, score) pair; the score is
# reported below as an AUC.
best_model, best_score = trainer.get_best_model()
print(f"🎯 MEILLEUR MODÈLE: AUC = {best_score:.4f}")

In [None]:
# Cell 4 - Detailed evaluation of the selected model on the held-out test split

# Predictions of the best model: hard class labels for the report and the
# confusion matrix, plus column 1 of predict_proba's output.
# NOTE(review): column 1 is presumably the probability of the positive class
# (label 1); confirm against best_model.classes_. y_proba is not used in the
# lines visible here; presumably it feeds ROC / precision-recall curves in
# later cells.
y_pred = best_model.predict(X_test)
y_proba = best_model.predict_proba(X_test)[:, 1]

# Per-class precision, recall, F1-score and support.
print("📊 RAPPORT DE CLASSIFICATION:")
print(classification_report(y_test, y_pred))

# Rows are true classes, columns are predicted classes.
print("\n🎯 MATRICE DE CONFUSION:")
print(confusion_matrix(y_test, y_pred))