In [3]:
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.pipeline import Pipeline
from sklearn.ensemble import VotingClassifier
from xgboost import XGBClassifier

In [4]:
X_train = np.load("C:\\Users\\abela\\Desktop\\Epita\\FTML\\Exo5\\X_test.npy")
y_train = np.load("C:\\Users\\abela\\Desktop\\Epita\\FTML\\Exo5\\y_test.npy")
X_test = np.load("C:\\Users\\abela\\Desktop\\Epita\\FTML\\Exo5\\X_train.npy")
y_test = np.load("C:\\Users\\abela\\Desktop\\Epita\\FTML\\Exo5\\y_train.npy")

In [9]:
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('feature_selection', SelectKBest(f_classif, k='all')),
])

X_train_prepared = pipeline.fit_transform(X_train, y_train)
X_test_prepared = pipeline.transform(X_test)

def evaluate_model(model, X_train, X_test, y_train, y_test, model_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Test accuracy: {accuracy:.4f}")
    print(f"{model_name} Classification Report:")
    print(classification_report(y_test, y_pred))
    return accuracy

svm_params = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['rbf', 'poly', 'sigmoid']
}
svm = GridSearchCV(SVC(random_state=42), svm_params, cv=5, scoring='accuracy', n_jobs=-1)
svm_accuracy = evaluate_model(svm, X_train_prepared, X_test_prepared, y_train, y_test, "SVM (optimized)")

rf_params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}
rf = GridSearchCV(RandomForestClassifier(random_state=42), rf_params, cv=5, scoring='accuracy', n_jobs=-1)
rf_accuracy = evaluate_model(rf, X_train_prepared, X_test_prepared, y_train, y_test, "Random Forest (optimized)")

gb = GradientBoostingClassifier(random_state=42)
gb_accuracy = evaluate_model(gb, X_train_prepared, X_test_prepared, y_train, y_test, "Gradient Boosting")

xgb = XGBClassifier(random_state=42)
xgb_accuracy = evaluate_model(xgb, X_train_prepared, X_test_prepared, y_train, y_test, "XGBoost")

ensemble = VotingClassifier(
    estimators=[
        ('svm', svm.best_estimator_),
        ('rf', rf.best_estimator_),
        ('gb', gb),
        ('xgb', xgb)
    ],
    voting='soft'
)

ensemble.estimators[0][1].probability = True

ensemble_accuracy = evaluate_model(ensemble, X_train_prepared, X_test_prepared, y_train, y_test, "Ensemble")
models = {
    "SVM (optimized)": svm_accuracy,
    "Random Forest (optimized)": rf_accuracy,
    "Gradient Boosting": gb_accuracy,
    "XGBoost": xgb_accuracy,
    "Ensemble": ensemble_accuracy
}

for name, accuracy in models.items():
    print(f"{name} accuracy: {accuracy:.4f}")

best_model = max(models, key=models.get)
best_accuracy = models[best_model]

if best_accuracy > 0.85:
    print(f"\nL'objectif de précision supérieure à 0.85 sur l'ensemble de test est atteint avec un score de {best_accuracy:.4f} pour le modèle {best_model}.")
else:
    print(f"\nL'objectif de précision supérieure à 0.85 sur l'ensemble de test n'est pas atteint. Le meilleur score est {best_accuracy:.4f} pour le modèle {best_model}.")

if hasattr(rf.best_estimator_, 'feature_importances_'):
    importances = rf.best_estimator_.feature_importances_
    indices = np.argsort(importances)[::-1]
    
    print("\nImportance des caractéristiques:")
    for f, idx in enumerate(indices):
        print("{0} - Feature {1}: {2:.4f}".format(f + 1, idx, importances[idx]))

print("\nConclusion:")
print("1. Le meilleur modèle est", best_model)
print(f"2. La meilleure précision obtenue est {best_accuracy:.4f}")

SVM (optimized) Test accuracy: 0.7460
SVM (optimized) Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.56      0.68       241
           1       0.69      0.92      0.79       259

    accuracy                           0.75       500
   macro avg       0.78      0.74      0.73       500
weighted avg       0.78      0.75      0.74       500

Random Forest (optimized) Test accuracy: 0.7900
Random Forest (optimized) Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.70      0.76       241
           1       0.76      0.87      0.81       259

    accuracy                           0.79       500
   macro avg       0.80      0.79      0.79       500
weighted avg       0.80      0.79      0.79       500

Gradient Boosting Test accuracy: 0.7580
Gradient Boosting Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.68   

(500,)
[0 1 0 1 1 0 0 0 1 1 0 0 0 0 1 1 1 1 0 0 0 0 0 1 1 0 1 0 1 0 0 1 1 1 1 0 0
 0 0 1 1 0 1 1 0 1 0 1 1 0 1 1 0 0 0 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1
 0 1 0 1 1 1 1 0 1 1 1 1 1 0 1 0 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
 1 1 0 1 0 0 1 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 1 0 0 1 0 0 1 1 0 0 1 0 1 1
 1 1 0 1 1 0 1 0 0 1 0 0 0 0 0 0 1 0 0 1 0 1 1 1 1 0 1 1 1 1 1 0 0 1 0 0 0
 1 0 0 1 0 0 1 0 1 1 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 1 1 0 0 1 1 0 0 1 1 0
 0 0 0 0 0 1 1 0 1 1 0 0 0 1 1 1 0 1 0 1 1 0 0 0 1 1 1 1 1 0 0 0 0 0 1 0 0
 1 0 0 0 1 1 0 0 1 0 1 1 1 0 1 0 1 0 1 1 0 1 1 1 0 1 0 1 1 0 1 1 0 0 0 0 1
 1 0 1 0 1 0 1 0 0 0 0 1 1 0 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 0 1
 0 0 1 1 1 1 0 0 1 1 1 0 0 1 0 1 1 0 1 1 1 0 1 0 0 1 0 0 1 0 0 1 1 1 1 1 0
 1 1 1 0 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 0 1 0 1 1 0 0 0 1 0
 1 1 0 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 0 0 1 0 0 0 1 1 1 0 1 0 0 1 0
 1 0 0 0 1 1 0 0 1 0 1 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 0 0 1 0 0 1 1 0 1 1 1
 0 1 0 0 0 1 0 0 0