In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

# Load the pre-split train/test arrays (features first, then labels).
X_train, X_test, y_train, y_test = map(
    np.load,
    (
        "data/X_train.npy",
        "data/X_test.npy",
        "data/y_train.npy",
        "data/y_test.npy",
    ),
)

# Balance the training set if needed.
# When the rarest class has fewer than 80% as many samples as the most common
# one, every under-represented class is oversampled (with replacement, fixed
# seed) up to the majority count. Classes are discovered from the labels
# themselves, so this works for any label encoding and any number of classes;
# for 0/1 labels the result is a block of class-0 rows followed by class-1 rows.
# NOTE(review): the oversampled rows are duplicated before cross-validation, so
# copies of one sample can presumably land in both the training and validation
# folds, which would make CV scores optimistic — confirm whether that matters here.
unique, counts = np.unique(y_train, return_counts=True)
if np.min(counts) / np.max(counts) < 0.8:
    n_majority = int(np.max(counts))
    X_parts, y_parts = [], []
    for label, count in zip(unique, counts):
        mask = y_train == label
        X_cls, y_cls = X_train[mask], y_train[mask]
        if count < n_majority:
            # Draw with replacement until this class matches the majority size.
            X_cls, y_cls = resample(
                X_cls, y_cls, replace=True, n_samples=n_majority, random_state=42
            )
        X_parts.append(X_cls)
        y_parts.append(y_cls)
    X_train = np.vstack(X_parts)
    y_train = np.concatenate(y_parts)

# Extended hyper-parameter grids.
# Maps a display name to (estimator, param_grid). Every grid key is prefixed
# with the lower-cased display name, which is the pipeline step name the
# benchmark loop gives to the estimator.
# Estimators with a stochastic fit (saga solver, MLP, AdaBoost) get a fixed
# random_state so that repeated runs of the benchmark produce the same scores.
classifiers = {
    "LogisticRegression": (
        LogisticRegression(solver="saga", max_iter=10000, random_state=42),
        {
            "logisticregression__C": [0.01, 0.1, 1, 10, 100],
            "logisticregression__penalty": ["l1", "l2"]
        }
    ),
    "SVC": (
        SVC(),
        {
            "svc__C": [0.5, 1, 2, 3, 5, 10],
            "svc__gamma": [0.01, 0.05, 0.1, 0.2, "scale"],
            "svc__kernel": ["rbf"]
        }
    ),
    "KNN": (
        KNeighborsClassifier(),
        {
            "knn__n_neighbors": [3, 5, 7, 9, 11],
            "knn__weights": ["uniform", "distance"]
        }
    ),
    "MLP": (
        MLPClassifier(max_iter=2000, random_state=42),
        {
            "mlp__hidden_layer_sizes": [(100,), (128, 64), (256, 128)],
            "mlp__alpha": [0.0001, 0.001],
            "mlp__solver": ["adam"],
            "mlp__early_stopping": [True],
            # NOTE: per the scikit-learn docs, learning_rate is only used when
            # solver="sgd"; with "adam" this entry has no effect on training.
            "mlp__learning_rate": ["adaptive"]
        }
    ),
    "AdaBoost": (
        AdaBoostClassifier(random_state=42),
        {
            "adaboost__n_estimators": [50, 100, 200, 300],
            "adaboost__learning_rate": [0.01, 0.1, 0.5, 1]
        }
    )
}


# Feature-selection variants compared in the benchmark:
#   "none"        -> no extra stage,
#   "pca"         -> PCA with n_components=0.95,
#   "selectkbest" -> SelectKBest with the f_classif score and k=10.
pca_95 = PCA(n_components=0.95)
top10_f_classif = SelectKBest(score_func=f_classif, k=10)
feature_selectors = {"none": None, "pca": pca_95, "selectkbest": top10_f_classif}

# Benchmark
# Each (feature selector, classifier) pair is tuned by grid search under a
# seeded, shuffled 10-fold stratified CV; the fitted search object is then
# scored on the test split and one summary record is stored per pair.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
results = []

for fs_name, fs in feature_selectors.items():
    for clf_name, (clf, grid) in classifiers.items():
        # Scaling always comes first; the selector stage is skipped for "none".
        # The classifier step is named after the lower-cased model name so it
        # matches the "<step>__<param>" keys of its parameter grid.
        selector_steps = [] if fs_name == "none" else [("feature_selection", fs)]
        steps = [("scaler", StandardScaler()), *selector_steps, (clf_name.lower(), clf)]
        pipeline = Pipeline(steps)

        grid_search = GridSearchCV(
            estimator=pipeline,
            param_grid=grid,
            cv=cv,
            scoring="accuracy",
            n_jobs=-1,
            verbose=0,
        )
        grid_search.fit(X_train, y_train)

        y_pred = grid_search.predict(X_test)
        test_acc = accuracy_score(y_test, y_pred)

        record = {
            "Model": clf_name,
            "Feature Selection": fs_name,
            "Best Params": grid_search.best_params_,
            "CV Accuracy": grid_search.best_score_,
            "Test Accuracy": test_acc,
        }
        results.append(record)

# One row per pair, best test accuracy first, with a fresh 0..n-1 index.
df_results = (
    pd.DataFrame(results)
    .sort_values(by="Test Accuracy", ascending=False)
    .reset_index(drop=True)
)
df_results


In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils import resample

# Load the pre-split train/test arrays (features first, then labels).
X_train, X_test, y_train, y_test = map(
    np.load,
    (
        "data/X_train.npy",
        "data/X_test.npy",
        "data/y_train.npy",
        "data/y_test.npy",
    ),
)

# Balance the training set if needed.
# When the rarest class has fewer than 80% as many samples as the most common
# one, every under-represented class is oversampled (with replacement, fixed
# seed) up to the majority count. Classes are discovered from the labels
# themselves, so this works for any label encoding and any number of classes;
# for 0/1 labels the result is a block of class-0 rows followed by class-1 rows.
# NOTE(review): the oversampled rows are duplicated before cross-validation, so
# copies of one sample can presumably land in both the training and validation
# folds, which would make CV scores optimistic — confirm whether that matters here.
unique, counts = np.unique(y_train, return_counts=True)
if np.min(counts) / np.max(counts) < 0.8:
    n_majority = int(np.max(counts))
    X_parts, y_parts = [], []
    for label, count in zip(unique, counts):
        mask = y_train == label
        X_cls, y_cls = X_train[mask], y_train[mask]
        if count < n_majority:
            # Draw with replacement until this class matches the majority size.
            X_cls, y_cls = resample(
                X_cls, y_cls, replace=True, n_samples=n_majority, random_state=42
            )
        X_parts.append(X_cls)
        y_parts.append(y_cls)
    X_train = np.vstack(X_parts)
    y_train = np.concatenate(y_parts)

# "Ultra boost" pipeline: standardize the features, then fit a seeded MLP with
# early stopping enabled (n_iter_no_change=20) and up to 10000 iterations.
mlp = MLPClassifier(
    max_iter=10000, early_stopping=True, n_iter_no_change=20, random_state=42
)
mlp_pipeline = Pipeline(steps=[("scaler", StandardScaler()), ("mlp", mlp)])

# Search space: 5 architectures x 4 alpha values, i.e. 20 candidates; the
# learning-rate schedule and solver are pinned to a single value each.
hidden_layouts = [(512,), (1024,), (1024, 512), (1024, 512, 256), (2048, 1024, 512)]
alpha_values = [1e-6, 1e-5, 1e-4, 1e-3]
param_grid_mlp = {
    "mlp__hidden_layer_sizes": hidden_layouts,
    "mlp__alpha": alpha_values,
    "mlp__learning_rate": ["adaptive"],
    "mlp__solver": ["adam"],
}

# Seeded, shuffled 10-fold stratified cross-validation.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Grid search over the MLP pipeline: accuracy scoring, all cores, and
# per-fit progress messages (verbose=2).
search_options = dict(cv=cv, scoring="accuracy", n_jobs=-1, verbose=2)
grid_mlp = GridSearchCV(mlp_pipeline, param_grid_mlp, **search_options)
grid_mlp.fit(X_train, y_train)

# Score the fitted search object on the held-out test split.
y_pred = grid_mlp.predict(X_test)
test_acc = accuracy_score(y_test, y_pred)

# Report the selected hyper-parameters with the CV and test accuracies.
print(
    f"✅ Best params: {grid_mlp.best_params_}",
    f"📈 CV Accuracy: {grid_mlp.best_score_:.4f}",
    f"🏁 Test Accuracy: {test_acc:.4f}",
    sep="\n",
)


Fitting 10 folds for each of 20 candidates, totalling 200 fits
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   2.7s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.2s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.3s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.3s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.6s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.7s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp__learning_rate=adaptive, mlp__solver=adam; total time=   3.8s
[CV] END mlp__alpha=1e-06, mlp__hidden_layer_sizes=(512,), mlp