In [2]:
# ============================
# 1. Imports
# ============================
import numpy as np
from pathlib import Path

from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

from sklearn.linear_model import Ridge, Lasso
from sklearn.svm import SVR
from sklearn.ensemble import AdaBoostRegressor

# ============================
# 2. Data loading
# ============================

# Directory holding the four .npy files (relative to the working directory).
DATA_DIR = Path("Document/regression")

# Load the arrays in the order: train features, test features, train targets,
# test targets.
X_train, X_test, y_train, y_test = (
    np.load(DATA_DIR / filename)
    for filename in ("X_train.npy", "X_test.npy", "y_train.npy", "y_test.npy")
)

# Targets are flattened to 1-D before being passed to the estimators.
y_train, y_test = y_train.ravel(), y_test.ravel()

# Report the shape of every array that was loaded.
for label, array in (
    ("X_train:", X_train),
    ("X_test :", X_test),
    ("y_train:", y_train),
    ("y_test :", y_test),
):
    print(label, array.shape)

# ============================
# 3. Model 1: Ridge Regression
# ============================

# The scaler is part of the pipeline, so during cross-validation it is fitted
# on the training part of each fold only.
ridge_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Ridge())
])

# Candidate values for the Ridge regularization strength.
ridge_params = {
    "model__alpha": [0.1, 1, 10, 50, 100]
}

# Exhaustive search over the grid, scored by R² with 5-fold cross-validation.
ridge_search = GridSearchCV(
    estimator=ridge_pipe,
    param_grid=ridge_params,
    cv=5,
    scoring="r2",
    n_jobs=-1
)

print("\nEntraînement du modèle Ridge...")
ridge_search.fit(X_train, y_train)

# With the default refit=True, predict() uses the best pipeline refitted on
# the whole of X_train.
y_pred_ridge = ridge_search.predict(X_test)
r2_ridge = r2_score(y_test, y_pred_ridge)

print("=== Ridge Regression ===")
print("Meilleurs hyperparamètres :", ridge_search.best_params_)
# best_score_ is the mean cross-validated R² of the best candidate (computed
# on held-out folds), not a score on the training set, hence the "CV" label.
print("R² (CV)    :", ridge_search.best_score_)
print("R² (test)  :", r2_ridge)

# ============================
# 4. Model 2: Lasso Regression
# ============================

# The scaler is part of the pipeline, so during cross-validation it is fitted
# on the training part of each fold only. max_iter is raised to 20000 for the
# Lasso solver.
lasso_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Lasso(max_iter=20000))
])

# Candidate values for the Lasso regularization strength.
lasso_params = {
    "model__alpha": [0.0001, 0.001, 0.01, 0.1, 1.0]
}

# Exhaustive search over the grid, scored by R² with 5-fold cross-validation.
lasso_search = GridSearchCV(
    estimator=lasso_pipe,
    param_grid=lasso_params,
    cv=5,
    scoring="r2",
    n_jobs=-1
)

print("\nEntraînement du modèle Lasso...")
lasso_search.fit(X_train, y_train)

# With the default refit=True, predict() uses the best pipeline refitted on
# the whole of X_train.
y_pred_lasso = lasso_search.predict(X_test)
r2_lasso = r2_score(y_test, y_pred_lasso)

print("=== Lasso Regression ===")
print("Meilleurs hyperparamètres :", lasso_search.best_params_)
# best_score_ is the mean cross-validated R² of the best candidate (computed
# on held-out folds), not a score on the training set, hence the "CV" label.
print("R² (CV)    :", lasso_search.best_score_)
print("R² (test)  :", r2_lasso)

# ============================
# 5. Model 3: SVR (optional but recommended)
# ============================

# The scaler is part of the pipeline, so during cross-validation it is fitted
# on the training part of each fold only.
svr_pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("model", SVR())
])

# NOTE(review): on standardized features gamma="scale" and gamma="auto" are
# expected to be nearly identical, so this axis probably adds little to the
# search -- confirm against the SVR documentation.
svr_params = {
    "model__C": [1, 10, 100],
    "model__gamma": ["scale", "auto"],
    "model__kernel": ["rbf"]  # 'poly' could be tried as well
}

# Exhaustive search over the grid, scored by R² with 3-fold cross-validation.
svr_search = GridSearchCV(
    estimator=svr_pipe,
    param_grid=svr_params,
    cv=3,
    scoring="r2",
    n_jobs=-1
)

print("\nEntraînement du modèle SVR...")
svr_search.fit(X_train, y_train)

# With the default refit=True, predict() uses the best pipeline refitted on
# the whole of X_train.
y_pred_svr = svr_search.predict(X_test)
r2_svr = r2_score(y_test, y_pred_svr)

print("=== SVR ===")
print("Meilleurs hyperparamètres :", svr_search.best_params_)
# best_score_ is the mean cross-validated R² of the best candidate (computed
# on held-out folds), not a score on the training set, hence the "CV" label.
print("R² (CV)    :", svr_search.best_score_)
print("R² (test)  :", r2_svr)

# ============================
# 6. Model 4: AdaBoostRegressor (also optional)
# ============================

# Used directly, without a scaling step; the seed is fixed so repeated runs
# give the same result.
ada = AdaBoostRegressor(random_state=42)

# No "model__" prefix here because the estimator is not wrapped in a Pipeline.
ada_params = {
    "n_estimators": [50, 100, 200],
    "learning_rate": [0.01, 0.1, 1.0]
}

# Exhaustive search over the grid, scored by R² with 3-fold cross-validation.
ada_search = GridSearchCV(
    estimator=ada,
    param_grid=ada_params,
    cv=3,
    scoring="r2",
    n_jobs=-1
)

print("\nEntraînement du modèle AdaBoostRegressor...")
ada_search.fit(X_train, y_train)

# With the default refit=True, predict() uses the best estimator refitted on
# the whole of X_train.
y_pred_ada = ada_search.predict(X_test)
r2_ada = r2_score(y_test, y_pred_ada)

print("=== AdaBoostRegressor ===")
print("Meilleurs hyperparamètres :", ada_search.best_params_)
# best_score_ is the mean cross-validated R² of the best candidate (computed
# on held-out folds), not a score on the training set, hence the "CV" label.
print("R² (CV)    :", ada_search.best_score_)
print("R² (test)  :", r2_ada)

# ============================
# 7. Summary of the R² scores
# ============================

# Test-set R² of each tuned model, keyed by display name (insertion order is
# the order in which the models were trained above).
scores = dict(
    zip(
        ("Ridge", "Lasso", "SVR", "AdaBoost"),
        (r2_ridge, r2_lasso, r2_svr, r2_ada),
    )
)

print("\n=== R² sur le test pour chaque modèle ===")
for name, score in scores.items():
    # Names are left-aligned in an 8-character column.
    print(f"{name:8s} : {score:.4f}")

# The ranking is based on the test-set R²; on a tie the first model in
# insertion order wins.
best_model_name = max(scores, key=lambda model: scores[model])
print(f"\nMeilleur modèle sur le test : {best_model_name} (R² = {scores[best_model_name]:.4f})")


X_train: (200, 200)
X_test : (200, 200)
y_train: (200,)
y_test : (200,)

Entraînement du modèle Ridge...
=== Ridge Regression ===
Meilleurs hyperparamètres : {'model__alpha': 10}
R² (train) : 0.6057036150134036
R² (test)  : 0.7585841733355716

Entraînement du modèle Lasso...
=== Lasso Regression ===
Meilleurs hyperparamètres : {'model__alpha': 0.01}
R² (train) : 0.9227173780977346
R² (test)  : 0.939614907187149

Entraînement du modèle SVR...
=== SVR ===
Meilleurs hyperparamètres : {'model__C': 10, 'model__gamma': 'scale', 'model__kernel': 'rbf'}
R² (train) : 0.2545793370504172
R² (test)  : 0.3330386237114059

Entraînement du modèle AdaBoostRegressor...
=== AdaBoostRegressor ===
Meilleurs hyperparamètres : {'learning_rate': 1.0, 'n_estimators': 200}
R² (train) : 0.3229289048838953
R² (test)  : 0.5260616768596243

=== R² sur le test pour chaque modèle ===
Ridge    : 0.7586
Lasso    : 0.9396
SVR      : 0.3330
AdaBoost : 0.5261

Meilleur modèle sur le test : Lasso (R² = 0.9396)
