In [2]:

# =========================
# OPTUNA + XGBOOST REGRESSION
# =========================
import optuna
import xgboost as xgb
from sklearn.model_selection import cross_val_score
import pandas as pd

# -------------------------
# Parameters
# -------------------------
# The three CSV splits live side by side in the same dataset folder.
DATASET_DIR = "../ressources/npyDS/DataSetLasso"
TRAIN_FILE, VAL_FILE, TEST_FILE = (
    f"{DATASET_DIR}/{split}.csv" for split in ("train", "val", "test")
)
Y_NAME = "y"  # column read as the regression target when loading the CSVs

# Training settings
RANDOM_STATE = 42
NUM_ITERATIONS = 30_000
EARLY_STOPPING_ROUNDS = 50

In [3]:

# -------------------------
# Data loading
# -------------------------
train_df = pd.read_csv(TRAIN_FILE)
val_df   = pd.read_csv(VAL_FILE)
test_df  = pd.read_csv(TEST_FILE)

# Split each labelled frame into its target vector and its feature matrix.
y_train = train_df[Y_NAME].to_numpy()
X_train = train_df.drop(columns=[Y_NAME]).to_numpy()

y_val = val_df[Y_NAME].to_numpy()
X_val = val_df.drop(columns=[Y_NAME]).to_numpy()

# The test file may or may not carry the target column. Drop it when present so
# the target never ends up among the test features; errors="ignore" keeps the
# previous behaviour when the column is absent.
X_test = test_df.drop(columns=[Y_NAME], errors="ignore").to_numpy()


In [4]:

# -------------------------
# Optuna objective
# -------------------------
def objective(trial):
    """Score one Optuna trial with a cross-validated XGBoost regressor.

    Hyperparameters are sampled from ``trial``, an ``xgb.XGBRegressor`` is
    built from them, and it is evaluated with 3-fold cross-validation on
    ``X_train`` / ``y_train`` (module-level arrays).

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Mean R² across the 3 folds.
    """
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 500, 3000),
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        # Log scale: learning rates are searched uniformly in order of magnitude.
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
        "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 10.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 10.0),
        # Use the notebook-wide seed instead of a second hard-coded literal.
        "random_state": RANDOM_STATE,
        "tree_method": "hist",
        "objective": "reg:squarederror"
    }

    model = xgb.XGBRegressor(**params)

    score = cross_val_score(
        model,
        X_train,
        y_train,
        cv=3,
        scoring="r2",
        n_jobs=-1
    ).mean()

    return score

In [5]:

# -------------------------
# Run the Optuna search
# -------------------------
# Seed the sampler explicitly: without it the sampler is unseeded, so each
# Restart & Run All would propose a different sequence of trials.
sampler = optuna.samplers.TPESampler(seed=RANDOM_STATE)
study = optuna.create_study(direction="maximize", sampler=sampler)
study.optimize(objective, n_trials=50)

print("Meilleurs paramètres :", study.best_params)
print("Meilleur R² :", study.best_value)

[I 2025-12-18 00:02:51,228] A new study created in memory with name: no-name-eb6fb3ef-ca98-4328-b406-c290d009c376
[I 2025-12-18 00:03:10,523] Trial 0 finished with value: 0.9172300517414801 and parameters: {'n_estimators': 1796, 'max_depth': 6, 'learning_rate': 0.04007915640802488, 'subsample': 0.6669524748864966, 'colsample_bytree': 0.6309362266542007, 'min_child_weight': 7, 'reg_alpha': 3.361764433073583, 'reg_lambda': 5.955866838695907}. Best is trial 0 with value: 0.9172300517414801.
[I 2025-12-18 00:03:30,646] Trial 1 finished with value: 0.895166916504481 and parameters: {'n_estimators': 2815, 'max_depth': 4, 'learning_rate': 0.013835447438848477, 'subsample': 0.8947165954979458, 'colsample_bytree': 0.8637026924188091, 'min_child_weight': 9, 'reg_alpha': 5.970428366013037, 'reg_lambda': 1.0188759655338009}. Best is trial 0 with value: 0.9172300517414801.
[I 2025-12-18 00:03:38,313] Trial 2 finished with value: 0.9119583567260002 and parameters: {'n_estimators': 584, 'max_depth': 

Meilleurs paramètres : {'n_estimators': 2993, 'max_depth': 7, 'learning_rate': 0.08763675484099007, 'subsample': 0.9700702911996296, 'colsample_bytree': 0.852472208909238, 'min_child_weight': 9, 'reg_alpha': 0.018564423579985112, 'reg_lambda': 0.22538057440301235}
Meilleur R² : 0.9357937734154661


In [6]:

# -------------------------
# Final training (currently disabled: everything below is commented out)
# -------------------------
# best_params = study.best_params
# best_params.update({
#     "random_state": 42,
#     "tree_method": "hist",
#     "objective": "reg:squarederror"
# })
#
# final_model = xgb.XGBRegressor(**best_params)
#
# final_model.fit(
#     X_train, y_train,
#     eval_set=[(X_val, y_val)],
#     verbose=200
# )