In [None]:

# Evaluation function
def evaluate_regression(y_true, y_pred):
    """Score regression predictions against the ground truth.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A ``(rmse, r2)`` tuple: the square root of ``mean_squared_error`` and
        the ``r2_score`` of the predictions.
    """
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse), r2_score(y_true, y_pred)


# ===========================
# Optuna objective for XGBoost
# ===========================
def objective_xgb(trial):
    """Optuna objective: cross-validated RMSE of an XGBoost pipeline.

    Samples one hyperparameter configuration from ``trial``, puts an
    ``XGBRegressor`` behind the shared ``preprocessor`` and scores it with
    3-fold cross-validation on ``X_train`` / ``y_train`` only.

    The held-out ``X_test`` / ``y_test`` split is deliberately not used here:
    choosing hyperparameters against it leaks test information into model
    selection and makes any later test-set score optimistically biased.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        Mean RMSE over the cross-validation folds (lower is better).
    """
    # Function-scope import so this block does not depend on the file's
    # import cell already exposing cross_val_score.
    from sklearn.model_selection import cross_val_score

    # Sampling order is kept stable so parameter names map to the same
    # suggestions across trials.
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 500),
        "max_depth": trial.suggest_int("max_depth", 3, 10),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
    }

    model = Pipeline([
        ('preprocess', preprocessor),
        ('model', XGBRegressor(
            **params,
            objective='reg:squarederror',
            random_state=42,
        )),
    ])

    # cross_val_score clones the pipeline per fold, so the preprocessing is
    # re-fitted on each training fold and never sees the validation fold.
    neg_mse_per_fold = cross_val_score(
        model,
        X_train,
        y_train,
        cv=3,
        scoring="neg_mean_squared_error",
    )
    # The scorer returns negated MSE; flip the sign, then take RMSE per fold.
    rmse_per_fold = np.sqrt(-neg_mse_per_fold)
    return float(rmse_per_fold.mean())

# Optuna study for XGBoost.
# The sampler is seeded so the hyperparameter search is repeatable between
# runs, matching the fixed random_state used for the model itself. TPE is
# Optuna's default single-objective sampler, so only the seeding changes.
study_xgb = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_xgb.optimize(objective_xgb, n_trials=30)

print("Best XGB params:", study_xgb.best_params)

# Train best XGBoost model: refit on the full training split with the
# best parameters found by the study.
best_xgb = Pipeline([
    ('preprocess', preprocessor),
    ('model', XGBRegressor(
        **study_xgb.best_params,
        objective='reg:squarederror',
        random_state=42,
    )),
])
best_xgb.fit(X_train, y_train)

# Final report on the held-out test split.
y_pred_xgb = best_xgb.predict(X_test)
rmse_xgb, r2_xgb = evaluate_regression(y_test, y_pred_xgb)
print(f"\nXGBoost Regressor --> RMSE: {rmse_xgb:.4f}, R2: {r2_xgb:.4f}")