In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.model_selection import cross_val_score
import xgboost as xgb
import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('datasets/final_boston.csv')
X = df.drop('MEDV', axis=1)
y = df['MEDV']
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
def objective(trial):
  regressor_name = trial.suggest_categorical('regressor', ['XGB', 'RF', 'GB'])
  if regressor_name == 'XGB':
    model = xgb.XGBRegressor(
        n_estimators = trial.suggest_int('xgb_n_estimators', 100, 500, step=100),
        learning_rate = trial.suggest_float('xgb_learning_rate', 0.01, 0.1, step=0.01),
        max_depth = trial.suggest_int('xgb_max_depth', 3, 6),
        subsample = trial.suggest_float('xgb_subsample', 0.6, 1.0, step=0.1),
        colsample_bytree = trial.suggest_float('xgb_colsample_bytree', 0.6, 1.0, step=0.1),
        min_child_weight = trial.suggest_int('xgb_min_child_weight', 1, 5),
        gamma = trial.suggest_int('xgb_gamma', 0, 5, step=1),
        reg_alpha = trial.suggest_float('xgb_reg_alpha', 0.0, 1.0, step=0.1),
        reg_lambda = trial.suggest_int('xgb_reg_lambda', 1, 10, step=1),
        verbosity=0,
        objective='reg:squarederror',
        random_state=42
    )
  elif regressor_name == 'RF':
    model = RandomForestRegressor(
        n_estimators = trial.suggest_int('rf_n_estimators', 100, 500, step=100),
        max_depth = trial.suggest_int('rf_max_depth', 5, 20),
        min_samples_split = trial.suggest_int('rf_min_samples_split', 2, 10),
        min_samples_leaf = trial.suggest_int('rf_min_samples_leaf', 1, 10),
        max_features = trial.suggest_categorical('rf_max_features', ['sqrt', 'log2']),
        random_state=42
    )
  else:
    model = GradientBoostingRegressor(
        n_estimators = trial.suggest_int('gb_n_estimators', 100, 500, step=100),
        learning_rate = trial.suggest_float('gb_learning_rate', 0.01, 0.1, step=0.01),
        max_depth = trial.suggest_int('gb_max_depth', 3, 5),
        min_samples_split = trial.suggest_int('gb_min_samples_split', 2, 10),
        min_samples_leaf = trial.suggest_int('gb_min_samples_leaf', 1, 10),
        subsample = trial.suggest_float('gb_subsample', 0.6, 1.0, step=0.1),
        random_state=42
    )
  score = cross_val_score(model, train_X, train_y, cv=5, scoring='r2').mean()
  return score

In [4]:
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler())
study.optimize(objective, n_trials=100)

[32m[I 2026-01-24 23:20:51,977][0m A new study created in memory with name: no-name-1a8c0a18-f176-4f7a-9820-87aac6c58274[0m
[32m[I 2026-01-24 23:20:52,334][0m Trial 0 finished with value: 0.8686898484852259 and parameters: {'regressor': 'XGB', 'xgb_n_estimators': 100, 'xgb_learning_rate': 0.08, 'xgb_max_depth': 3, 'xgb_subsample': 0.6, 'xgb_colsample_bytree': 1.0, 'xgb_min_child_weight': 1, 'xgb_gamma': 0, 'xgb_reg_alpha': 0.1, 'xgb_reg_lambda': 9}. Best is trial 0 with value: 0.8686898484852259.[0m
[32m[I 2026-01-24 23:20:52,601][0m Trial 1 finished with value: 0.8661446563887152 and parameters: {'regressor': 'XGB', 'xgb_n_estimators': 100, 'xgb_learning_rate': 0.09, 'xgb_max_depth': 3, 'xgb_subsample': 0.6, 'xgb_colsample_bytree': 0.8, 'xgb_min_child_weight': 3, 'xgb_gamma': 5, 'xgb_reg_alpha': 0.7000000000000001, 'xgb_reg_lambda': 8}. Best is trial 0 with value: 0.8686898484852259.[0m
[32m[I 2026-01-24 23:20:54,999][0m Trial 2 finished with value: 0.8576837348865028 and p

In [5]:
print(f'Best trial accuracy: {study.best_trial.value}')
print(f'Best params: {study.best_trial.params}')

Best trial accuracy: 0.8873328583801708
Best params: {'regressor': 'GB', 'gb_n_estimators': 500, 'gb_learning_rate': 0.04, 'gb_max_depth': 3, 'gb_min_samples_split': 5, 'gb_min_samples_leaf': 2, 'gb_subsample': 0.6}


In [6]:
stdf = study.trials_dataframe()
stdf.to_csv('datasets/trials.csv')