In [1]:
import optuna
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score
import xgboost as xgb

In [2]:
features = pd.read_csv('features.csv', index_col=0)

In [3]:
train_features_cols = [i for i in features.columns if i not in ['date', 'target0',  'open', 'high', 'low', 'close', 'volume', 'Name']]
train_target_cols = ['target0']

In [4]:
traindf = features.loc[(features['date'] < '2016-01-01')]
testdf = features.loc[features['date'] > '2016-01-01']

In [5]:
Xtrain = traindf[train_features_cols].loc[0]
Ytrain = traindf[train_target_cols].loc[0]

Xtest = testdf[train_features_cols].loc[0]
Ytest = testdf[train_target_cols].loc[0]

In [6]:
def objective(trial):
    params = {
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 8, 64),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'subsample': trial.suggest_uniform('subsample', 0.5, 1),
        'colsample_bytree': trial.suggest_uniform('colsample_bytree', 0.5, 1),
        'gamma': trial.suggest_uniform('gamma', 0, 25),
        'reg_alpha': trial.suggest_uniform('reg_alpha', 0, 1),
        'reg_lambda': trial.suggest_uniform('reg_lambda', 0, 1),
        'n_estimators': trial.suggest_int('n_estimators', 16, 512),
    }
    # set random seed
    np.random.seed(0)
    
    model = xgb.XGBRegressor(**params)

    model.fit(Xtrain, Ytrain)

    preds = model.predict(Xtest)
    
    return r2_score(Ytest, preds)

In [7]:
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=200)

[32m[I 2022-08-26 02:32:45,230][0m A new study created in memory with name: no-name-a295a038-471a-4273-a5ad-0ce4bae9350f[0m
[32m[I 2022-08-26 02:32:46,799][0m Trial 0 finished with value: -0.04700134180956583 and parameters: {'learning_rate': 0.09175360455243153, 'max_depth': 52, 'min_child_weight': 10, 'subsample': 0.5542525144484951, 'colsample_bytree': 0.8720285913978921, 'gamma': 10.654836915831678, 'reg_alpha': 0.059063245783968155, 'reg_lambda': 0.5882327226851127, 'n_estimators': 253}. Best is trial 0 with value: -0.04700134180956583.[0m
[32m[I 2022-08-26 02:32:47,353][0m Trial 1 finished with value: -0.033532410846563776 and parameters: {'learning_rate': 0.07235505367421337, 'max_depth': 49, 'min_child_weight': 8, 'subsample': 0.9299772102002775, 'colsample_bytree': 0.8500532788008424, 'gamma': 19.233282641158862, 'reg_alpha': 0.4072908044948148, 'reg_lambda': 0.7303640136259812, 'n_estimators': 247}. Best is trial 1 with value: -0.033532410846563776.[0m
[32m[I 2022-0

In [8]:

print('Number of finished trials: {}'.format(len(study.trials)))
print('Best trial:')
trial = study.best_trial
print('  Value: {}'.format(trial.value))
print('  Params: ')
for key, value in trial.params.items():
    print('    {}: {}'.format(key, value))
    

Number of finished trials: 200
Best trial:
  Value: -5.536733893585222e-08
  Params: 
    learning_rate: 0.20502459658040176
    max_depth: 9
    min_child_weight: 2
    subsample: 0.7827731119445701
    colsample_bytree: 0.8229789001158825
    gamma: 24.32992268673283
    reg_alpha: 0.883386020804104
    reg_lambda: 0.6131101263524059
    n_estimators: 24


Number of finished trials: 200
Best trial:
  Value: -7.00209401571783e-09
  Params: 
    learning_rate: 0.12596576491431577
    max_depth: 61
    min_child_weight: 8
    subsample: 0.70162396122708
    colsample_bytree: 0.732697097744053
    gamma: 16.402224989581324
    reg_alpha: 0.8669616672221077
    reg_lambda: 0.3188654526752628
    n_estimators: 125