<a href="https://colab.research.google.com/github/RoetGer/decisions-under-uncertainty/blob/main/hyperparameter_optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
!pip install optuna
!pip install catboost
!pip install -U sklearn



In [8]:
import optuna
import sklearn.metrics as skm

from catboost import CatBoostRegressor
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [42]:
from functools import partial

import numpy as np

def mean_absolute_prc_error(ytrue, ypred):
  """Return the mean absolute percentage error as a fraction (0.1 == 10%).

  Args:
    ytrue: array-like of observed target values.
    ypred: array-like of predictions, same length as ``ytrue``.

  Returns:
    Mean of ``|ytrue - ypred| / |ytrue|`` over all samples.

  Note:
    Samples where ``ytrue`` is 0 yield inf/nan, as NumPy division does.
  """
  ytrue = np.asarray(ytrue)
  ypred = np.asarray(ypred)
  # Divide by |ytrue|: with a signed denominator, negative targets would give
  # negative "absolute" errors that cancel against the positive ones.
  return np.mean(np.abs(ytrue - ypred) / np.abs(ytrue))

def median_absolute_prc_error(ytrue, ypred):
  """Return the median absolute percentage error as a fraction (0.1 == 10%).

  Args:
    ytrue: array-like of observed target values.
    ypred: array-like of predictions, same length as ``ytrue``.

  Returns:
    Median of ``|ytrue - ypred| / |ytrue|`` over all samples.

  Note:
    Samples where ``ytrue`` is 0 yield inf/nan, as NumPy division does.
  """
  ytrue = np.asarray(ytrue)
  ypred = np.asarray(ypred)
  # Divide by |ytrue| so negative targets cannot produce negative ratios that
  # would shift the median below the true error level.
  return np.median(np.abs(ytrue - ypred) / np.abs(ytrue))

def eval_mdl(mdl):
  """Print a fixed set of regression error metrics for ``mdl``.

  Metrics are reported first for the validation split and then for the test
  split. The splits are read from the notebook-level variables
  ``Xvalid``/``yvalid`` and ``Xtest``/``ytest``, which must already exist.

  Args:
    mdl: fitted model exposing ``predict(X)``.

  Returns:
    None. Each metric is printed as ``<function name> <value>``.
  """
  error_funcs = [
    skm.mean_absolute_error,
    mean_absolute_prc_error,
    skm.median_absolute_error,
    median_absolute_prc_error,
    skm.mean_squared_error
  ]

  # The '\n' in the second header keeps the blank line between the two reports.
  splits = [
    ('Validation', Xvalid, yvalid),
    ('\nTest', Xtest, ytest),
  ]

  for header, features, target in splits:
    print(header)
    # Predict once per split; every metric reuses the same predictions.
    predictions = mdl.predict(features)
    for func in error_funcs:
      print(func.__name__, func(target, predictions))

In [4]:
# Load the dataset as a (features, target) pair instead of a Bunch object.
# NOTE(review): load_boston is deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the installed version still provides it, or switch datasets.
X, y = load_boston(return_X_y=True)

In [7]:
# Step 1: keep 75% of the rows for training and set 25% aside as a hold-out.
# The hold-out gets its own names so that Xtest/ytest only ever refer to the
# final test split (previously they were bound to the hold-out first and then
# overwritten with half of it).
Xtrain, Xholdout, ytrain, yholdout = train_test_split(
  X, y,
  test_size=.25,
  random_state=5)

# Step 2: cut the hold-out in half -> validation and test (12.5% of rows each).
Xvalid, Xtest, yvalid, ytest = train_test_split(
  Xholdout, yholdout,
  test_size=.5,
  random_state=5)

In [39]:
# Baseline CatBoost configuration; entries set to None are passed through as-is.
params = {
  'loss_function': 'MAE',
  'iterations': 500,
  'learning_rate': None,
  'l2_leaf_reg': 3.,
  'bootstrap_type': 'No',
  'random_strength': None,
  'use_best_model': True,
  'max_depth': 6,
  'grow_policy': 'SymmetricTree',
  'one_hot_max_size': 2,
  'rsm': None,  # random subspace method (original note: 1)
  'nan_mode': 'Min',
  'leaf_estimation_method': 'Exact',
  'early_stopping_rounds': 20,
  'random_seed': 5,
}

mdl = CatBoostRegressor(**params)

# Fit on the training split; the validation split is supplied as eval_set.
# fit() is the last expression of the cell, so its return value is displayed.
mdl.fit(Xtrain, ytrain, eval_set=(Xvalid, yvalid))

0:	learn: 6.4141837	test: 5.6837766	best: 5.6837766 (0)	total: 49.5ms	remaining: 24.7s
1:	learn: 6.2663281	test: 5.5538885	best: 5.5538885 (1)	total: 51.6ms	remaining: 12.9s
2:	learn: 6.1443543	test: 5.4451517	best: 5.4451517 (2)	total: 53.7ms	remaining: 8.89s
3:	learn: 6.0216052	test: 5.3407240	best: 5.3407240 (3)	total: 55.7ms	remaining: 6.91s
4:	learn: 5.8978459	test: 5.2342340	best: 5.2342340 (4)	total: 58ms	remaining: 5.75s
5:	learn: 5.7805496	test: 5.1342124	best: 5.1342124 (5)	total: 60.3ms	remaining: 4.96s
6:	learn: 5.6719392	test: 5.0331422	best: 5.0331422 (6)	total: 62.6ms	remaining: 4.41s
7:	learn: 5.5540421	test: 4.9312116	best: 4.9312116 (7)	total: 65.3ms	remaining: 4.01s
8:	learn: 5.4405730	test: 4.8470331	best: 4.8470331 (8)	total: 67.6ms	remaining: 3.69s
9:	learn: 5.3546517	test: 4.7916323	best: 4.7916323 (9)	total: 69.8ms	remaining: 3.42s
10:	learn: 5.2465085	test: 4.6954344	best: 4.6954344 (10)	total: 72.1ms	remaining: 3.2s
11:	learn: 5.1492941	test: 4.6100199	best: 4

<catboost.core.CatBoostRegressor at 0x7fbb77bf33d0>

In [43]:
# Report validation and test error metrics for the baseline model `mdl`.
eval_mdl(mdl)

Validation
mean_absolute_error 1.939828952620464
mean_absolute_prc_error 0.10141613633890169
median_absolute_error 1.409625108276547
median_absolute_prc_error 0.06844816711750623
mean_squared_error 8.383751736628101

Test
mean_absolute_error 1.9001536588384225
mean_absolute_prc_error 0.09079983178421996
median_absolute_error 1.2270915426960176
median_absolute_prc_error 0.05898540411016738
mean_squared_error 9.587766915596655


In [27]:
!pip install --upgrade sklearn



In [16]:
%%time
### Optuna hyperparameter optimization

def objective(trial):
  """Optuna objective: fit one CatBoost model and score it on validation data.

  Only ``max_depth`` is sampled (an integer in [3, 12]); every other setting
  is fixed. The data comes from the notebook-level splits ``Xtrain``/``ytrain``
  and ``Xvalid``/``yvalid``.

  Args:
    trial: Optuna trial used to draw the hyperparameter suggestion.

  Returns:
    Mean absolute error of the fitted model on the validation split.
  """
  depth = trial.suggest_int('max_depth', 3, 12)

  trial_params = {
    'loss_function': 'MAE',
    'iterations': 500,
    'learning_rate': None,
    'l2_leaf_reg': 3.,
    'bootstrap_type': 'No',
    'random_strength': None,
    'use_best_model': True,
    'max_depth': depth,
    'grow_policy': 'SymmetricTree',
    'one_hot_max_size': 2,
    'rsm': None,  # random subspace method (original note: 1)
    'nan_mode': 'Min',
    'leaf_estimation_method': 'Exact',
    'early_stopping_rounds': 20,
    'random_seed': 5,
    'logging_level': 'Silent',
  }

  model = CatBoostRegressor(**trial_params)
  model.fit(Xtrain, ytrain, eval_set=(Xvalid, yvalid))

  valid_pred = model.predict(Xvalid)
  return skm.mean_absolute_error(yvalid, valid_pred)

# Minimise the objective's return value using a TPE sampler seeded with 5.
study = optuna.create_study(
    sampler=optuna.samplers.TPESampler(seed=5),
    direction='minimize')

# Search budget: n_trials=100 and timeout=600 are both passed to optimize().
study.optimize(func=objective, timeout=600, n_trials=100)

AttributeError: ignored

In [None]:
# Scratch cell: a bare module name as the last expression only displays the
# module's repr. NOTE(review): looks like leftover debugging — consider removing.
optuna

In [18]:
# Scratch check that a metric function exposes `__name__`, the attribute
# eval_mdl prints as each metric's label.
# NOTE(review): looks like leftover exploration — consider removing.
skm.mean_squared_error.__name__

'mean_squared_error'