In [7]:
import optuna
import joblib
import datetime

In [8]:
from stesml.model_tools import train_and_validate_model

In [9]:
def objective(trial):
    """Optuna objective: sample hyperparameters for ``model_type`` and score them.

    Reads the notebook-level globals ``study``, ``model_type``, ``data_dir``,
    ``target``, ``metric``, ``n_repeats`` and ``split_test_data``.

    Side effect: before sampling, the current ``study`` is pickled to
    ``../studies/study_<model_type>_<YYYYmmdd-HH>.pkl`` so progress made by
    earlier trials survives an interrupted run.

    Args:
        trial: The Optuna trial used to draw hyperparameter suggestions.

    Returns:
        The first element of the tuple returned by ``train_and_validate_model``
        (the second element, ``addendum``, is discarded).

    Raises:
        ValueError: If ``model_type`` is not 'NN', 'XGBoost' or 'RandomForest'.
            Previously this case fell through every branch and failed with an
            UnboundLocalError on ``result`` after a study file had been written.
    """
    supported_model_types = ('NN', 'XGBoost', 'RandomForest')
    if model_type not in supported_model_types:
        raise ValueError(
            "Unsupported model_type " + repr(model_type)
            + "; expected one of " + ", ".join(supported_model_types)
        )

    # Save the study before running the next trial
    joblib.dump(study, "../studies/study_" + model_type + "_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

    # Keyword arguments that only some model types pass to the trainer.
    extra_kwargs = {}

    if model_type == 'NN':
        scale = True
        n_layers = trial.suggest_int("n_layers", 1, 5)
        n_hidden_units = trial.suggest_int("n_hidden_units", 10, 100)
        batch_size = trial.suggest_int("batch_size", 1, 100, log=True)
        parameters = {'n_layers': n_layers, 'n_hidden_units': n_hidden_units, 'batch_size': batch_size, 'epochs': 200}
        extra_kwargs['t_max'] = 360
    elif model_type == 'XGBoost':
        scale = False
        learning_rate = trial.suggest_float("learning_rate", 0.01, 1)
        num_boost_round = 10000 # Set this as a maximum, model will stop with early stopping
        # NOTE(review): the run log prints 'Parameters: { "num_boost_round" } might not be used',
        # which suggests this key reaches the XGBoost core params dict -- confirm that
        # train_and_validate_model also passes it as the boosting-round limit.
        parameters = {'learning_rate': learning_rate, 'num_boost_round': num_boost_round}
    else:  # 'RandomForest' (guaranteed by the validation above)
        scale = False
        n_estimators = trial.suggest_int("n_estimators", 1, 200, log=True)
        max_depth = trial.suggest_int("max_depth", 1, 100)
        max_samples = trial.suggest_float("max_samples", .01, 1, log=True)
        parameters = {'n_estimators': n_estimators, 'max_depth': max_depth, 'max_samples': max_samples}

    result, addendum = train_and_validate_model(
        data_dir, model_type, target, metric, scale, parameters, n_repeats,
        split_test_data=split_test_data, **extra_kwargs
    )

    return result

In [10]:
# --- Run configuration: everything a reader might tune lives in this cell ---
data_dir = "../data/Sulfur_Models/heating/heating_all"
model_type = 'XGBoost' # Options: NN, XGBoost, RandomForest
target = 'h' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2
n_repeats = 1 # Number of times to repeat 5-fold CV. Each repeat gives a different shuffle.
split_test_data = True # Split data into train (64%), val (16%), and test (20%) (True) or just train (80%) and val (20%) (False)

# Optimisation direction passed to optuna.create_study for each supported metric.
# (The previous if/elif misspelled the name as `directon` in the 'r2' branch,
# leaving `direction` undefined for that metric.)
METRIC_DIRECTIONS = {'rmse': 'minimize', 'r2': 'maximize'}
if metric not in METRIC_DIRECTIONS:
    raise ValueError(
        "Unsupported metric " + repr(metric)
        + "; expected one of " + ", ".join(METRIC_DIRECTIONS)
    )
direction = METRIC_DIRECTIONS[metric]

# Set load_study to True to resume from the pickled study named below
# instead of creating a fresh one.
load_study = False
study_name = "study_XGBoost_20220714-09.pkl"

In [11]:
# Resume a previously pickled study, or start a new in-memory one.
if load_study:
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load study files from a trusted source.
    study = joblib.load("../studies/" + study_name)
    try:
        best_trial = study.best_trial
    except ValueError:
        # Optuna raises ValueError when the study has no completed trial.
        # This can happen here because the study is dumped *before* each trial
        # runs, so a file saved ahead of the first trial holds no results.
        print("Loaded study has no completed trials yet.")
    else:
        print("Best trial until now:")
        print(" Value: ", best_trial.value)
        print(" Params: ")
        for key, value in best_trial.params.items():
            print(f"    {key}: {value}")
else:
    study = optuna.create_study(direction=direction)

[32m[I 2022-07-18 12:11:48,992][0m A new study created in memory with name: no-name-31c265be-d0a1-4334-ac1b-b17b654e83f7[0m


In [12]:
# Run the hyperparameter search: up to 250 calls to `objective` on `study`.
study.optimize(func=objective, n_trials=250)

Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:97.86226
[20]	test-rmse:24.31489
[40]	test-rmse:8.76030
[60]	test-rmse:6.67400
[80]	test-rmse:6.45699
[100]	test-rmse:6.39986
[120]	test-rmse:6.36710
[140]	test-rmse:6.28952
[160]	test-rmse:6.25675
[180]	test-rmse:6.21325
[200]	test-rmse:6.20491
[220]	test-rmse:6.20457
[228]	test-rmse:6.20474
Split #0, This Result: 6.2047, Average Result: 6.2047
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cas

[32m[I 2022-07-18 12:26:45,780][0m Trial 0 finished with value: 4.353045531223534 and parameters: {'learning_rate': 0.07124237992671427}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 2.9083, Average Result: 4.3530
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:74.54008
[20]	test-rmse:6.84860
[40]	test-rmse:6.62746
[60]	test-rmse:6.57717
[80]	test-rmse:6.51551
[100]	test-rmse:6.51009
[120]	test-rmse:6.50727
[140]	test-rmse:6.49613
[160]	test-rmse:6.49007
[180]	test-rmse:6.47706
[200]	test-rmse:6.47467
[220]	test-rmse:6.47382
[240]	test-rmse:6.46819
[255]	test-rmse:6.46948
Split #0, This Result: 6.4696, Average Result: 6.4696
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  bu

[32m[I 2022-07-18 12:42:45,308][0m Trial 1 finished with value: 4.369823234555178 and parameters: {'learning_rate': 0.2982187595588735}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 2.7532, Average Result: 4.3698
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:39.87043
[20]	test-rmse:6.86044
[40]	test-rmse:6.63881
[60]	test-rmse:6.55759
[80]	test-rmse:6.54751
[100]	test-rmse:6.53834
[120]	test-rmse:6.53755
[140]	test-rmse:6.52918
[160]	test-rmse:6.52601
[180]	test-rmse:6.52194
[200]	test-rmse:6.52384
[203]	test-rmse:6.52381
Split #0, This Result: 6.5238, Average Result: 6.5238
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an i

[32m[I 2022-07-18 12:53:16,543][0m Trial 2 finished with value: 4.543381194448861 and parameters: {'learning_rate': 0.6420568038686019}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 3.0405, Average Result: 4.5434
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:29.76937
[20]	test-rmse:8.49889
[40]	test-rmse:8.37600
[60]	test-rmse:8.35666
[71]	test-rmse:8.35783
Split #0, This Result: 8.3578, Average Result: 8.3578
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:26.53769
[20]	test-rmse:6.88829
[40]	test-rmse:6.82507
[60]	test-rmse:6.78068
[75]	test-rmse:6.78181
Split #1, This Res

[32m[I 2022-07-18 12:58:53,253][0m Trial 3 finished with value: 5.252532586571922 and parameters: {'learning_rate': 0.7474561798309677}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 4.4454, Average Result: 5.2525
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:20.69204
[20]	test-rmse:7.59031
[40]	test-rmse:7.57916
[50]	test-rmse:7.60095
Split #0, This Result: 7.6010, Average Result: 7.6010
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:18.40678
[20]	test-rmse:6.47404
[40]	test-rmse:6.17919
[58]	test-rmse:6.16704
Split #1, This Result: 6.1671, Average Result: 6.8840
Parameters

[32m[I 2022-07-18 13:03:25,674][0m Trial 4 finished with value: 5.129486368582173 and parameters: {'learning_rate': 0.8513608907372331}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 4.1512, Average Result: 5.1295
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:81.43060
[20]	test-rmse:6.94803
[40]	test-rmse:6.68151
[60]	test-rmse:6.64358
[80]	test-rmse:6.54389
[100]	test-rmse:6.52789
[120]	test-rmse:6.52328
[140]	test-rmse:6.51373
[160]	test-rmse:6.50961
[180]	test-rmse:6.50299
[200]	test-rmse:6.48949
[220]	test-rmse:6.49055
[224]	test-rmse:6.49046
Split #0, This Result: 6.4905, Average Result: 6.4905
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongl

[32m[I 2022-07-18 13:13:30,233][0m Trial 5 finished with value: 4.457140238683293 and parameters: {'learning_rate': 0.2309862836163988}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 2.7763, Average Result: 4.4571
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:58.45140
[20]	test-rmse:7.23946
[40]	test-rmse:7.11262
[60]	test-rmse:7.05165
[80]	test-rmse:6.99667
[100]	test-rmse:6.98475
[120]	test-rmse:6.96939
[140]	test-rmse:6.96894
[160]	test-rmse:6.95388
[180]	test-rmse:6.94983
[200]	test-rmse:6.94799
[220]	test-rmse:6.94515
[233]	test-rmse:6.95619
Split #0, This Result: 6.9564, Average Result: 6.9564
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongl

[32m[I 2022-07-18 13:23:31,254][0m Trial 6 finished with value: 4.63222919575051 and parameters: {'learning_rate': 0.4561469292194776}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 3.5726, Average Result: 4.6322
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:57.48723
[20]	test-rmse:6.68749
[40]	test-rmse:6.55452
[60]	test-rmse:6.52345
[80]	test-rmse:6.50670
[100]	test-rmse:6.49554
[120]	test-rmse:6.49313
[140]	test-rmse:6.48272
[160]	test-rmse:6.47521
[180]	test-rmse:6.46924
[200]	test-rmse:6.46282
[220]	test-rmse:6.46107
[236]	test-rmse:6.46190
Split #0, This Result: 6.4619, Average Result: 6.4619
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongl

[32m[I 2022-07-18 13:33:55,267][0m Trial 7 finished with value: 4.771180029615265 and parameters: {'learning_rate': 0.46567067410818896}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 3.7387, Average Result: 4.7712
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:41.08541
[20]	test-rmse:6.49167
[40]	test-rmse:6.22839
[60]	test-rmse:6.16133
[80]	test-rmse:6.12941
[100]	test-rmse:6.11174
[120]	test-rmse:6.10162
[140]	test-rmse:6.08020
[154]	test-rmse:6.07993
Split #0, This Result: 6.0798, Average Result: 6.0798
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:36.59774
[20]	test-rmse

[32m[I 2022-07-18 13:44:07,060][0m Trial 8 finished with value: 4.615883880852982 and parameters: {'learning_rate': 0.629682447647605}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 3.1379, Average Result: 4.6159
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:78.53274
[20]	test-rmse:6.26211
[40]	test-rmse:5.78884
[60]	test-rmse:5.74973
[80]	test-rmse:5.72783
[100]	test-rmse:5.70757
[120]	test-rmse:5.70835
[134]	test-rmse:5.70567
Split #0, This Result: 5.7057, Average Result: 5.7057
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:69.76681
[20]	test-rmse:6.45788
[40]	test-rmse:

[32m[I 2022-07-18 13:56:44,493][0m Trial 9 finished with value: 4.358902504523096 and parameters: {'learning_rate': 0.25924658404049106}. Best is trial 0 with value: 4.353045531223534.[0m


Split #4, This Result: 3.0923, Average Result: 4.3589
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:102.28700
[20]	test-rmse:58.69284
[40]	test-rmse:34.15188
[60]	test-rmse:20.33116
[80]	test-rmse:12.87987
[100]	test-rmse:9.16770
[120]	test-rmse:7.50491
[140]	test-rmse:6.85377
[160]	test-rmse:6.62807
[180]	test-rmse:6.54529
[200]	test-rmse:6.52165
[220]	test-rmse:6.51245
[240]	test-rmse:6.48210
[260]	test-rmse:6.44527
[280]	test-rmse:6.41788
[300]	test-rmse:6.41768
[304]	test-rmse:6.41570
Split #0, This Result: 6.4153, Average Result: 6.4153
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mista

[32m[I 2022-07-18 14:37:52,222][0m Trial 10 finished with value: 4.304502234734034 and parameters: {'learning_rate': 0.028326389959607595}. Best is trial 10 with value: 4.304502234734034.[0m


Split #4, This Result: 3.1214, Average Result: 4.3045
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:103.75775
[20]	test-rmse:78.71982
[40]	test-rmse:59.95507
[60]	test-rmse:45.74416
[80]	test-rmse:35.08567
[100]	test-rmse:27.08765
[120]	test-rmse:21.00374
[140]	test-rmse:16.65107
[160]	test-rmse:13.43546
[180]	test-rmse:11.16351
[200]	test-rmse:9.57466
[220]	test-rmse:8.47469
[240]	test-rmse:7.74722
[260]	test-rmse:7.29557
[280]	test-rmse:7.00392
[300]	test-rmse:6.84176
[320]	test-rmse:6.75688
[340]	test-rmse:6.67866
[360]	test-rmse:6.62701
[380]	test-rmse:6.59599
[400]	test-rmse:6.58846
[420]	test-rmse:6.56684
[439]	test-rmse:6.57371
Split #0, This Result: 6.5737, Average Result: 6.5737
Parameters: {

KeyboardInterrupt: 

In [None]:
# Hyperparameters of the best trial recorded in `study` so far.
best_params = study.best_params

In [18]:
# Display the best hyperparameters.
# NOTE(review): the saved output lists NN-style keys (n_layers, n_hidden_units,
# batch_size, epochs) although the config cell selects model_type = 'XGBoost' --
# presumably left over from an earlier run; re-run on a fresh kernel to refresh.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Objective value of the best trial recorded in `study` so far.
best_value = study.best_value

In [22]:
# Display the best objective value.
# NOTE(review): the saved output does not match the best value reported in the
# optimisation log shown above -- presumably from a different run; verify.
best_value

1.0137617092656621