In [1]:
import optuna
import joblib
import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stesml.model_tools import build_train_test_model

  from pandas import MultiIndex, Int64Index


In [6]:
def objective(trial):
    """Optuna objective: sample hyperparameters for ``model_type`` and score them.

    Relies on notebook-level globals that must be defined before
    ``study.optimize`` is called: ``study``, ``model_type``, ``data_dir``,
    ``target``, ``metric`` and ``n_repeats``.

    Args:
        trial: Object exposing ``suggest_int`` / ``suggest_float`` (an Optuna
            trial when this function is driven by ``study.optimize``).

    Returns:
        The first element of the pair returned by ``build_train_test_model``,
        which is the value Optuna optimises.

    Raises:
        ValueError: If ``model_type`` is not 'NN', 'XGBoost' or 'RandomForest'.
    """
    supported_model_types = ('NN', 'XGBoost', 'RandomForest')
    # Fail fast with a clear message; otherwise an unknown model type would
    # fall through every branch and crash on an unbound ``result``.
    if model_type not in supported_model_types:
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_model_types}"
        )

    # Save the study before running the next trial, so an interrupted run can
    # be resumed. The file name has hour resolution, so checkpoints written
    # within the same hour overwrite each other.
    joblib.dump(study, "../studies/study_" + model_type + "_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

    if model_type == 'NN':
        scale = True
        n_layers = trial.suggest_int("n_layers", 1, 5)
        n_hidden_units = trial.suggest_int("n_hidden_units", 10, 100)
        batch_size = trial.suggest_int("batch_size", 10, 10000, log=True)
        parameters = {'n_layers': n_layers, 'n_hidden_units': n_hidden_units, 'batch_size': batch_size, 'epochs': 200}
        # NOTE(review): NN passes t_max while XGBoost passes t_min (both 360) -
        # confirm this asymmetry is intended.
        extra_kwargs = {'t_max': 360}
    elif model_type == 'XGBoost':
        scale = False
        learning_rate = trial.suggest_float("learning_rate", 0.01, 1)
        subsample = trial.suggest_float("subsample", 0.01, 1)
        colsample_bytree = trial.suggest_float("colsample_bytree", 0.01, 1)
        # Set this as a maximum, model will stop with early stopping.
        # NOTE(review): the training log reports 'Parameters: { "num_boost_round" }
        # might not be used', so this value may not reach the booster as a round
        # limit - verify how build_train_test_model forwards it.
        num_boost_round = 10000
        parameters = {'learning_rate': learning_rate, 'subsample': subsample, 'colsample_bytree': colsample_bytree, 'num_boost_round': num_boost_round}
        extra_kwargs = {'t_min': 360}
    else:  # 'RandomForest' (guaranteed by the validation above)
        scale = False
        n_estimators = trial.suggest_int("n_estimators", 1, 200, log=True)
        max_depth = trial.suggest_int("max_depth", 1, 100)
        max_samples = trial.suggest_float("max_samples", .01, 1, log=True)
        parameters = {'n_estimators': n_estimators, 'max_depth': max_depth, 'max_samples': max_samples}
        extra_kwargs = {}

    # The second returned element is not needed for the optimisation.
    result, _ = build_train_test_model(data_dir, model_type, target, metric, scale, parameters, n_repeats, **extra_kwargs)

    return result

In [4]:
# Experiment configuration: everything a reader might tune lives in this cell.
data_dir = "../data/Sulfur_Models/heating/heating_all"
model_type = 'XGBoost' # Options: NN, XGBoost, RandomForest
target = 'h' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2
n_repeats = 1 # Number of times to repeat 5-fold CV. Each repeat gives a different shuffle.

# Direction Optuna must optimise each supported metric in:
# an error metric is minimised, a goodness-of-fit score is maximised.
optimization_directions = {'rmse': 'minimize', 'r2': 'maximize'}
if metric not in optimization_directions:
    # Fail here rather than later with an undefined `direction`.
    raise ValueError(
        f"Unsupported metric {metric!r}; expected one of {sorted(optimization_directions)}"
    )
direction = optimization_directions[metric]

# Set load_study to False to start a new study instead of resuming a saved one.
load_study = True
# NOTE(review): this file name says "NN" while model_type above is 'XGBoost';
# resuming it would add trials from a different search space to that study -
# confirm the intended study file before running.
study_name = "study_NN_20220708-15.pkl"

In [5]:
if not load_study:
    # Start a brand-new study optimised in the configured direction.
    study = optuna.create_study(direction=direction)
else:
    # Resume a previously saved study. joblib.load unpickles the file, so only
    # point study_name at study files you trust.
    study = joblib.load("../studies/" + study_name)
    # Summarise the best trial found so far.
    print("Best trial until now:")
    print(" Value: ", study.best_trial.value)
    print(" Params: ")
    for param_name, param_value in study.best_trial.params.items():
        print(f"    {param_name}: {param_value}")

Best trial until now:
 Value:  0.30885909543570805
 Params: 
    n_layers: 1
    n_hidden_units: 82
    batch_size: 10


In [10]:
# Run 250 trials of `objective` on the study. objective() pickles the study at
# the start of every trial, so a manually interrupted run can be resumed from
# the saved file.
study.optimize(objective, n_trials=250)

Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:36.08525
[20]	test-rmse:7.25067
[40]	test-rmse:7.49392
[45]	test-rmse:7.54717
Split #0, This Result: 7.5472, Average Result: 7.5472
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:31.49361
[20]	test-rmse:10.56465
[40]	test-rmse:9.00535
[60]	test-rmse:9.27064
[66]	test-rmse:9.30229
Split #1, This Result: 9.3159, Average Result: 8.4315
Parameters: { "num_boost_round" } might 

[32m[I 2022-07-08 19:06:03,009][0m Trial 1 finished with value: 6.694686286377485 and parameters: {'learning_rate': 0.22039735498504276, 'subsample': 0.20661051170249276, 'colsample_bytree': 0.3394535618030828}. Best is trial 1 with value: 6.694686286377485.[0m


Split #4, This Result: 4.3256, Average Result: 6.6947
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:15.15673
[20]	test-rmse:7.34032
[22]	test-rmse:7.25396
Split #0, This Result: 7.2540, Average Result: 7.2540
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:16.30763
[20]	test-rmse:5.80627
[31]	test-rmse:5.98935
Split #1, This Result: 5.9894, Average Result: 6.6217
Parameters: { "num_boost_round" } might not be used.

  

[32m[I 2022-07-08 19:08:08,451][0m Trial 2 finished with value: 5.524612415643093 and parameters: {'learning_rate': 0.7303137624182325, 'subsample': 0.7460404902385005, 'colsample_bytree': 0.8555578185452548}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 5.0932, Average Result: 5.5246
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:43.82908
[20]	test-rmse:25.02223
[40]	test-rmse:15.80593
[60]	test-rmse:11.68704
[80]	test-rmse:9.73049
[100]	test-rmse:8.67115
[120]	test-rmse:7.94514
[140]	test-rmse:7.60492
[160]	test-rmse:7.46429
[180]	test-rmse:7.19073
[200]	test-rmse:7.07516
[220]	test-rmse:6.97005
[240]	test-rmse:6.90494
[260]	test-rmse:6.92436
Split #0, This Result: 6.9265, Average Result: 6.9265
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
 

[32m[I 2022-07-08 19:13:37,904][0m Trial 3 finished with value: 6.49308598486734 and parameters: {'learning_rate': 0.037481463284483825, 'subsample': 0.1165635536838273, 'colsample_bytree': 0.5420456087428783}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 4.3801, Average Result: 6.4931
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:21.51784
[20]	test-rmse:10.44326
[24]	test-rmse:9.86637
Split #0, This Result: 9.8664, Average Result: 9.8664
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:20.33566
[20]	test-rmse:8.75381
[40]	test-rmse:9.06894
[43]	test-rmse:9.08257
Split #1, This Result: 9.0825, Average Result: 9.4745
Parameters: { "num_boost_round" 

[32m[I 2022-07-08 19:15:17,292][0m Trial 4 finished with value: 7.572848715318591 and parameters: {'learning_rate': 0.579664060703109, 'subsample': 0.29535165074077846, 'colsample_bytree': 0.1607612659227494}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 6.5681, Average Result: 7.5728
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:16.67624
[20]	test-rmse:9.66965
[23]	test-rmse:9.75294
Split #0, This Result: 9.7529, Average Result: 9.7529
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:16.70320
[20]	test-rmse:9.23375
[27]	test-rmse:9.88324
Split #1, This Result: 9.8832, Average Result: 9.8181
Parameters: { "num_boost_round" } might not be used.

  

[32m[I 2022-07-08 19:17:06,158][0m Trial 5 finished with value: 7.623634861419721 and parameters: {'learning_rate': 0.7128896432964592, 'subsample': 0.8732911507120946, 'colsample_bytree': 0.1469759235107293}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 6.1274, Average Result: 7.6236
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:10.30826
[20]	test-rmse:8.18831
[23]	test-rmse:8.15433
Split #0, This Result: 8.1543, Average Result: 8.1543
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:11.26027
[20]	test-rmse:7.42756
[23]	test-rmse:7.39851
Split #1, This Result: 7.3985, Average Result: 7.7764
Parameters: { "num_boost_round" } might not be used.

  

[32m[I 2022-07-08 19:19:09,241][0m Trial 6 finished with value: 6.084889095493671 and parameters: {'learning_rate': 0.9965560493141323, 'subsample': 0.7118566668671396, 'colsample_bytree': 0.7585817884517483}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 7.4482, Average Result: 6.0849
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:23.19181
[20]	test-rmse:9.76637
[23]	test-rmse:9.32424
Split #0, This Result: 9.4351, Average Result: 9.4351
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:21.60832
[20]	test-rmse:9.04669
[40]	test-rmse:9.92543
[43]	test-rmse:9.84015
Split #1, This Result: 9.8402, Average Result: 9.6376
Parameters: { "num_boost_round" }

[32m[I 2022-07-08 19:21:04,691][0m Trial 7 finished with value: 7.5000660606765335 and parameters: {'learning_rate': 0.5360431170621014, 'subsample': 0.6077393798054037, 'colsample_bytree': 0.2445039110923439}. Best is trial 2 with value: 5.524612415643093.[0m


Split #4, This Result: 5.9115, Average Result: 7.5001
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:32.26539
[20]	test-rmse:8.18279
[29]	test-rmse:8.89785
Split #0, This Result: 8.8979, Average Result: 8.8979
Parameters: { "num_boost_round" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


[0]	test-rmse:28.44063
[20]	test-rmse:9.57957
[40]	test-rmse:9.62026
[45]	test-rmse:9.61449
Split #1, This Result: 9.6149, Average Result: 9.2564
Parameters: { "num_boost_round" }

KeyboardInterrupt: 

In [None]:
# Hyperparameters of the best trial recorded in the study.
best_params = study.best_params

In [18]:
# Display the best hyperparameters as the cell output.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Objective value achieved by the best trial recorded in the study.
best_value = study.best_value

In [22]:
# Display the best objective value as the cell output.
best_value

1.0137617092656621