In [42]:
import optuna
import joblib
import datetime

In [43]:
from stesml.model_tools import build_train_test_model

In [52]:
def objective(trial):
    """Optuna objective: sample hyperparameters for `model_type` and score them.

    Reads the notebook-level globals `study`, `model_type`, `data_dir`,
    `target`, `metric` and `n_repeats`, so the configuration cell and the
    study-creation cell must have been run before this function is called.

    Args:
        trial: Object exposing Optuna's `suggest_int(name, low, high, log=...)`,
            used to sample the hyperparameters of the selected model type.

    Returns:
        The first element of the pair returned by `build_train_test_model`
        (presumably the cross-validated `metric` value -- confirm in
        stesml.model_tools).

    Raises:
        ValueError: If `model_type` is not 'NN', 'XGBoost' or 'RandomForest'.
    """
    supported_model_types = ('NN', 'XGBoost', 'RandomForest')
    if model_type not in supported_model_types:
        # Fail before writing a checkpoint: previously an unknown model type
        # fell through every branch and died with UnboundLocalError on `result`.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected one of {supported_model_types}"
        )

    # Save the study before running the next trial.
    # The timestamp only has hour resolution, so every trial started within
    # the same hour overwrites the same checkpoint file.
    joblib.dump(study, "../studies/study_" + model_type + "_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

    if model_type == 'NN':
        scale = True
        parameters = {
            'n_layers': trial.suggest_int("n_layers", 1, 5),
            'n_hidden_units': trial.suggest_int("n_hidden_units", 10, 100),
            'batch_size': trial.suggest_int("batch_size", 10, 10000, log=True),
            # Neglect optimizing # of epochs, allow early stopping to determine # of epochs
            'epochs': 20,
        }
    elif model_type == 'XGBoost':
        scale = False
        parameters = {'n_estimators': trial.suggest_int("n_estimators", 50, 10000, log=True)}
    else:  # 'RandomForest' -- guaranteed by the validation above
        scale = False
        parameters = {'n_estimators': trial.suggest_int("n_estimators", 1, 200, log=True)}

    # Second element of the returned pair is not needed for the optimization.
    result, _ = build_train_test_model(data_dir, model_type, target, metric, scale, parameters, n_repeats)
    return result

In [53]:
# Experiment configuration read by `objective` and the study cells.
data_dir = "../data/Sulfur_Models/"
model_type = 'RandomForest' # Options: NN, XGBoost, RandomForest
target = 'h' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2
n_repeats = 2 # Number of times to repeat 5-fold CV. Each repeat gives a different shuffle.

# Optimization direction for each supported metric: an error metric is
# minimized, a goodness-of-fit score is maximized.
metric_directions = {'rmse': 'minimize', 'r2': 'maximize'}
if metric not in metric_directions:
    # Fail here rather than with a NameError on `direction` when the study is created.
    raise ValueError(f"Unsupported metric {metric!r}; expected one of {sorted(metric_directions)}")
direction = metric_directions[metric]

# Set load_study = True to resume from the pickled study `study_name`
# instead of creating a new one.
load_study = False
study_name = "study_NN_20220630-18.pkl"

In [54]:
# Resume a previously saved study, or start a fresh in-memory one.
if load_study:
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code -- only load study files that you created yourself.
    study = joblib.load("../studies/" + study_name)
    try:
        best_trial = study.best_trial
    except ValueError:
        # Optuna raises ValueError when no trial has completed yet. That can
        # happen here because `objective` checkpoints the study before the
        # first trial has finished.
        print("Loaded study has no completed trials yet.")
    else:
        print("Best trial until now:")
        print(" Value: ", best_trial.value)
        print(" Params: ")
        for key, value in best_trial.params.items():
            print(f"    {key}: {value}")
else:
    study = optuna.create_study(direction=direction)

[32m[I 2022-06-30 21:56:05,692][0m A new study created in memory with name: no-name-c95db6f6-4cd2-4be6-884d-c9ec4d0e82dd[0m


In [55]:
# Run the hyperparameter search: 50 trials of `objective` on `study`.
study.optimize(objective, n_trials=50)

Split #0, This Result: 5.6353, Average Result: 5.6353
Split #1, This Result: 6.2788, Average Result: 5.9571
Split #2, This Result: 4.4736, Average Result: 5.4626
Split #3, This Result: 5.1284, Average Result: 5.3790


[32m[I 2022-06-30 21:58:12,474][0m Trial 0 finished with value: 7.618012417118429 and parameters: {'n_estimators': 16}. Best is trial 0 with value: 7.618012417118429.[0m


Split #4, This Result: 16.5739, Average Result: 7.6180
Split #0, This Result: 5.6259, Average Result: 5.6259
Split #1, This Result: 6.2705, Average Result: 5.9482
Split #2, This Result: 4.4708, Average Result: 5.4557
Split #3, This Result: 4.1043, Average Result: 5.1179


[32m[I 2022-06-30 22:06:18,301][0m Trial 1 finished with value: 7.407360133792676 and parameters: {'n_estimators': 113}. Best is trial 1 with value: 7.407360133792676.[0m


Split #4, This Result: 16.5653, Average Result: 7.4074
Split #0, This Result: 5.7070, Average Result: 5.7070
Split #1, This Result: 6.3394, Average Result: 6.0232
Split #2, This Result: 4.7003, Average Result: 5.5823
Split #3, This Result: 8.8776, Average Result: 6.4061


[32m[I 2022-06-30 22:07:30,751][0m Trial 2 finished with value: 8.448413050861713 and parameters: {'n_estimators': 1}. Best is trial 1 with value: 7.407360133792676.[0m


Split #4, This Result: 16.6177, Average Result: 8.4484
Split #0, This Result: 5.6302, Average Result: 5.6302
Split #1, This Result: 6.2763, Average Result: 5.9533
Split #2, This Result: 4.4746, Average Result: 5.4604
Split #3, This Result: 4.8176, Average Result: 5.2997


[32m[I 2022-06-30 22:09:35,051][0m Trial 3 finished with value: 7.551990249730645 and parameters: {'n_estimators': 21}. Best is trial 1 with value: 7.407360133792676.[0m


Split #4, This Result: 16.5612, Average Result: 7.5520
Split #0, This Result: 5.6828, Average Result: 5.6828
Split #1, This Result: 6.2891, Average Result: 5.9859
Split #2, This Result: 4.5855, Average Result: 5.5191
Split #3, This Result: 3.1634, Average Result: 4.9302


[32m[I 2022-06-30 22:10:45,416][0m Trial 4 finished with value: 7.261170381221359 and parameters: {'n_estimators': 2}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5851, Average Result: 7.2612
Split #0, This Result: 5.6244, Average Result: 5.6244
Split #1, This Result: 6.2705, Average Result: 5.9475
Split #2, This Result: 4.4664, Average Result: 5.4538
Split #3, This Result: 4.3465, Average Result: 5.1770


[32m[I 2022-06-30 22:20:28,680][0m Trial 5 finished with value: 7.454010783875385 and parameters: {'n_estimators': 133}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5622, Average Result: 7.4540
Split #0, This Result: 5.6212, Average Result: 5.6212
Split #1, This Result: 6.2701, Average Result: 5.9457
Split #2, This Result: 4.4700, Average Result: 5.4538
Split #3, This Result: 4.3195, Average Result: 5.1702


[32m[I 2022-06-30 22:31:40,793][0m Trial 6 finished with value: 7.449463892826415 and parameters: {'n_estimators': 154}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5665, Average Result: 7.4495
Split #0, This Result: 5.6240, Average Result: 5.6240
Split #1, This Result: 6.2711, Average Result: 5.9475
Split #2, This Result: 4.4828, Average Result: 5.4593
Split #3, This Result: 4.4588, Average Result: 5.2092


[32m[I 2022-06-30 22:34:21,953][0m Trial 7 finished with value: 7.4814876852958605 and parameters: {'n_estimators': 30}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5708, Average Result: 7.4815
Split #0, This Result: 5.6377, Average Result: 5.6377
Split #1, This Result: 6.2718, Average Result: 5.9547
Split #2, This Result: 4.4956, Average Result: 5.4684
Split #3, This Result: 5.6465, Average Result: 5.5129


[32m[I 2022-06-30 22:35:38,014][0m Trial 8 finished with value: 7.7229967481332435 and parameters: {'n_estimators': 4}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5634, Average Result: 7.7230
Split #0, This Result: 5.6309, Average Result: 5.6309
Split #1, This Result: 6.2791, Average Result: 5.9550
Split #2, This Result: 4.4803, Average Result: 5.4635
Split #3, This Result: 4.8838, Average Result: 5.3185


[32m[I 2022-06-30 22:37:32,199][0m Trial 9 finished with value: 7.568197967140558 and parameters: {'n_estimators': 14}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5668, Average Result: 7.5682
Split #0, This Result: 5.7283, Average Result: 5.7283
Split #1, This Result: 6.3088, Average Result: 6.0186
Split #2, This Result: 4.6432, Average Result: 5.5601
Split #3, This Result: 4.5247, Average Result: 5.3013


[32m[I 2022-06-30 22:38:44,988][0m Trial 10 finished with value: 7.557438723405002 and parameters: {'n_estimators': 1}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5822, Average Result: 7.5574
Split #0, This Result: 5.6661, Average Result: 5.6661
Split #1, This Result: 6.2979, Average Result: 5.9820
Split #2, This Result: 4.4869, Average Result: 5.4836
Split #3, This Result: 6.7832, Average Result: 5.8085


[32m[I 2022-06-30 22:39:58,100][0m Trial 11 finished with value: 7.965055831023426 and parameters: {'n_estimators': 2}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5912, Average Result: 7.9651
Split #0, This Result: 5.6308, Average Result: 5.6308
Split #1, This Result: 6.2784, Average Result: 5.9546
Split #2, This Result: 4.5024, Average Result: 5.4705
Split #3, This Result: 4.1336, Average Result: 5.1363


[32m[I 2022-06-30 22:41:16,668][0m Trial 12 finished with value: 7.424687234982372 and parameters: {'n_estimators': 5}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5783, Average Result: 7.4247
Split #0, This Result: 5.6319, Average Result: 5.6319
Split #1, This Result: 6.2751, Average Result: 5.9535
Split #2, This Result: 4.4749, Average Result: 5.4606
Split #3, This Result: 4.4154, Average Result: 5.1993


[32m[I 2022-06-30 22:45:12,437][0m Trial 13 finished with value: 7.472396158740445 and parameters: {'n_estimators': 50}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5647, Average Result: 7.4724
Split #0, This Result: 5.6382, Average Result: 5.6382
Split #1, This Result: 6.2758, Average Result: 5.9570
Split #2, This Result: 4.5191, Average Result: 5.4777
Split #3, This Result: 5.2579, Average Result: 5.4227


[32m[I 2022-06-30 22:46:33,592][0m Trial 14 finished with value: 7.657299350826553 and parameters: {'n_estimators': 6}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5955, Average Result: 7.6573
Split #0, This Result: 5.6237, Average Result: 5.6237
Split #1, This Result: 6.2709, Average Result: 5.9473
Split #2, This Result: 4.4648, Average Result: 5.4532
Split #3, This Result: 4.3896, Average Result: 5.1873


[32m[I 2022-06-30 22:50:38,123][0m Trial 15 finished with value: 7.46136046901492 and parameters: {'n_estimators': 54}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5577, Average Result: 7.4614
Split #0, This Result: 5.6548, Average Result: 5.6548
Split #1, This Result: 6.3116, Average Result: 5.9832
Split #2, This Result: 4.5816, Average Result: 5.5160
Split #3, This Result: 3.9970, Average Result: 5.1362


[32m[I 2022-06-30 22:51:51,534][0m Trial 16 finished with value: 7.426824151545139 and parameters: {'n_estimators': 2}. Best is trial 4 with value: 7.261170381221359.[0m


Split #4, This Result: 16.5892, Average Result: 7.4268
Split #0, This Result: 5.6941, Average Result: 5.6941
Split #1, This Result: 6.2908, Average Result: 5.9924
Split #2, This Result: 4.5665, Average Result: 5.5171


Exception ignored in: <function WeakKeyDictionary.__init__.<locals>.remove at 0x7fe63fda0ee0>
Traceback (most recent call last):
  File "/Users/kmenear/Projects/envSulfurTES/lib/python3.9/weakref.py", line 370, in remove
    def remove(k, selfref=ref(self)):
KeyboardInterrupt: 


KeyboardInterrupt: 

In [17]:
# Keep the best trial's hyperparameters for inspection in the next cell.
best_params = study.best_params

In [18]:
# NOTE(review): the saved output lists NN hyperparameters and this cell's
# execution count is lower than the cells above, so it looks like it comes
# from an earlier run -- re-run after the search finishes.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Keep the best trial's objective value for inspection in the next cell.
best_value = study.best_value

In [22]:
# NOTE(review): this cell's execution count is lower than the search cell's,
# so the saved value is presumably from an earlier run -- re-run to refresh.
best_value

1.0137617092656621