In [1]:
import optuna
import joblib
import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from stesml.model_tools import build_train_test_model

  from pandas import MultiIndex, Int64Index


In [3]:
def objective(trial):
    """Optuna objective: sample hyperparameters for ``model_type`` and score them.

    Relies on notebook-level globals: ``study`` (checkpointed to disk before
    each trial), ``model_type``, ``data_dir``, ``target``, ``metric`` and
    ``n_repeats``.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    The first element of the pair returned by ``build_train_test_model``
    (presumably the metric averaged over all CV splits -- TODO confirm against
    ``stesml.model_tools``).

    Raises
    ------
    ValueError
        If ``model_type`` is not one of 'NN', 'XGBoost' or 'RandomForest'.
        (Previously this surfaced as an UnboundLocalError on ``return result``.)
    """
    # Save the study before running the next trial.
    # The timestamp has hour resolution, so checkpoints written within the same
    # hour overwrite one another.
    joblib.dump(study, "../studies/study_" + model_type + "_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

    if model_type == 'NN':
        scale = True
        parameters = {
            'n_layers': trial.suggest_int("n_layers", 1, 5),
            'n_hidden_units': trial.suggest_int("n_hidden_units", 10, 100),
            'batch_size': trial.suggest_int("batch_size", 10, 10000, log=True),
        }
    elif model_type == 'XGBoost':
        scale = False
        parameters = {
            'learning_rate': trial.suggest_float("learning_rate", 0.01, 1),
            'subsample': trial.suggest_float("subsample", 0.01, 1),
            'colsample_bytree': trial.suggest_float("colsample_bytree", 0.01, 1),
            # Set this as a maximum, model will stop with early stopping
            'num_boost_round': 10000,
        }
    elif model_type == 'RandomForest':
        scale = False
        parameters = {
            'n_estimators': trial.suggest_int("n_estimators", 1, 200, log=True),
            'max_depth': trial.suggest_int("max_depth", 1, 100),
            'max_samples': trial.suggest_float("max_samples", .01, 1, log=True),
        }
    else:
        # Fail loudly instead of falling through to an undefined `result`.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'NN', 'XGBoost' or 'RandomForest'"
        )

    # The second return value is not needed for the optimization.
    result, _ = build_train_test_model(data_dir, model_type, target, metric, scale, parameters, n_repeats)
    return result

In [4]:
# Experiment configuration: everything a reader might want to tune.
data_dir = "../data/Sulfur_Models/"
model_type = 'RandomForest' # Options: NN, XGBoost, RandomForest
target = 'h' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2
n_repeats = 2 # Number of times to repeat 5-fold CV. Each repeat gives a different shuffle.

# Optimization direction for each supported metric: errors are minimized,
# scores are maximized.
direction_by_metric = {'rmse': 'minimize', 'r2': 'maximize'}
if metric not in direction_by_metric:
    # Fail here rather than later with a NameError on `direction`.
    raise ValueError(
        f"Unknown metric {metric!r}; expected one of {sorted(direction_by_metric)}"
    )
direction = direction_by_metric[metric]

# Set load_study to False to start a new study instead of resuming a saved one.
load_study = True
study_name = "study_RandomForest_20220705-12.pkl"

In [5]:
if not load_study:
    # Start a brand-new study using the direction chosen in the config cell.
    study = optuna.create_study(direction=direction)
else:
    # Resume from a saved study. joblib.load unpickles the file, so only
    # load study files from a trusted source.
    study = joblib.load("../studies/" + study_name)
    print("Best trial until now:")
    best = study.best_trial
    print(" Value: ", best.value)
    print(" Params: ")
    for name, setting in best.params.items():
        print(f"    {name}: {setting}")

Best trial until now:
 Value:  6.710196429609631
 Params: 
    n_estimators: 150
    max_depth: 64
    max_samples: 0.8785156026362354


In [None]:
# Run up to 250 trials of `objective` on the current study. `objective` saves
# the study to disk at the start of every trial, so an interrupted run can be
# resumed by loading the latest saved file.
study.optimize(objective, n_trials=250)

Split #0, This Result: 5.6260, Average Result: 5.6260
Split #1, This Result: 6.2747, Average Result: 5.9503
Split #2, This Result: 4.4762, Average Result: 5.4590
Split #3, This Result: 4.7551, Average Result: 5.2830
Split #4, This Result: 16.5594, Average Result: 7.5383
Split #5, This Result: 11.8042, Average Result: 8.2493
Split #6, This Result: 3.5641, Average Result: 7.5799
Split #7, This Result: 6.1615, Average Result: 7.4026
Split #8, This Result: 4.9851, Average Result: 7.1340


[32m[I 2022-07-05 14:16:22,919][0m Trial 14 finished with value: 6.773366674795023 and parameters: {'n_estimators': 186, 'max_depth': 67, 'max_samples': 0.7089963660387368}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.5274, Average Result: 6.7734
Split #0, This Result: 5.6231, Average Result: 5.6231
Split #1, This Result: 6.2705, Average Result: 5.9468
Split #2, This Result: 4.4711, Average Result: 5.4549
Split #3, This Result: 4.3705, Average Result: 5.1838
Split #4, This Result: 16.5614, Average Result: 7.4593
Split #5, This Result: 11.7977, Average Result: 8.1824
Split #6, This Result: 3.7056, Average Result: 7.5428
Split #7, This Result: 6.1637, Average Result: 7.3705
Split #8, This Result: 5.0044, Average Result: 7.1076
Split #9, This Result: 3.5226, Average Result: 6.7491


[32m[I 2022-07-05 14:47:58,235][0m Trial 15 finished with value: 6.74905605227238 and parameters: {'n_estimators': 181, 'max_depth': 95, 'max_samples': 0.9810025329922659}. Best is trial 12 with value: 6.710196429609631.[0m


Split #0, This Result: 6.9917, Average Result: 6.9917
Split #1, This Result: 7.5253, Average Result: 7.2585
Split #2, This Result: 5.6629, Average Result: 6.7267
Split #3, This Result: 5.9972, Average Result: 6.5443
Split #4, This Result: 16.8786, Average Result: 8.6112
Split #5, This Result: 12.6662, Average Result: 9.2870
Split #6, This Result: 4.8286, Average Result: 8.6501
Split #7, This Result: 7.3104, Average Result: 8.4826
Split #8, This Result: 5.4226, Average Result: 8.1426


[32m[I 2022-07-05 14:51:08,551][0m Trial 16 finished with value: 7.887281446930777 and parameters: {'n_estimators': 83, 'max_depth': 30, 'max_samples': 0.06333333259268929}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 5.5892, Average Result: 7.8873
Split #0, This Result: 5.6256, Average Result: 5.6256
Split #1, This Result: 6.2752, Average Result: 5.9504
Split #2, This Result: 4.4891, Average Result: 5.4633
Split #3, This Result: 4.6678, Average Result: 5.2644
Split #4, This Result: 16.5619, Average Result: 7.5239
Split #5, This Result: 11.7999, Average Result: 8.2366
Split #6, This Result: 3.3419, Average Result: 7.5373
Split #7, This Result: 6.1636, Average Result: 7.3656
Split #8, This Result: 4.7443, Average Result: 7.0744


[32m[I 2022-07-05 15:01:39,037][0m Trial 17 finished with value: 6.718711388498688 and parameters: {'n_estimators': 62, 'max_depth': 72, 'max_samples': 0.9246799094869378}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.5179, Average Result: 6.7187
Split #0, This Result: 5.6536, Average Result: 5.6536
Split #1, This Result: 6.3035, Average Result: 5.9785
Split #2, This Result: 4.4938, Average Result: 5.4836
Split #3, This Result: 4.3185, Average Result: 5.1923
Split #4, This Result: 16.5565, Average Result: 7.4652
Split #5, This Result: 11.7948, Average Result: 8.1868
Split #6, This Result: 2.9411, Average Result: 7.4374
Split #7, This Result: 6.1707, Average Result: 7.2791
Split #8, This Result: 5.3574, Average Result: 7.0655


[32m[I 2022-07-05 15:06:17,490][0m Trial 18 finished with value: 6.720214861549404 and parameters: {'n_estimators': 67, 'max_depth': 59, 'max_samples': 0.21664614009968064}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.6124, Average Result: 6.7202
Split #0, This Result: 5.6275, Average Result: 5.6275
Split #1, This Result: 6.2788, Average Result: 5.9532
Split #2, This Result: 4.4809, Average Result: 5.4624
Split #3, This Result: 4.1042, Average Result: 5.1229
Split #4, This Result: 16.5606, Average Result: 7.4104
Split #5, This Result: 11.7970, Average Result: 8.1415
Split #6, This Result: 3.6656, Average Result: 7.5021
Split #7, This Result: 6.1610, Average Result: 7.3344
Split #8, This Result: 5.0577, Average Result: 7.0815


[32m[I 2022-07-05 15:18:51,853][0m Trial 19 finished with value: 6.726417277079429 and parameters: {'n_estimators': 111, 'max_depth': 99, 'max_samples': 0.5397768851422293}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.5309, Average Result: 6.7264
Split #0, This Result: 5.8003, Average Result: 5.8003
Split #1, This Result: 6.3526, Average Result: 6.0765
Split #2, This Result: 4.5406, Average Result: 5.5645
Split #3, This Result: 4.6399, Average Result: 5.3333
Split #4, This Result: 16.5914, Average Result: 7.5850
Split #5, This Result: 11.8013, Average Result: 8.2877
Split #6, This Result: 3.8088, Average Result: 7.6478
Split #7, This Result: 6.1975, Average Result: 7.4665
Split #8, This Result: 5.4292, Average Result: 7.2402


[32m[I 2022-07-05 15:21:57,760][0m Trial 20 finished with value: 6.887346364531476 and parameters: {'n_estimators': 37, 'max_depth': 81, 'max_samples': 0.14573196529779872}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.7120, Average Result: 6.8873
Split #0, This Result: 6.9587, Average Result: 6.9587
Split #1, This Result: 6.8754, Average Result: 6.9170
Split #2, This Result: 5.4490, Average Result: 6.4277
Split #3, This Result: 5.9711, Average Result: 6.3135
Split #4, This Result: 16.7492, Average Result: 8.4007
Split #5, This Result: 12.5432, Average Result: 9.0911
Split #6, This Result: 4.8172, Average Result: 8.4805
Split #7, This Result: 6.7740, Average Result: 8.2672
Split #8, This Result: 6.0271, Average Result: 8.0183


[32m[I 2022-07-05 15:24:34,478][0m Trial 21 finished with value: 7.670921894205625 and parameters: {'n_estimators': 46, 'max_depth': 24, 'max_samples': 0.06854504587626338}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 4.5443, Average Result: 7.6709
Split #0, This Result: 5.6582, Average Result: 5.6582
Split #1, This Result: 6.2958, Average Result: 5.9770
Split #2, This Result: 4.4770, Average Result: 5.4770
Split #3, This Result: 4.9843, Average Result: 5.3539
Split #4, This Result: 16.5494, Average Result: 7.5930
Split #5, This Result: 11.8145, Average Result: 8.2966
Split #6, This Result: 3.5764, Average Result: 7.6222
Split #7, This Result: 6.1489, Average Result: 7.4381
Split #8, This Result: 5.1438, Average Result: 7.1832


[32m[I 2022-07-05 15:27:27,097][0m Trial 22 finished with value: 6.822844266179125 and parameters: {'n_estimators': 16, 'max_depth': 62, 'max_samples': 0.3276088147406737}. Best is trial 12 with value: 6.710196429609631.[0m


Split #9, This Result: 3.5800, Average Result: 6.8228


In [None]:
# Hyperparameters of the best trial recorded in the study so far.
best_params = study.best_params

In [18]:
# NOTE(review): the recorded output of this cell lists NN hyperparameters
# (n_layers, n_hidden_units, batch_size, epochs) although the config cell
# selects 'RandomForest' -- it looks stale from an earlier run; re-run to refresh.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Objective value of the best trial recorded in the study so far.
best_value = study.best_value

In [22]:
# NOTE(review): the recorded output of this cell does not match the best value
# printed when the RandomForest study was loaded -- presumably stale from an
# earlier run; re-run to refresh.
best_value

1.0137617092656621