In [42]:
import optuna
import joblib
import datetime

In [43]:
from stesml.model_tools import build_train_test_model

In [44]:
def objective(trial):
    """Optuna objective: sample hyperparameters for ``model_type`` and score them.

    Reads the notebook-level globals ``study``, ``model_type``, ``data_dir``,
    ``target`` and ``metric``.

    Args:
        trial: Object exposing ``suggest_int`` (the Optuna trial passed in by
            ``study.optimize``), used to sample the hyperparameters.

    Returns:
        The first element of the pair returned by ``build_train_test_model``;
        this is the value the study optimizes.

    Raises:
        ValueError: If ``model_type`` is not 'NN', 'XGBoost' or 'RandomForest'.
    """
    # Save the study before running the next trial. The file name only has
    # hour resolution, so saves made within the same hour overwrite each other.
    joblib.dump(study, "../studies/study_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

    if model_type == 'NN':
        scale = True
        parameters = {
            'n_layers': trial.suggest_int("n_layers", 1, 5),
            'n_hidden_units': trial.suggest_int("n_hidden_units", 10, 100),
            'batch_size': trial.suggest_int("batch_size", 10, 10000, log=True),
            # Neglect optimizing # of epochs, allow early stopping to determine # of epochs
            'epochs': 20,
        }
    elif model_type == 'XGBoost':
        scale = False
        parameters = {'n_estimators': trial.suggest_int("n_estimators", 50, 10000, log=True)}
    elif model_type == 'RandomForest':
        scale = False
        parameters = {'n_estimators': trial.suggest_int("n_estimators", 1, 100, log=True)}
    else:
        # Previously an unknown model type fell through every branch and
        # surfaced as an UnboundLocalError on ``return result``.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected 'NN', 'XGBoost' or 'RandomForest'"
        )

    # Second element of the returned pair is not needed by the optimizer.
    result, _ = build_train_test_model(data_dir, model_type, target, metric, scale, parameters)
    return result

In [39]:
# Run configuration for the hyperparameter study.
data_dir = "../data/Sulfur_Models/"
model_type = 'NN' # Options: NN, XGBoost, RandomForest
target = 'Tavg' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2

# Optimization direction for each supported metric: an error metric is
# minimized, a goodness-of-fit score is maximized.
metric_directions = {'rmse': 'minimize', 'r2': 'maximize'}
if metric not in metric_directions:
    # Fail here rather than with a NameError when the study is created later.
    raise ValueError(f"Unknown metric {metric!r}; expected one of {sorted(metric_directions)}")
direction = metric_directions[metric]

# When True, resume from the pickled study named below instead of starting a new one.
load_study = True
study_name = "study_20220630-17.pkl"

In [40]:
# Resume a previously saved study, or start a fresh one.
if load_study:
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load study files you created yourself.
    study = joblib.load("../studies/" + study_name)
    try:
        best_trial = study.best_trial
    except ValueError:
        # The study is saved before each trial runs, so a saved file can hold
        # no completed trials at all; Optuna raises ValueError in that case.
        print("Loaded study has no completed trials yet.")
    else:
        print("Best trial until now:")
        print(" Value: ", best_trial.value)
        print(" Params: ")
        for key, value in best_trial.params.items():
            print(f"    {key}: {value}")
else:
    study = optuna.create_study(direction=direction)

[32m[I 2022-06-30 17:35:51,652][0m A new study created in memory with name: no-name-2e75a2b5-8854-4849-8897-6025dfbe840c[0m


In [41]:
try:
    study.optimize(objective, n_trials=50)
finally:
    # objective() only saves the study at the start of each trial, so without
    # this final save the last finished trial (or everything since the last
    # save, if the run is interrupted) would never reach disk.
    joblib.dump(study, "../studies/study_" + datetime.datetime.now().strftime("%Y%m%d-%H") + ".pkl")

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Split #0, This Result: 2.4031, Average Result: 2.4031
Epoch 1/20
Epoch 2/20
Epoch 3/20
Split #1, This Result: 1.7177, Average Result: 2.0604
Epoch 1/20
Epoch 2/20
Epoch 3/20
Split #2, This Result: 4.2977, Average Result: 2.8062
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Split #3, This Result: 1.8929, Average Result: 2.5779
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


[32m[I 2022-06-30 17:57:50,582][0m Trial 0 finished with value: 2.5226111961553745 and parameters: {'n_layers': 4, 'n_hidden_units': 38, 'batch_size': 25}. Best is trial 0 with value: 2.5226111961553745.[0m


Split #4, This Result: 2.3016, Average Result: 2.5226
Epoch 1/20

KeyboardInterrupt: 

In [17]:
# Hyperparameters of the best trial recorded in the study so far.
best_params = study.best_params

In [18]:
# Display the best hyperparameters.
# NOTE(review): the recorded output includes an 'epochs' entry, which the
# current objective() never suggests — presumably left over from an earlier
# version of the objective; re-run to refresh.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Objective value of the best trial recorded in the study so far.
best_value = study.best_value

In [22]:
# Display the best objective value.
best_value

1.0137617092656621