In [3]:
import optuna

In [4]:
from stesml.model_tools import build_train_test_model

In [5]:
# Run configuration: everything a reader might tune lives in this cell.
data_dir = "../data/Sulfur_Models/"
model_type = 'NN' # Options: NN, XGBoost, RandomForest
target = 'Tavg' # Options: Tavg, h
metric = 'rmse' # Options: rmse, r2
n_shuffles = 3 # Forwarded to build_train_test_model as n_shuffles

# Optuna needs to know which way is "better" for the chosen metric:
# an error (rmse) is minimized, a score (r2) is maximized.
if metric == 'rmse':
    direction = 'minimize'
elif metric == 'r2':
    direction = 'maximize'
else:
    # Fail here rather than with a NameError when the study is created.
    raise ValueError(f"Unsupported metric {metric!r}; expected 'rmse' or 'r2'.")

In [6]:
def objective(trial):
    """Optuna objective: sample hyperparameters and score one model.

    Reads the notebook-level settings ``model_type``, ``data_dir``,
    ``target``, ``metric`` and ``n_shuffles``, asks ``trial`` for the
    hyperparameters relevant to ``model_type``, and delegates building,
    training and evaluation to ``build_train_test_model``.

    Search space per model type:
      * ``'NN'``: ``n_layers`` in [1, 5], ``n_hidden_units`` in [10, 100],
        ``batch_size`` in [10, 10000] (log scale), ``epochs`` in [1, 20].
      * ``'XGBoost'``: ``n_estimators`` in [50, 10000] (log scale).
      * ``'RandomForest'``: ``n_estimators`` in [1, 100] (log scale).

    Args:
        trial: The ``optuna`` trial used to suggest hyperparameter values;
            it is also forwarded to ``build_train_test_model``.

    Returns:
        Whatever ``build_train_test_model`` returns; the study optimizes
        this value. (Presumably the ``metric`` averaged over the shuffles --
        confirm in ``stesml.model_tools``.)

    Raises:
        ValueError: If ``model_type`` is not 'NN', 'XGBoost' or
            'RandomForest'.
    """
    if model_type == 'NN':
        # `scale` is enabled only for the neural network
        # (presumably input scaling -- confirm in stesml.model_tools).
        scale = True
        n_layers = trial.suggest_int("n_layers", 1, 5)
        n_hidden_units = trial.suggest_int("n_hidden_units", 10, 100)
        batch_size = trial.suggest_int("batch_size", 10, 10000, log=True)
        epochs = trial.suggest_int("epochs", 1, 20)
        result = build_train_test_model(data_dir, model_type, target, metric, scale, n_layers, n_hidden_units, batch_size, epochs, trial=trial, n_shuffles=n_shuffles)
    elif model_type == 'XGBoost':
        scale = False
        n_estimators = trial.suggest_int("n_estimators", 50, 10000, log=True)
        result = build_train_test_model(data_dir, model_type, target, metric, scale, n_estimators=n_estimators, trial=trial, n_shuffles=n_shuffles)
    elif model_type == 'RandomForest':
        scale = False
        n_estimators = trial.suggest_int("n_estimators", 1, 100, log=True)
        result = build_train_test_model(data_dir, model_type, target, metric, scale, n_estimators=n_estimators, trial=trial, n_shuffles=n_shuffles)
    else:
        # Without this branch `result` would be unbound and the function
        # would fail with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported model_type {model_type!r}; "
            "expected 'NN', 'XGBoost' or 'RandomForest'."
        )

    return result

In [7]:
# Create the Optuna study that will search the hyperparameter space.
# `direction` comes from the configuration cell and depends on `metric`.
# No storage is passed, so the study is kept in memory only.
study = optuna.create_study(direction=direction)

[32m[I 2022-06-29 15:49:30,324][0m A new study created in memory with name: no-name-8ea2fa55-aa2a-4870-8bee-80fde6df3720[0m


In [None]:
# Run the search: 45 trials, each calling `objective` with a fresh trial.
# This is the long-running cell of the notebook.
study.optimize(objective, n_trials=45)

2022-06-29 15:49:46.134167: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-29 15:49:46.236457: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)


Epoch 1/7
Epoch 2/7
Stopped 5 epochs early. Applying penalty of 1.05. Old result = 1.9922087427003063
New result = 2.091819179835322
Result: 2.0918
RMSE AVG: 2.0918
Epoch 1/7
Epoch 2/7
Stopped 5 epochs early. Applying penalty of 1.05. Old result = 3.102303849699908
New result = 3.257419042184903
Result: 3.2574
RMSE AVG: 2.6746
Epoch 1/7
Epoch 2/7


[32m[I 2022-06-29 15:51:04,572][0m Trial 0 finished with value: 2.748779291731692 and parameters: {'n_layers': 5, 'n_hidden_units': 33, 'batch_size': 1134, 'epochs': 7}. Best is trial 0 with value: 2.748779291731692.[0m


Stopped 5 epochs early. Applying penalty of 1.05. Old result = 2.7591425268331915
New result = 2.8970996531748514
Result: 2.8971
RMSE AVG: 2.7488
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Stopped 15 epochs early. Applying penalty of 1.15. Old result = 1.8911354143158032
New result = 2.1748057264631737
Result: 2.1748
RMSE AVG: 2.1748
Epoch 1/19
Epoch 2/19
Epoch 3/19
Epoch 4/19
Epoch 5/19
Stopped 14 epochs early. Applying penalty of 1.1400000000000001. Old result = 1.778112134347848
New result = 2.027047833156547
Result: 2.0270
RMSE AVG: 2.1009
Epoch 1/19
Epoch 2/19


[32m[I 2022-06-29 15:52:41,685][0m Trial 1 finished with value: 2.1406073803899517 and parameters: {'n_layers': 3, 'n_hidden_units': 87, 'batch_size': 2496, 'epochs': 19}. Best is trial 1 with value: 2.1406073803899517.[0m


Stopped 17 epochs early. Applying penalty of 1.17. Old result = 1.897409044059944
New result = 2.2199685815501344
Result: 2.2200
RMSE AVG: 2.1406
Epoch 1/19
Epoch 2/19
Stopped 17 epochs early. Applying penalty of 1.17. Old result = 1.903951836301064
New result = 2.2276236484722447
Result: 2.2276
RMSE AVG: 2.2276
Epoch 1/19

In [17]:
# Hyperparameters of the best trial recorded by the study.
best_params = study.best_params

In [18]:
# Display the best hyperparameters found.
best_params

{'n_layers': 2, 'n_hidden_units': 79, 'batch_size': 339, 'epochs': 7}

In [21]:
# Objective value of the best trial (in units of the configured `metric`).
best_value = study.best_value

In [22]:
# Display the best objective value found.
best_value

1.0137617092656621