In [10]:
import torch
import random
import numpy as np
from pathlib import Path
import os
import sys

# Add the parent of the notebook's working directory to sys.path so the project
# packages (runs/, models/, util/) can be imported from inside the notebook.
# Notebooks have no __file__, so the working directory is used explicitly.
current_dir = os.path.realpath(os.getcwd())
lib_path = os.path.normpath(os.path.join(current_dir, ".."))
# Guard against piling up duplicate entries every time this cell is re-run.
if lib_path not in sys.path:
    sys.path.append(lib_path)

from runs.validate_model import validate_single_model
from models.nn_models import ExampleLSTM
from util.data import generate_settings, update_settings

# Autoreload: mode 2 re-imports edited modules before each cell runs, so changes to
# the project files imported above are picked up without restarting the kernel.
# Re-running this cell in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

# Reproducibility: one shared seed for PyTorch, NumPy's global RNG and Python's `random`.
seed = 16
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Base model name; the search loop appends "_<index>" and the final benchmark appends "_best".
model_name_base = "ExampleLSTM_test"
# Model class passed to validate_single_model / run_benchmark alongside the "init" kwargs.
model_constructor = ExampleLSTM
# Baseline arguments shared by every run. The "init" and "fit" sub-dicts are later
# handed to validate_single_model / run_benchmark as separate arguments
# (presumably model-constructor kwargs and training kwargs -- confirm in runs/).
base_args = dict(
    init=dict(
        n_ts_features=9,
        n_static_features=1,
        hidden_size=32,
        num_layers=2,
    ),
    fit=dict(
        batch_size=32,
        num_epochs=50,
        # Train on the GPU when one is visible to PyTorch, otherwise on the CPU.
        device="cuda" if torch.cuda.is_available() else "cpu",
        optim_fn=torch.optim.Adam,
        optim_kwargs=dict(lr=0.0001, weight_decay=0.00001),
        scheduler_fn=torch.optim.lr_scheduler.StepLR,
        scheduler_kwargs=dict(step_size=1, gamma=0.99),
        val_fraction=0.1,
        val_split_by_year=True,
        seed=16,
        do_early_stopping=True,
    ),
)

run_name_base = "example_hparam_run_4"

# Single location for every per-setting result file. The analysis cell further down
# reads "results/<run_name_base>/<run_name_base>_<i>.csv", so the CSVs are written to
# this same folder (previously they went to a different, never-created directory).
results_dir = Path(f"results/{run_name_base}")

# Define hyperparameters to search over and their values.
# Keys are dotted paths (presumably addressing nested entries of base_args -- see
# util.data.generate_settings); each maps to the list of candidate values.
param_space = {
    "init.hidden_size": [16, 32, 64],
    "init.num_layers": [1, 2, 3],
    "fit.optim_kwargs.lr": [0.0001, 0.001],
    "fit.optim_kwargs.weight_decay": [0.0001],
}


settings = generate_settings(param_space, base_args)
print(f"Generated {len(settings)} settings to run.")

# Create base folder for run_name
results_dir.mkdir(parents=True, exist_ok=True)

# Shuffle settings. The index used in the file names below is the position in this
# shuffled list, and the analysis cell looks results up through the same in-memory
# `settings` order, so the list must not be regenerated between the two cells.
random.shuffle(settings)

# Run the hyperparameter search
for i, setting in enumerate(settings):

    run_kwargs = update_settings(setting, base_args)

    print(f"Running {run_name_base} {i} with settings:")
    print(run_kwargs)

    result_df = validate_single_model(
        run_name_base + '_' + str(i),
        model_name_base + '_' + str(i),
        model_constructor,
        run_kwargs['init'],
        run_kwargs['fit'],
    )

    # Save the results next to the other artefacts of this run, where they are read back later.
    result_df.to_csv(results_dir / f"{run_name_base}_{i}.csv", index=False)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Generated 18 settings to run.
Running example_hparam_run_4 0 with settings:
{'init': {'n_ts_features': 9, 'n_static_features': 1, 'hidden_size': 64, 'num_layers': 3}, 'fit': {'batch_size': 32, 'num_epochs': 50, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'optim_kwargs': {'lr': 0.0001, 'weight_decay': 0.0001}, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 1, 'gamma': 0.99}, 'val_fraction': 0.1, 'val_split_by_year': True, 'seed': 16, 'do_early_stopping': True}}


In [None]:
import pandas as pd
from runs.run_benchmark import run_benchmark


# Scan the per-setting result files and keep the setting with the lowest validation loss.
best_result = None
best_result_idx = None
for i, setting in enumerate(settings):
    result_path = Path(f"results/{run_name_base}/{run_name_base}_{i}.csv")
    # Settings whose run has not produced a result file are simply skipped.
    if not result_path.exists():
        continue
    result_df = pd.read_csv(result_path)

    # NOTE(review): the loss is read positionally -- row 6 with early stopping, row 3
    # without, always column 1. This depends on the CSV layout written by
    # validate_single_model; confirm if that layout changes.
    val_loss_row = 6 if setting['fit']['do_early_stopping'] else 3
    val_loss = result_df.iloc[val_loss_row, 1]
    print(f"Setting {i}: {val_loss}")

    # A NaN loss (e.g. a diverged run) must never be selected: every comparison with
    # NaN is False, so a NaN picked up first would otherwise stay "best" forever.
    if pd.isna(val_loss):
        continue

    if best_result is None or val_loss < best_result:
        best_result = val_loss
        best_result_idx = i

# Fail with a clear message instead of an opaque TypeError from settings[None].
if best_result_idx is None:
    raise RuntimeError(
        f"No usable result files found in results/{run_name_base}; "
        "run the hyperparameter search cell first."
    )

print(f"Best result: {best_result} at index {best_result_idx}")
print(f"Best settings: {settings[best_result_idx]}")

import pickle

# Record the winning validation loss and its index in a small text file.
with Path(f"results/{run_name_base}/best_settings_result.txt").open("w") as f:
    print(f"Best result: {best_result} at index {best_result_idx}", file=f)

# Pickle the winning settings dict so a later cell can reload it from disk.
settings_to_save = settings[best_result_idx]
with Path(f"results/{run_name_base}/best_settings_dict.pkl").open("wb") as f:
    pickle.dump(settings_to_save, f)


Setting 0: 2.597805378528742
Setting 1: 3.0681170397079907
Setting 2: 2.326060502574994
Setting 3: 7.712058404317269
Setting 4: 30.238999385100144
Setting 5: 5.7340222505422735
Setting 6: 2.3480412593254676
Setting 7: 64.13078469496507
Setting 8: 30.238911078526424
Setting 9: 2.418242845397729
Setting 10: 25.09572538962731
Setting 11: 2.2192195630990543
Setting 12: 5.734135274703686
Setting 13: 7.7116414950444145
Setting 14: 2.4967379237596807
Setting 15: 2.1950585532646913
Setting 16: 2.583382872434763
Setting 17: 2.2109741820738864
Setting 18: 2.465893642260478
Setting 19: 60.72057254497822
Setting 20: 30.238027425912712
Setting 21: 2.1241133407904553
Setting 22: 25.095947650762703
Setting 23: 7.712051939505797
Setting 24: 6.2202374407878285
Setting 25: 2.3356983283391366
Best result: 2.1241133407904553 at index 21
Best settings: {'init': {'n_ts_features': 9, 'n_static_features': 1, 'hidden_size': 16, 'num_layers': 3}, 'fit': {'batch_size': 32, 'num_epochs': 50, 'device': 'cuda', 'op

In [11]:
# Load best settings dict from file with pickle
import pickle

# Imported here as well so this cell still works when the analysis cell above
# (which also imports run_benchmark) was not executed in the current kernel.
from runs.run_benchmark import run_benchmark

# NOTE: unpickling can execute arbitrary code -- only load files written by this notebook.
with open(f"results/{run_name_base}/best_settings_dict.pkl", "rb") as f:
    best_settings = pickle.load(f)


# Cross-check against the in-memory search result, but only when the analysis cell
# actually ran in this kernel; otherwise `best_result_idx` does not exist at all.
if globals().get("best_result_idx") is not None:
    assert best_settings == settings[best_result_idx], (
        "Pickled best settings differ from the best settings found in this session"
    )


# Run the full benchmark for the best settings
run_kwargs = update_settings(best_settings, base_args)
result_df = run_benchmark(
    run_name_base + '_best',
    model_name_base + '_best',
    model_constructor,
    run_kwargs['init'],
    run_kwargs['fit'],
)

print(result_df)



Running setting 1/2


Epoch 1/10 | Loss: 13.7711: 100%|██████████| 420/420 [00:12<00:00, 33.69it/s]
Validation Epoch 1/10 | Loss: 5.2663: 100%|██████████| 53/53 [00:01<00:00, 38.23it/s]
Epoch 2/10 | Loss: 5.8160: 100%|██████████| 420/420 [00:11<00:00, 35.12it/s]
Validation Epoch 2/10 | Loss: 5.6659: 100%|██████████| 53/53 [00:01<00:00, 38.82it/s]
Epoch 3/10 | Loss: 4.5793: 100%|██████████| 420/420 [00:12<00:00, 34.81it/s]
Validation Epoch 3/10 | Loss: 3.9131: 100%|██████████| 53/53 [00:01<00:00, 39.70it/s]
Epoch 4/10 | Loss: 3.0510: 100%|██████████| 420/420 [00:12<00:00, 34.94it/s]
Validation Epoch 4/10 | Loss: 3.2511: 100%|██████████| 53/53 [00:01<00:00, 36.78it/s]
Epoch 5/10 | Loss: 2.5474: 100%|██████████| 420/420 [00:12<00:00, 33.56it/s]
Validation Epoch 5/10 | Loss: 3.3667: 100%|██████████| 53/53 [00:01<00:00, 38.82it/s]
Epoch 6/10 | Loss: 2.2705: 100%|██████████| 420/420 [00:12<00:00, 34.15it/s]
Validation Epoch 6/10 | Loss: 2.3436: 100%|██████████| 53/53 [00:01<00:00, 38.07it/s]
Epoch 7/10 | Loss: 2.

For setting 1/2, average final validation loss: 2.325794701306325
Settings: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.001, 'weight_decay': 0.0001}}
Running setting 2/2


Epoch 1/10 | Loss: 56.5167: 100%|██████████| 423/423 [00:12<00:00, 33.02it/s]
Validation Epoch 1/10 | Loss: 25.0974: 100%|██████████| 50/50 [00:01<00:00, 39.01it/s]
Epoch 2/10 | Loss: 11.9122: 100%|██████████| 423/423 [00:12<00:00, 34.68it/s]
Validation Epoch 2/10 | Loss: 9.6758: 100%|██████████| 50/50 [00:01<00:00, 38.86it/s] 
Epoch 3/10 | Loss: 6.9333: 100%|██████████| 423/423 [00:12<00:00, 34.27it/s]
Validation Epoch 3/10 | Loss: 6.3329: 100%|██████████| 50/50 [00:01<00:00, 38.04it/s]
Epoch 4/10 | Loss: 6.1320: 100%|██████████| 423/423 [00:12<00:00, 34.79it/s]
Validation Epoch 4/10 | Loss: 5.2698: 100%|██████████| 50/50 [00:01<00:00, 35.23it/s]
Epoch 5/10 | Loss: 6.0255: 100%|██████████| 423/423 [00:12<00:00, 33.72it/s]
Validation Epoch 5/10 | Loss: 5.0155: 100%|██████████| 50/50 [00:01<00:00, 40.60it/s]
Epoch 6/10 | Loss: 6.0127: 100%|██████████| 423/423 [00:12<00:00, 35.01it/s]
Validation Epoch 6/10 | Loss: 4.9434: 100%|██████████| 50/50 [00:01<00:00, 38.88it/s]
Epoch 7/10 | Loss:

For setting 2/2, average final validation loss: 4.787838568687439
Settings: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.0001, 'weight_decay': 0.0001}}


Epoch 1/10 | Loss: 13.6868: 100%|██████████| 417/417 [00:12<00:00, 33.11it/s]
Validation Epoch 1/10 | Loss: 4.1956: 100%|██████████| 56/56 [00:01<00:00, 38.55it/s]
Epoch 2/10 | Loss: 5.9236: 100%|██████████| 417/417 [00:12<00:00, 34.33it/s]
Validation Epoch 2/10 | Loss: 3.7410: 100%|██████████| 56/56 [00:01<00:00, 39.86it/s]
Epoch 3/10 | Loss: 5.4929: 100%|██████████| 417/417 [00:11<00:00, 35.10it/s]
Validation Epoch 3/10 | Loss: 3.5991: 100%|██████████| 56/56 [00:01<00:00, 38.49it/s]
Epoch 4/10 | Loss: 5.1651: 100%|██████████| 417/417 [00:12<00:00, 34.42it/s]
Validation Epoch 4/10 | Loss: 3.4802: 100%|██████████| 56/56 [00:01<00:00, 37.52it/s]
Epoch 5/10 | Loss: 4.4667: 100%|██████████| 417/417 [00:12<00:00, 34.32it/s]
Validation Epoch 5/10 | Loss: 2.8402: 100%|██████████| 56/56 [00:01<00:00, 36.54it/s]
Epoch 6/10 | Loss: 2.7687: 100%|██████████| 417/417 [00:12<00:00, 33.87it/s]
Validation Epoch 6/10 | Loss: 2.6725: 100%|██████████| 56/56 [00:01<00:00, 38.12it/s]
Epoch 7/10 | Loss: 2.

Final validation loss of outer fold: 2.370673019438982
Final best setting of outer fold: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.001, 'weight_decay': 0.0001}}


Epoch 1/50 | Loss: 37.9866: 100%|██████████| 421/421 [00:12<00:00, 32.88it/s]
Validation Epoch 1/50 | Loss: 10.8131: 100%|██████████| 52/52 [00:01<00:00, 40.99it/s]
Epoch 2/50 | Loss: 7.3026: 100%|██████████| 421/421 [00:12<00:00, 34.02it/s]
Validation Epoch 2/50 | Loss: 5.1150: 100%|██████████| 52/52 [00:01<00:00, 38.69it/s]
Epoch 3/50 | Loss: 6.0177: 100%|██████████| 421/421 [00:12<00:00, 34.34it/s]
Validation Epoch 3/50 | Loss: 4.8725: 100%|██████████| 52/52 [00:01<00:00, 39.41it/s]
Epoch 4/50 | Loss: 6.0037: 100%|██████████| 421/421 [00:12<00:00, 34.28it/s]
Validation Epoch 4/50 | Loss: 4.8704: 100%|██████████| 52/52 [00:01<00:00, 39.48it/s]
Epoch 5/50 | Loss: 5.9919: 100%|██████████| 421/421 [00:12<00:00, 33.87it/s]
Validation Epoch 5/50 | Loss: 4.8879: 100%|██████████| 52/52 [00:01<00:00, 40.45it/s]
Epoch 6/50 | Loss: 5.9244: 100%|██████████| 421/421 [00:12<00:00, 34.47it/s]
Validation Epoch 6/50 | Loss: 4.7264: 100%|██████████| 52/52 [00:01<00:00, 37.99it/s]
Epoch 7/50 | Loss: 4

Running setting 1/2


Epoch 1/10 | Loss: 14.0061: 100%|██████████| 417/417 [00:12<00:00, 34.08it/s]
Validation Epoch 1/10 | Loss: 5.3538: 100%|██████████| 57/57 [00:01<00:00, 40.17it/s]
Epoch 2/10 | Loss: 5.8437: 100%|██████████| 417/417 [00:11<00:00, 35.64it/s]
Validation Epoch 2/10 | Loss: 4.2348: 100%|██████████| 57/57 [00:01<00:00, 39.31it/s]
Epoch 3/10 | Loss: 5.2373: 100%|██████████| 417/417 [00:11<00:00, 35.93it/s]
Validation Epoch 3/10 | Loss: 3.3433: 100%|██████████| 57/57 [00:01<00:00, 39.16it/s]
Epoch 4/10 | Loss: 3.0949: 100%|██████████| 417/417 [00:11<00:00, 35.41it/s]
Validation Epoch 4/10 | Loss: 2.4103: 100%|██████████| 57/57 [00:01<00:00, 40.80it/s]
Epoch 5/10 | Loss: 2.3643: 100%|██████████| 417/417 [00:11<00:00, 35.56it/s]
Validation Epoch 5/10 | Loss: 2.6295: 100%|██████████| 57/57 [00:01<00:00, 38.94it/s]
Epoch 6/10 | Loss: 2.1288: 100%|██████████| 417/417 [00:11<00:00, 35.50it/s]
Validation Epoch 6/10 | Loss: 2.3986: 100%|██████████| 57/57 [00:01<00:00, 39.75it/s]
Epoch 7/10 | Loss: 1.

For setting 1/2, average final validation loss: 2.010524509245889
Settings: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.001, 'weight_decay': 0.0001}}
Running setting 2/2


Epoch 1/10 | Loss: 55.7569: 100%|██████████| 420/420 [00:12<00:00, 33.90it/s]
Validation Epoch 1/10 | Loss: 20.7522: 100%|██████████| 53/53 [00:01<00:00, 37.19it/s]
Epoch 2/10 | Loss: 11.5191: 100%|██████████| 420/420 [00:11<00:00, 35.89it/s]
Validation Epoch 2/10 | Loss: 7.9371: 100%|██████████| 53/53 [00:01<00:00, 38.72it/s] 
Epoch 3/10 | Loss: 6.6626: 100%|██████████| 420/420 [00:11<00:00, 35.75it/s]
Validation Epoch 3/10 | Loss: 6.0538: 100%|██████████| 53/53 [00:01<00:00, 40.12it/s]
Epoch 4/10 | Loss: 6.0488: 100%|██████████| 420/420 [00:11<00:00, 35.03it/s]
Validation Epoch 4/10 | Loss: 5.6427: 100%|██████████| 53/53 [00:01<00:00, 38.11it/s]
Epoch 5/10 | Loss: 5.9734: 100%|██████████| 420/420 [00:12<00:00, 34.46it/s]
Validation Epoch 5/10 | Loss: 5.5625: 100%|██████████| 53/53 [00:01<00:00, 38.56it/s]
Epoch 6/10 | Loss: 5.9563: 100%|██████████| 420/420 [00:11<00:00, 35.03it/s]
Validation Epoch 6/10 | Loss: 5.5391: 100%|██████████| 53/53 [00:01<00:00, 38.27it/s]
Epoch 7/10 | Loss:

For setting 2/2, average final validation loss: 5.400241127553976
Settings: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.0001, 'weight_decay': 0.0001}}


Epoch 1/10 | Loss: 13.9753: 100%|██████████| 423/423 [00:12<00:00, 34.11it/s]
Validation Epoch 1/10 | Loss: 4.9519: 100%|██████████| 50/50 [00:01<00:00, 39.84it/s]
Epoch 2/10 | Loss: 5.8976: 100%|██████████| 423/423 [00:11<00:00, 35.83it/s]
Validation Epoch 2/10 | Loss: 4.4041: 100%|██████████| 50/50 [00:01<00:00, 39.88it/s]
Epoch 3/10 | Loss: 5.2698: 100%|██████████| 423/423 [00:11<00:00, 35.56it/s]
Validation Epoch 3/10 | Loss: 3.5727: 100%|██████████| 50/50 [00:01<00:00, 39.20it/s]
Epoch 4/10 | Loss: 4.5163: 100%|██████████| 423/423 [00:12<00:00, 35.00it/s]
Validation Epoch 4/10 | Loss: 3.4700: 100%|██████████| 50/50 [00:01<00:00, 38.78it/s]
Epoch 5/10 | Loss: 3.2917: 100%|██████████| 423/423 [00:12<00:00, 35.12it/s]
Validation Epoch 5/10 | Loss: 3.9926: 100%|██████████| 50/50 [00:01<00:00, 37.57it/s]
Epoch 6/10 | Loss: 2.7162: 100%|██████████| 423/423 [00:12<00:00, 34.68it/s]
Validation Epoch 6/10 | Loss: 2.9337: 100%|██████████| 50/50 [00:01<00:00, 37.50it/s]
Epoch 7/10 | Loss: 2.

Final validation loss of outer fold: 2.5075659185647963
Final best setting of outer fold: {'batch_size': 32, 'num_epochs': 10, 'device': 'cuda', 'optim_fn': <class 'torch.optim.adam.Adam'>, 'scheduler_fn': <class 'torch.optim.lr_scheduler.StepLR'>, 'scheduler_kwargs': {'step_size': 2, 'gamma': 0.8}, 'val_fraction': 0.1, 'val_split_by_year': True, 'optim_kwargs': {'lr': 0.001, 'weight_decay': 0.0001}}


Epoch 1/50 | Loss: 32.7833: 100%|██████████| 422/422 [00:12<00:00, 32.88it/s]
Validation Epoch 1/50 | Loss: 8.2974: 100%|██████████| 52/52 [00:01<00:00, 37.86it/s]
Epoch 2/50 | Loss: 6.6138: 100%|██████████| 422/422 [00:12<00:00, 34.53it/s]
Validation Epoch 2/50 | Loss: 4.9733: 100%|██████████| 52/52 [00:01<00:00, 38.92it/s]
Epoch 3/50 | Loss: 6.0384: 100%|██████████| 422/422 [00:12<00:00, 34.59it/s]
Validation Epoch 3/50 | Loss: 4.9098: 100%|██████████| 52/52 [00:01<00:00, 38.85it/s]
Epoch 4/50 | Loss: 6.0352: 100%|██████████| 422/422 [00:12<00:00, 33.16it/s]
Validation Epoch 4/50 | Loss: 4.8700: 100%|██████████| 52/52 [00:01<00:00, 40.27it/s]
Epoch 5/50 | Loss: 6.0361: 100%|██████████| 422/422 [00:12<00:00, 33.78it/s]
Validation Epoch 5/50 | Loss: 4.8904: 100%|██████████| 52/52 [00:01<00:00, 36.28it/s]
Epoch 6/50 | Loss: 6.0347: 100%|██████████| 422/422 [00:12<00:00, 32.87it/s]
Validation Epoch 6/50 | Loss: 4.8608: 100%|██████████| 52/52 [00:01<00:00, 38.55it/s]
Epoch 7/50 | Loss: 6.

KeyboardInterrupt: 

In [None]:
# Evaluation results for the "<run_name_base>_best" run, i.e. the run name that the
# benchmark cell above passed to run_benchmark.
# NOTE(review): this imports a private helper of runs.run_benchmark; what it reads and
# returns is not visible from this notebook.
from runs.run_benchmark import _compute_evaluation_results
_compute_evaluation_results(f"{run_name_base}_best")