# Parameter search with optuna - BASELINE

## Importing section

In [1]:
import optuna

import numpy as np

import torch
from sklearn.metrics import r2_score
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.routines.run_config import choice_dataset, choice_trainloader, choice_loss, choice_optim
from etnn.tools.training import train_epoch, eval_epoch
from etnn.tools.training_tools import ConfigStore, seeding_all

## Definition of objective function for the baseline MLP

In [2]:
def objective(trial):
    """Optuna objective: train a baseline MLP and score it on the validation split.

    The network is ``Flatten`` followed by ``n_layers`` linear layers with a
    ReLU after every layer except the last. Sampled from ``trial`` (in this
    order): ``learning_rate``, ``batcher``, ``n_layers`` and one ``n_dim_{i}``
    width per hidden layer.

    Reads the notebook-level globals ``label``, ``dataset_path``, ``test_perc``,
    ``val_perc`` and ``stability_count``; they must be defined before a study
    calls this function.

    Args:
        trial: optuna trial used to sample hyperparameters and to report the
            per-epoch validation R2 score.

    Returns:
        Mean of the last ``stability_count`` per-epoch validation R2 scores.
    """
    # init default config
    config = ConfigStore(
        in_dim=15,
        hidden_dim=0,  # fixed here; alternative: trial.suggest_int("hidden_dim", 16, 512, step=16)
        out_dim=1,
        k=0,  # fixed here; alternative: trial.suggest_int("k", 1, 5)
        dataset=0,
        ds_size=10_000,
        num_gondolas=10,
        num_part_pg=5,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30,  # real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=1/1000
    )

    # loading dataset
    dataset, df_index = choice_dataset(config, dataset_path)

    # split into train/val/test with a seeded generator; the test part is
    # deliberately discarded so the search never sees it
    generator = torch.Generator().manual_seed(config.seed)
    train_ds, val_ds, _ = random_split(
        dataset,
        [1 - test_perc - val_perc, val_perc, test_perc],
        generator=generator
    )

    # loaders
    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(val_ds, batch_size=4 * config.batch_size, shuffle=False)

    # define device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # set seed for reproducibility (before any layer with weights is created)
    seeding_all(config.seed)

    # define model: flatten the input, then a stack of linear layers
    layer_list = [torch.nn.Flatten()]
    features = config.in_dim * config.num_gondolas * config.num_part_pg

    # every layer except the last is a linear layer followed by a relu
    n_layers = trial.suggest_int("n_layers", 1, 5)
    for i in range(n_layers - 1):
        # determine new feature dimension
        new_features = trial.suggest_int(f"n_dim_{i}", 1, 512)
        layer_list.extend([torch.nn.Linear(features, new_features), torch.nn.ReLU()])

        # output width of this layer is the input width of the next one
        features = new_features

    # the last layer must map to the out dimension
    layer_list.append(torch.nn.Linear(features, config.out_dim))
    model = torch.nn.Sequential(*layer_list).to(device)

    # learning tools
    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # per-epoch validation r2 scores
    score_list = []

    # train for specified number of epochs
    for epoch in range(config.num_max_epochs):
        train_epoch(
            model,
            train_loader,
            optimizer,
            device,
            criterion
        )

        _, val_true_y, val_pred_y = eval_epoch(
            model,
            val_loader,
            device,
            criterion
        )

        # calc r2 score and record it
        score = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        score_list.append(score)
        # NOTE: the score is reported as an intermediate value, but
        # trial.should_prune() is never queried, so no trial is pruned.
        trial.report(score, epoch)

    # objective: mean of the last `stability_count` r2 scores instead of only
    # the final one, for stability. NOTE: a `stability_count` of 0 slices the
    # whole list ([-0:]), i.e. averages over all epochs.
    return np.mean(score_list[-stability_count:])

## Tree advanced label

In [3]:
# Global settings read by `objective` and the study cell below
# (run for the "tree_advanced" label).
label = "tree_advanced"  # alternatives: "tree", "default"
dataset_path = "../../datasets/"

# fractions of the dataset split off for testing / validation
test_perc, val_perc = 0.3, 0.21

# number of trailing per-epoch R2 scores averaged into the objective value
stability_count = 5
# number of optuna trials for this study
n_trials = 70

In [4]:
# Seed the sampler so the hyperparameter search itself is reproducible on a
# re-run; 420 is the same seed value `objective` uses for splitting and model
# init. TPESampler is the sampler this study used by default before.
study_tree_advanced = optuna.create_study(
    study_name="Best tree advanced label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_tree_advanced.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-07 19:03:23,890] A new study created in memory with name: Best tree advanced label config


  0%|          | 0/70 [00:00<?, ?it/s]

[I 2023-12-07 19:04:38,585] Trial 0 finished with value: -24750.47571961474 and parameters: {'learning_rate': 0.004047065387150369, 'batcher': False, 'n_layers': 1}. Best is trial 0 with value: -24750.47571961474.
[I 2023-12-07 19:06:12,288] Trial 1 finished with value: -145200.10870485418 and parameters: {'learning_rate': 0.0002813629489521779, 'batcher': True, 'n_layers': 1}. Best is trial 0 with value: -24750.47571961474.
[I 2023-12-07 19:08:03,845] Trial 2 finished with value: -3.5193973016892506 and parameters: {'learning_rate': 0.0009736335534809103, 'batcher': True, 'n_layers': 4, 'n_dim_0': 210, 'n_dim_1': 404, 'n_dim_2': 435}. Best is trial 2 with value: -3.5193973016892506.
[I 2023-12-07 19:09:47,026] Trial 3 finished with value: -860.5048661204946 and parameters: {'learning_rate': 1.538543338413527e-05, 'batcher': True, 'n_layers': 3, 'n_dim_0': 38, 'n_dim_1': 473}. Best is trial 2 with value: -3.5193973016892506.
[I 2023-12-07 19:11:31,903] Trial 4 finished with value: -53.

[I 2023-12-07 20:01:02,787] Trial 32 finished with value: -1.2392169459826756 and parameters: {'learning_rate': 0.0020097283731976, 'batcher': True, 'n_layers': 5, 'n_dim_0': 456, 'n_dim_1': 512, 'n_dim_2': 67, 'n_dim_3': 167}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:02:52,105] Trial 33 finished with value: -1031.2252599716708 and parameters: {'learning_rate': 0.005975091704208469, 'batcher': True, 'n_layers': 5, 'n_dim_0': 508, 'n_dim_1': 343, 'n_dim_2': 173, 'n_dim_3': 63}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:04:32,014] Trial 34 finished with value: -1097.0363806006017 and parameters: {'learning_rate': 0.0031959451616620353, 'batcher': True, 'n_layers': 5, 'n_dim_0': 398, 'n_dim_1': 229, 'n_dim_2': 252, 'n_dim_3': 11}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:06:00,389] Trial 35 finished with value: -1157.7958230462268 and parameters: {'learning_rate': 0.006121679524104839, 'batcher': True, 'n_layers': 4

[I 2023-12-07 20:47:09,121] Trial 63 finished with value: 0.5931296250009913 and parameters: {'learning_rate': 0.0027394509516266816, 'batcher': True, 'n_layers': 5, 'n_dim_0': 246, 'n_dim_1': 105, 'n_dim_2': 305, 'n_dim_3': 142}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:48:20,856] Trial 64 finished with value: -1169.8736436286435 and parameters: {'learning_rate': 0.004562416715346014, 'batcher': True, 'n_layers': 2, 'n_dim_0': 5}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:49:35,609] Trial 65 finished with value: 0.7496335321717161 and parameters: {'learning_rate': 0.0037634356730918404, 'batcher': True, 'n_layers': 5, 'n_dim_0': 352, 'n_dim_1': 424, 'n_dim_2': 265, 'n_dim_3': 186}. Best is trial 26 with value: 0.9355811634135069.
[I 2023-12-07 20:50:50,234] Trial 66 finished with value: 0.5662733510172068 and parameters: {'learning_rate': 0.004082910174176839, 'batcher': False, 'n_layers': 5, 'n_dim_0': 314, 'n_dim_1': 423, 'n_dim_2': 2

In [5]:
# hyperparameters of the best trial of the "tree_advanced" study
best_par_tree_advanced = study_tree_advanced.best_params
print(best_par_tree_advanced)

{'learning_rate': 0.005620447373414403, 'batcher': True, 'n_layers': 5, 'n_dim_0': 497, 'n_dim_1': 510, 'n_dim_2': 47, 'n_dim_3': 159}


In [6]:
# show which sampler class drove this study
type(study_tree_advanced.sampler).__name__

'TPESampler'

In [7]:
# Collect all trials of the study in a table, persist it as CSV and render it.
df_tree_advanced = study_tree_advanced.trials_dataframe()
df_tree_advanced.to_csv(path_or_buf="study_label-tree-advanced_baseline.csv")
display(df_tree_advanced)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_learning_rate,params_n_dim_0,params_n_dim_1,params_n_dim_2,params_n_dim_3,params_n_layers,state
0,0,-24750.475720,2023-12-07 19:03:23.898191,2023-12-07 19:04:38.585621,0 days 00:01:14.687430,False,0.004047,,,,,1,COMPLETE
1,1,-145200.108705,2023-12-07 19:04:38.586621,2023-12-07 19:06:12.288189,0 days 00:01:33.701568,True,0.000281,,,,,1,COMPLETE
2,2,-3.519397,2023-12-07 19:06:12.290324,2023-12-07 19:08:03.844086,0 days 00:01:51.553762,True,0.000974,210.0,404.0,435.0,,4,COMPLETE
3,3,-860.504866,2023-12-07 19:08:03.845907,2023-12-07 19:09:47.025777,0 days 00:01:43.179870,True,0.000015,38.0,473.0,,,3,COMPLETE
4,4,-53.302765,2023-12-07 19:09:47.028782,2023-12-07 19:11:31.903042,0 days 00:01:44.874260,False,0.000017,473.0,390.0,511.0,,4,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,65,0.749634,2023-12-07 20:48:20.858305,2023-12-07 20:49:35.609636,0 days 00:01:14.751331,True,0.003763,352.0,424.0,265.0,186.0,5,COMPLETE
66,66,0.566273,2023-12-07 20:49:35.610635,2023-12-07 20:50:50.234949,0 days 00:01:14.624314,False,0.004083,314.0,423.0,249.0,91.0,5,COMPLETE
67,67,-0.245748,2023-12-07 20:50:50.235946,2023-12-07 20:52:04.888581,0 days 00:01:14.652635,True,0.008307,372.0,49.0,268.0,286.0,5,COMPLETE
68,68,-0.672174,2023-12-07 20:52:04.890580,2023-12-07 20:53:18.439272,0 days 00:01:13.548692,False,0.006775,352.0,274.0,195.0,116.0,5,COMPLETE


## Tree label

In [18]:
# Global settings read by `objective` and the study cell below
# (run for the "tree" label).
label = "tree"  # alternatives: "tree_advanced", "default"
dataset_path = "../../datasets/"

# fractions of the dataset split off for testing / validation
test_perc, val_perc = 0.3, 0.21

# number of trailing per-epoch R2 scores averaged into the objective value
stability_count = 5
# number of optuna trials for this study
n_trials = 50

In [19]:
# Seed the sampler so the hyperparameter search itself is reproducible on a
# re-run; 420 is the same seed value `objective` uses for splitting and model
# init. TPESampler is the sampler this study used by default before.
study_tree = optuna.create_study(
    study_name="Best tree label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_tree.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-07 20:55:06,191] A new study created in memory with name: Best tree label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-07 20:56:21,472] Trial 0 finished with value: -210601.94645228545 and parameters: {'learning_rate': 3.5762164835316424e-05, 'batcher': False, 'n_layers': 1}. Best is trial 0 with value: -210601.94645228545.
[I 2023-12-07 20:57:36,740] Trial 1 finished with value: -3.337819704364103 and parameters: {'learning_rate': 0.0009340234771113171, 'batcher': True, 'n_layers': 4, 'n_dim_0': 31, 'n_dim_1': 144, 'n_dim_2': 116}. Best is trial 1 with value: -3.337819704364103.
[I 2023-12-07 20:58:50,586] Trial 2 finished with value: -14.395422711268527 and parameters: {'learning_rate': 4.4402445245951924e-05, 'batcher': True, 'n_layers': 4, 'n_dim_0': 25, 'n_dim_1': 50, 'n_dim_2': 453}. Best is trial 1 with value: -3.337819704364103.
[I 2023-12-07 21:00:03,806] Trial 3 finished with value: -25.24174673544831 and parameters: {'learning_rate': 0.0007706758420370273, 'batcher': True, 'n_layers': 4, 'n_dim_0': 42, 'n_dim_1': 332, 'n_dim_2': 405}. Best is trial 1 with value: -3.337819704364103

[I 2023-12-07 21:41:17,875] Trial 32 finished with value: -5.30462039756285 and parameters: {'learning_rate': 0.006566163561283359, 'batcher': False, 'n_layers': 5, 'n_dim_0': 433, 'n_dim_1': 472, 'n_dim_2': 148, 'n_dim_3': 408}. Best is trial 30 with value: 0.7672066914453695.
[I 2023-12-07 21:43:09,859] Trial 33 finished with value: -989.1997262990868 and parameters: {'learning_rate': 0.009982275827264298, 'batcher': False, 'n_layers': 4, 'n_dim_0': 421, 'n_dim_1': 503, 'n_dim_2': 62}. Best is trial 30 with value: 0.7672066914453695.
[I 2023-12-07 21:45:03,393] Trial 34 finished with value: -0.8994558132579256 and parameters: {'learning_rate': 0.006425253946387897, 'batcher': False, 'n_layers': 5, 'n_dim_0': 481, 'n_dim_1': 456, 'n_dim_2': 201, 'n_dim_3': 452}. Best is trial 30 with value: 0.7672066914453695.
[I 2023-12-07 21:46:49,456] Trial 35 finished with value: -2.2579536308298502 and parameters: {'learning_rate': 0.004202639229411177, 'batcher': False, 'n_layers': 4, 'n_dim_0':

In [21]:
# hyperparameters of the best trial of the "tree" study
best_par_tree = study_tree.best_params
print(best_par_tree)

{'learning_rate': 0.007107275844311816, 'batcher': False, 'n_layers': 5, 'n_dim_0': 453, 'n_dim_1': 222, 'n_dim_2': 41, 'n_dim_3': 506}


In [22]:
# show which sampler class drove this study
type(study_tree.sampler).__name__

'TPESampler'

In [23]:
# Collect all trials of the study in a table, persist it as CSV and render it.
df_tree = study_tree.trials_dataframe()
df_tree.to_csv(path_or_buf="study_label-tree_baseline.csv")
display(df_tree)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_learning_rate,params_n_dim_0,params_n_dim_1,params_n_dim_2,params_n_dim_3,params_n_layers,state
0,0,-210601.946452,2023-12-07 20:55:06.198745,2023-12-07 20:56:21.472785,0 days 00:01:15.274040,False,0.000036,,,,,1,COMPLETE
1,1,-3.337820,2023-12-07 20:56:21.474785,2023-12-07 20:57:36.740247,0 days 00:01:15.265462,True,0.000934,31.0,144.0,116.0,,4,COMPLETE
2,2,-14.395423,2023-12-07 20:57:36.742584,2023-12-07 20:58:50.586277,0 days 00:01:13.843693,True,0.000044,25.0,50.0,453.0,,4,COMPLETE
3,3,-25.241747,2023-12-07 20:58:50.588276,2023-12-07 21:00:03.805778,0 days 00:01:13.217502,True,0.000771,42.0,332.0,405.0,,4,COMPLETE
4,4,-338.021402,2023-12-07 21:00:03.807736,2023-12-07 21:01:17.091042,0 days 00:01:13.283306,True,0.000094,66.0,437.0,,,3,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,65,-0.008392,2023-12-07 22:31:15.633734,2023-12-07 22:32:30.223154,0 days 00:01:14.589420,True,0.007477,5.0,60.0,361.0,91.0,5,COMPLETE
66,66,-1.638066,2023-12-07 22:32:30.224154,2023-12-07 22:33:45.061230,0 days 00:01:14.837076,True,0.004651,389.0,121.0,262.0,166.0,5,COMPLETE
67,67,-608.274329,2023-12-07 22:33:45.062229,2023-12-07 22:35:01.462536,0 days 00:01:16.400307,True,0.007462,458.0,163.0,122.0,,4,COMPLETE
68,68,-0.017936,2023-12-07 22:35:01.464535,2023-12-07 22:36:16.353428,0 days 00:01:14.888893,True,0.005755,430.0,103.0,3.0,463.0,5,COMPLETE


## Default label

In [3]:
# Global settings read by `objective` and the study cell below
# (run for the "default" label).
label = "default"  # alternatives: "tree_advanced", "tree"
dataset_path = "../../datasets/"

# fractions of the dataset split off for testing / validation
test_perc, val_perc = 0.3, 0.21

# number of trailing per-epoch R2 scores averaged into the objective value
stability_count = 5
# number of optuna trials for this study
n_trials = 50

In [4]:
# Seed the sampler so the hyperparameter search itself is reproducible on a
# re-run; 420 is the same seed value `objective` uses for splitting and model
# init. TPESampler is the sampler this study used by default before.
study_default = optuna.create_study(
    study_name="Best default label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_default.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 08:17:34,919] A new study created in memory with name: Best default label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 08:18:53,808] Trial 0 finished with value: -1699.6420836855243 and parameters: {'learning_rate': 0.0004316954260403846, 'batcher': False, 'n_layers': 1}. Best is trial 0 with value: -1699.6420836855243.
[I 2023-12-08 08:20:08,280] Trial 1 finished with value: -11.830721453734968 and parameters: {'learning_rate': 1.0187984319561083e-05, 'batcher': True, 'n_layers': 3, 'n_dim_0': 130, 'n_dim_1': 18}. Best is trial 1 with value: -11.830721453734968.
[I 2023-12-08 08:21:24,900] Trial 2 finished with value: -48.45359902615071 and parameters: {'learning_rate': 0.0006804323877763095, 'batcher': True, 'n_layers': 2, 'n_dim_0': 118}. Best is trial 1 with value: -11.830721453734968.
[I 2023-12-08 08:22:40,923] Trial 3 finished with value: -0.8399402090661099 and parameters: {'learning_rate': 1.5593036116580817e-05, 'batcher': False, 'n_layers': 5, 'n_dim_0': 126, 'n_dim_1': 28, 'n_dim_2': 407, 'n_dim_3': 357}. Best is trial 3 with value: -0.8399402090661099.
[I 2023-12-08 08:23:57,

In [5]:
# hyperparameters of the best trial of the "default" study
best_par_default = study_default.best_params
print(best_par_default)

{'learning_rate': 0.003675299618197335, 'batcher': False, 'n_layers': 5, 'n_dim_0': 481, 'n_dim_1': 287, 'n_dim_2': 336, 'n_dim_3': 192}


In [6]:
# show which sampler class drove this study
type(study_default.sampler).__name__

'TPESampler'

In [7]:
# Collect all trials of the study in a table, persist it as CSV and render it.
df_default = study_default.trials_dataframe()
df_default.to_csv(path_or_buf="study_label-default_baseline.csv")
display(df_default)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_learning_rate,params_n_dim_0,params_n_dim_1,params_n_dim_2,params_n_dim_3,params_n_layers,state
0,0,-1699.642084,2023-12-08 08:17:34.929904,2023-12-08 08:18:53.808722,0 days 00:01:18.878818,False,0.000432,,,,,1,COMPLETE
1,1,-11.830721,2023-12-08 08:18:53.810684,2023-12-08 08:20:08.280544,0 days 00:01:14.469860,True,1e-05,130.0,18.0,,,3,COMPLETE
2,2,-48.453599,2023-12-08 08:20:08.282504,2023-12-08 08:21:24.900261,0 days 00:01:16.617757,True,0.00068,118.0,,,,2,COMPLETE
3,3,-0.83994,2023-12-08 08:21:24.901261,2023-12-08 08:22:40.922861,0 days 00:01:16.021600,False,1.6e-05,126.0,28.0,407.0,357.0,5,COMPLETE
4,4,-0.869214,2023-12-08 08:22:40.924860,2023-12-08 08:23:57.188800,0 days 00:01:16.263940,True,8e-05,270.0,487.0,122.0,134.0,5,COMPLETE
5,5,-33.513969,2023-12-08 08:23:57.189800,2023-12-08 08:25:12.836273,0 days 00:01:15.646473,True,0.00037,309.0,,,,2,COMPLETE
6,6,-0.464049,2023-12-08 08:25:12.837272,2023-12-08 08:26:29.549542,0 days 00:01:16.712270,False,0.001363,161.0,446.0,,,3,COMPLETE
7,7,-1917.838388,2023-12-08 08:26:29.550542,2023-12-08 08:27:45.259898,0 days 00:01:15.709356,False,0.000196,,,,,1,COMPLETE
8,8,-258.497881,2023-12-08 08:27:45.260892,2023-12-08 08:28:58.552896,0 days 00:01:13.292004,True,2e-05,58.0,,,,2,COMPLETE
9,9,-3.872674,2023-12-08 08:28:58.553477,2023-12-08 08:30:13.924757,0 days 00:01:15.371280,False,3.4e-05,391.0,207.0,,,3,COMPLETE
