# Hyperparameter search with Optuna - BASELINE

## Importing section

In [1]:
import optuna

import numpy as np

import torch
from sklearn.metrics import r2_score
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.routines.run_config import choice_dataset, choice_trainloader, choice_loss, choice_optim
from etnn.tools.training import train_epoch, eval_epoch
from etnn.tools.training_tools import ConfigStore, seeding_all

## Definition of objective function for the baseline MLP

In [2]:
def objective(trial):
    """Optuna objective: train a baseline MLP and score it on the validation split.

    The search space covers the learning rate, the choice of train batcher, the
    number of linear layers and the width of every hidden layer. Dataset
    settings are read from the notebook-level globals ``dataset_path``,
    ``label``, ``test_perc``, ``val_perc`` and ``stability_count``, which must
    be defined before the study is started.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters and to report per-epoch scores.

    Returns
    -------
    float
        Mean validation R2 score over the last ``stability_count`` epochs.
    """
    # Fixed settings plus the two hyperparameters sampled at config level.
    config = ConfigStore(
        in_dim=15,
        hidden_dim=0,  # placeholder: not read by the model built in this function
        out_dim=1,
        k=0,  # placeholder: not read by the model built in this function
        dataset=-1,
        ds_size=10_000,
        num_gondolas=10,
        num_part_pg=5,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30,  # shortened for the search; real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=1/1000
    )

    # Load the dataset and carve out train/validation parts. The split uses its
    # own generator seeded with the constant config seed, so every trial sees
    # the same partition. The test part is split off but not used here.
    dataset, df_index = choice_dataset(config, dataset_path)
    split_generator = torch.Generator().manual_seed(config.seed)
    train_ds, val_ds, _ = random_split(
        dataset,
        [1 - test_perc - val_perc, val_perc, test_perc],
        generator=split_generator
    )

    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(val_ds, batch_size=4 * config.batch_size, shuffle=False)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Seed right before the model is created so weight initialisation is
    # reproducible for a given architecture.
    seeding_all(config.seed)

    # The first linear layer consumes the flattened input of one sample.
    flat_features = config.in_dim * config.num_gondolas * config.num_part_pg
    model = _build_mlp(trial, flat_features, config.out_dim).to(device)

    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # NOTE(review): scores are reported per epoch, but trial.should_prune() is
    # never consulted, so no trial is stopped early.
    epoch_scores = []
    for epoch in range(config.num_max_epochs):
        train_epoch(model, train_loader, optimizer, device, criterion)

        _, val_true_y, val_pred_y = eval_epoch(model, val_loader, device, criterion)

        score = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        epoch_scores.append(score)
        trial.report(score, epoch)

    # Average the last few epochs instead of taking only the final one, to make
    # the objective less sensitive to epoch-to-epoch fluctuation.
    return np.mean(epoch_scores[-stability_count:])


def _build_mlp(trial, in_features, out_features):
    """Assemble Flatten -> [Linear -> ReLU] * (n_layers - 1) -> Linear for one trial."""
    n_layers = trial.suggest_int("n_layers", 1, 5)

    layers = [torch.nn.Flatten()]
    width = in_features

    # Every layer except the last one gets a sampled width and a ReLU.
    for i in range(n_layers - 1):
        hidden = trial.suggest_int(f"n_dim_{i}", 1, 512)
        layers += [torch.nn.Linear(width, hidden), torch.nn.ReLU()]
        width = hidden

    # The final layer must map to the requested output dimension.
    layers.append(torch.nn.Linear(width, out_features))
    return torch.nn.Sequential(*layers)

## Tree advanced label

In [3]:
# setting global parameters (read by `objective` when the study runs)
# directory handed to choice_dataset
dataset_path = "../../datasets/"
label = "tree_advanced" # alt: tree or default
# fraction of the dataset split off as test set (not used during the search)
test_perc = 0.3
# fraction of the dataset used as validation set for scoring trials
val_perc = 0.21
# number of final epochs whose R2 scores are averaged into the objective value
stability_count = 5
# number of trials passed to study.optimize
n_trials = 70

In [4]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# after a kernel restart (420 is the same value the objective uses as its seed).
study_tree_advanced = optuna.create_study(
    study_name="Best tree advanced label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_tree_advanced.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 19:56:49,734] A new study created in memory with name: Best tree advanced label config


  0%|          | 0/70 [00:00<?, ?it/s]

[I 2023-12-08 19:57:58,943] Trial 0 finished with value: -939.4449011030545 and parameters: {'learning_rate': 2.4006605088264315e-05, 'batcher': False, 'n_layers': 1}. Best is trial 0 with value: -939.4449011030545.
[I 2023-12-08 19:59:19,023] Trial 1 finished with value: 0.9201144923544472 and parameters: {'learning_rate': 0.002104284711699982, 'batcher': False, 'n_layers': 4, 'n_dim_0': 219, 'n_dim_1': 401, 'n_dim_2': 355}. Best is trial 1 with value: 0.9201144923544472.
[I 2023-12-08 20:00:40,962] Trial 2 finished with value: -1004.7290627621171 and parameters: {'learning_rate': 3.064636911462103e-05, 'batcher': False, 'n_layers': 5, 'n_dim_0': 89, 'n_dim_1': 304, 'n_dim_2': 315, 'n_dim_3': 1}. Best is trial 1 with value: 0.9201144923544472.
[I 2023-12-08 20:02:05,508] Trial 3 finished with value: 0.8962070188960141 and parameters: {'learning_rate': 0.00029349149611142326, 'batcher': False, 'n_layers': 2, 'n_dim_0': 208}. Best is trial 1 with value: 0.9201144923544472.
[I 2023-12-08

In [5]:
# hyperparameters of the best trial found for the tree_advanced label
best_par_tree_advanced = study_tree_advanced.best_params
print(best_par_tree_advanced)

{'learning_rate': 0.008466422572167911, 'batcher': True, 'n_layers': 4, 'n_dim_0': 353, 'n_dim_1': 7, 'n_dim_2': 484}


In [6]:
# record which sampler class produced the suggestions of this study
type(study_tree_advanced.sampler).__name__

'TPESampler'

In [7]:
# export all trials of the study as a table, persist it next to the notebook
# and show it
df_tree_advanced = study_tree_advanced.trials_dataframe()
df_tree_advanced.to_csv("study_label-tree-advanced_baseline_normalized.csv")
display(df_tree_advanced)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_learning_rate,params_n_dim_0,params_n_dim_1,params_n_dim_2,params_n_dim_3,params_n_layers,state
0,0,-939.444901,2023-12-08 19:56:49.744254,2023-12-08 19:57:58.943975,0 days 00:01:09.199721,False,0.000024,,,,,1,COMPLETE
1,1,0.920114,2023-12-08 19:57:58.945480,2023-12-08 19:59:19.023901,0 days 00:01:20.078421,False,0.002104,219.0,401.0,355.0,,4,COMPLETE
2,2,-1004.729063,2023-12-08 19:59:19.025902,2023-12-08 20:00:40.961248,0 days 00:01:21.935346,False,0.000031,89.0,304.0,315.0,1.0,5,COMPLETE
3,3,0.896207,2023-12-08 20:00:40.963248,2023-12-08 20:02:05.507228,0 days 00:01:24.543980,False,0.000293,208.0,,,,2,COMPLETE
4,4,0.905588,2023-12-08 20:02:05.509229,2023-12-08 20:03:23.213526,0 days 00:01:17.704297,True,0.000728,250.0,,,,2,COMPLETE
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,65,0.939706,2023-12-08 21:14:01.704534,2023-12-08 21:15:14.487481,0 days 00:01:12.782947,True,0.003787,360.0,153.0,,,3,COMPLETE
66,66,0.944978,2023-12-08 21:15:14.488480,2023-12-08 21:16:23.830428,0 days 00:01:09.341948,True,0.005396,446.0,56.0,,,3,COMPLETE
67,67,0.931577,2023-12-08 21:16:23.832429,2023-12-08 21:17:36.604296,0 days 00:01:12.771867,True,0.007961,485.0,19.0,,,3,COMPLETE
68,68,0.923358,2023-12-08 21:17:36.605310,2023-12-08 21:18:54.544129,0 days 00:01:17.938819,True,0.005931,387.0,262.0,,,3,COMPLETE


## Tree label

In [8]:
# setting global parameters (read by `objective` when the study runs)
# directory handed to choice_dataset
dataset_path = "../../datasets/"
label = "tree" # alt: tree_advanced or default
# fraction of the dataset split off as test set (not used during the search)
test_perc = 0.3
# fraction of the dataset used as validation set for scoring trials
val_perc = 0.21
# number of final epochs whose R2 scores are averaged into the objective value
stability_count = 5
# number of trials passed to study.optimize
n_trials = 50

In [9]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# after a kernel restart (420 is the same value the objective uses as its seed).
study_tree = optuna.create_study(
    study_name="Best tree label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_tree.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 21:20:08,783] A new study created in memory with name: Best tree label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 21:21:18,473] Trial 0 finished with value: 0.8660064817618178 and parameters: {'learning_rate': 0.008646976968890727, 'batcher': True, 'n_layers': 4, 'n_dim_0': 181, 'n_dim_1': 132, 'n_dim_2': 125}. Best is trial 0 with value: 0.8660064817618178.
[I 2023-12-08 21:22:29,376] Trial 1 finished with value: 0.7972039692698816 and parameters: {'learning_rate': 0.0006975641348274792, 'batcher': False, 'n_layers': 2, 'n_dim_0': 190}. Best is trial 0 with value: 0.8660064817618178.
[I 2023-12-08 21:23:45,153] Trial 2 finished with value: 0.8130472632062355 and parameters: {'learning_rate': 0.004318543004301319, 'batcher': False, 'n_layers': 4, 'n_dim_0': 403, 'n_dim_1': 324, 'n_dim_2': 495}. Best is trial 0 with value: 0.8660064817618178.
[I 2023-12-08 21:25:01,064] Trial 3 finished with value: 0.8875302762419887 and parameters: {'learning_rate': 0.0065806054377971785, 'batcher': False, 'n_layers': 4, 'n_dim_0': 274, 'n_dim_1': 11, 'n_dim_2': 427}. Best is trial 3 with value: 0.88

In [10]:
# hyperparameters of the best trial found for the tree label
best_par_tree = study_tree.best_params
print(best_par_tree)

{'learning_rate': 0.007698706581726475, 'batcher': False, 'n_layers': 4, 'n_dim_0': 344, 'n_dim_1': 50, 'n_dim_2': 294}


In [11]:
# record which sampler class produced the suggestions of this study
type(study_tree.sampler).__name__

'TPESampler'

In [12]:
# export all trials of the study as a table, persist it next to the notebook
# and show it
df_tree = study_tree.trials_dataframe()
df_tree.to_csv("study_label-tree_baseline_normalized.csv")
display(df_tree)

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_learning_rate,params_n_dim_0,params_n_dim_1,params_n_dim_2,params_n_dim_3,params_n_layers,state
0,0,0.866006,2023-12-08 21:20:08.788536,2023-12-08 21:21:18.473993,0 days 00:01:09.685457,True,0.008647,181.0,132.0,125.0,,4,COMPLETE
1,1,0.797204,2023-12-08 21:21:18.474991,2023-12-08 21:22:29.376470,0 days 00:01:10.901479,False,0.000698,190.0,,,,2,COMPLETE
2,2,0.813047,2023-12-08 21:22:29.377470,2023-12-08 21:23:45.153962,0 days 00:01:15.776492,False,0.004319,403.0,324.0,495.0,,4,COMPLETE
3,3,0.88753,2023-12-08 21:23:45.154963,2023-12-08 21:25:01.064178,0 days 00:01:15.909215,False,0.006581,274.0,11.0,427.0,,4,COMPLETE
4,4,0.765364,2023-12-08 21:25:01.065169,2023-12-08 21:26:13.469076,0 days 00:01:12.403907,False,0.000179,441.0,,,,2,COMPLETE
5,5,-749.315467,2023-12-08 21:26:13.471076,2023-12-08 21:27:22.298319,0 days 00:01:08.827243,True,4.3e-05,,,,,1,COMPLETE
6,6,-245.449412,2023-12-08 21:27:22.299318,2023-12-08 21:28:35.022163,0 days 00:01:12.722845,False,2.3e-05,219.0,420.0,163.0,378.0,5,COMPLETE
7,7,-643.681754,2023-12-08 21:28:35.024373,2023-12-08 21:29:48.098389,0 days 00:01:13.074016,True,7.2e-05,,,,,1,COMPLETE
8,8,-740.846402,2023-12-08 21:29:48.099388,2023-12-08 21:31:05.536873,0 days 00:01:17.437485,True,1.1e-05,494.0,42.0,,,3,COMPLETE
9,9,0.749897,2023-12-08 21:31:05.537872,2023-12-08 21:32:21.574676,0 days 00:01:16.036804,False,0.0002,257.0,185.0,423.0,352.0,5,COMPLETE


## Default label

In [13]:
# setting global parameters (read by `objective` when the study runs)
# directory handed to choice_dataset
dataset_path = "../../datasets/"
label = "default" # alt: tree_advanced or tree
# fraction of the dataset split off as test set (not used during the search)
test_perc = 0.3
# fraction of the dataset used as validation set for scoring trials
val_perc = 0.21
# number of final epochs whose R2 scores are averaged into the objective value
stability_count = 5
# number of trials passed to study.optimize
n_trials = 50

In [None]:
# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# after a kernel restart (420 is the same value the objective uses as its seed).
study_default = optuna.create_study(
    study_name="Best default label config",
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=420),
)
study_default.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-08 22:22:45,684] A new study created in memory with name: Best default label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-08 22:24:16,389] Trial 0 finished with value: -184.2231543784893 and parameters: {'learning_rate': 0.00023625067855551036, 'batcher': True, 'n_layers': 2, 'n_dim_0': 164}. Best is trial 0 with value: -184.2231543784893.
[I 2023-12-08 22:25:48,724] Trial 1 finished with value: -3.1922308677762836 and parameters: {'learning_rate': 0.0049030985026801656, 'batcher': False, 'n_layers': 3, 'n_dim_0': 403, 'n_dim_1': 244}. Best is trial 1 with value: -3.1922308677762836.
[I 2023-12-08 22:27:17,114] Trial 2 finished with value: -4.982813001550577 and parameters: {'learning_rate': 0.0020021147337254282, 'batcher': True, 'n_layers': 2, 'n_dim_0': 181}. Best is trial 1 with value: -3.1922308677762836.
[I 2023-12-08 22:28:41,967] Trial 3 finished with value: -4452.742046561163 and parameters: {'learning_rate': 1.2647006209202626e-05, 'batcher': True, 'n_layers': 2, 'n_dim_0': 406}. Best is trial 1 with value: -3.1922308677762836.
[I 2023-12-08 22:30:09,871] Trial 4 finished with value: 

In [None]:
# hyperparameters of the best trial found for the default label
best_par_default = study_default.best_params
print(best_par_default)

In [None]:
# record which sampler class produced the suggestions of this study
type(study_default.sampler).__name__

In [None]:
# export all trials of the study as a table, persist it next to the notebook
# and show it
df_default = study_default.trials_dataframe()
df_default.to_csv("study_label-default_baseline_normalized.csv")
display(df_default)