# Parameter search with optuna - ETNN

## Importing section

In [1]:
import optuna

import numpy as np

import torch
from sklearn.metrics import r2_score
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.routines.run_config import choice_dataset, choice_trainloader, choice_loss, choice_optim
from etnn.tools.training import train_epoch, eval_epoch
from etnn.tools.training_tools import ConfigStore, seeding_all

## Definition of objective function for ETNN

In [2]:
def objective(trial):
    # init default config
    config = ConfigStore(
        in_dim=15,
        hidden_dim=trial.suggest_int("hidden_dim", 16, 512, step=16),
        out_dim=1,
        k=trial.suggest_int("k", 1, 5),
        dataset=-2,
        ds_size=10_000,
        num_gondolas=10,
        num_part_pg=5,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30, # real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=1/1000
    )

    # loading dataset
    dataset, df_index = choice_dataset(config, dataset_path)
    # splitting off test dataset
    generator = torch.Generator().manual_seed(config.seed)
    train_ds, val_ds, _ = random_split(
        dataset,
        [1 - test_perc - val_perc, val_perc, test_perc],
        generator=generator
    )

    # loaders
    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(val_ds, batch_size=4 * config.batch_size, shuffle=False)

    # build tree
    tree_structure = TreeNode(
        node_type="C",
        children=[
            TreeNode("P", [TreeNode("E", config.num_part_pg)])
            for _ in range(config.num_gondolas)
        ]
    )

    # define device
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # set seed for reproducability
    seeding_all(config.seed)

    # define model
    model = LayerManagementFramework(
        in_dim=config.in_dim,
        tree=tree_structure,
        hidden_dim=config.hidden_dim,
        out_dim=config.out_dim,
        k=config.k
    ).to(device)

    # learning tools
    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # init score list
    score_list = []

    # train for specified number of epochs
    for epoch in range(config.num_max_epochs):
        _, _, _ = train_epoch(
            model,
            train_loader,
            optimizer,
            device,
            criterion
        )

        _, val_true_y, val_pred_y = eval_epoch(
            model,
            val_loader,
            device,
            criterion
        )

        # calc r2 score and append
        score = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        score_list += [score]
        trial.report(score, epoch)

    # calculate objective
    # display(score_list)
    # idea: last x r2 scores (why not last one? for stability purposes)
    obj = np.array(score_list)[-stability_count:]
    return np.mean(obj)

## Tree advanced label

In [3]:
# setting global parameters
dataset_path = "../../datasets/"
label = "tree_advanced" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [4]:
study_tree_advanced = optuna.create_study(study_name="Best tree advanced label config", directions=['maximize'])
study_tree_advanced.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 12:51:35,785] A new study created in memory with name: Best tree advanced label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-21 12:53:19,596] Trial 0 finished with value: 0.0977191222896564 and parameters: {'hidden_dim': 352, 'k': 4, 'learning_rate': 1.3891305704530852e-05, 'batcher': True}. Best is trial 0 with value: 0.0977191222896564.
[I 2023-12-21 12:54:54,245] Trial 1 finished with value: 0.24808984913764318 and parameters: {'hidden_dim': 160, 'k': 1, 'learning_rate': 0.00021499137805169375, 'batcher': False}. Best is trial 1 with value: 0.24808984913764318.
[I 2023-12-21 12:56:23,739] Trial 2 finished with value: -29.361836369721278 and parameters: {'hidden_dim': 32, 'k': 3, 'learning_rate': 0.00011960656821889805, 'batcher': False}. Best is trial 1 with value: 0.24808984913764318.
[I 2023-12-21 12:57:57,679] Trial 3 finished with value: 0.5375378856734001 and parameters: {'hidden_dim': 176, 'k': 3, 'learning_rate': 0.0003936521290030166, 'batcher': True}. Best is trial 3 with value: 0.5375378856734001.
[I 2023-12-21 12:59:32,025] Trial 4 finished with value: 0.6682791529789223 and paramete

In [5]:
df_tree_advanced = study_tree_advanced.trials_dataframe()
df_tree_advanced.to_csv("study_label-tree-advanced_post-normalized.csv")
df_tree_advanced

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.097719,2023-12-21 12:51:35.792473,2023-12-21 12:53:19.596971,0 days 00:01:43.804498,True,352,4,1.4e-05,COMPLETE
1,1,0.24809,2023-12-21 12:53:19.598972,2023-12-21 12:54:54.244127,0 days 00:01:34.645155,False,160,1,0.000215,COMPLETE
2,2,-29.361836,2023-12-21 12:54:54.246125,2023-12-21 12:56:23.738921,0 days 00:01:29.492796,False,32,3,0.00012,COMPLETE
3,3,0.537538,2023-12-21 12:56:23.740921,2023-12-21 12:57:57.678507,0 days 00:01:33.937586,True,176,3,0.000394,COMPLETE
4,4,0.668279,2023-12-21 12:57:57.680506,2023-12-21 12:59:32.025998,0 days 00:01:34.345492,True,304,4,0.000215,COMPLETE
5,5,0.302339,2023-12-21 12:59:32.026998,2023-12-21 13:01:09.665019,0 days 00:01:37.638021,True,336,3,6.4e-05,COMPLETE
6,6,0.204767,2023-12-21 13:01:09.666020,2023-12-21 13:02:42.058216,0 days 00:01:32.392196,False,48,2,0.000885,COMPLETE
7,7,-27.309851,2023-12-21 13:02:42.060208,2023-12-21 13:04:18.367521,0 days 00:01:36.307313,False,448,3,0.007646,COMPLETE
8,8,0.869621,2023-12-21 13:04:18.368520,2023-12-21 13:05:52.461882,0 days 00:01:34.093362,False,304,3,0.002795,COMPLETE
9,9,0.352814,2023-12-21 13:05:52.463882,2023-12-21 13:07:26.845881,0 days 00:01:34.381999,True,368,3,0.000102,COMPLETE


## Tree label

In [6]:
# setting global parameters
dataset_path = "../../datasets/"
label = "tree" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [7]:
study_tree = optuna.create_study(study_name="Best tree label config", directions=['maximize'])
study_tree.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 14:16:24,163] A new study created in memory with name: Best tree label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-21 14:18:05,449] Trial 0 finished with value: 0.14407992071313577 and parameters: {'hidden_dim': 32, 'k': 3, 'learning_rate': 0.0011136810491043211, 'batcher': False}. Best is trial 0 with value: 0.14407992071313577.
[I 2023-12-21 14:19:41,128] Trial 1 finished with value: -860.6019694678884 and parameters: {'hidden_dim': 48, 'k': 2, 'learning_rate': 3.196509884869571e-05, 'batcher': True}. Best is trial 0 with value: 0.14407992071313577.
[I 2023-12-21 14:21:34,109] Trial 2 finished with value: 0.6126493377856084 and parameters: {'hidden_dim': 304, 'k': 5, 'learning_rate': 0.00019396014575997307, 'batcher': True}. Best is trial 2 with value: 0.6126493377856084.
[I 2023-12-21 14:22:57,487] Trial 3 finished with value: 0.2197909741910385 and parameters: {'hidden_dim': 32, 'k': 3, 'learning_rate': 0.005017222614420588, 'batcher': False}. Best is trial 2 with value: 0.6126493377856084.
[I 2023-12-21 14:24:22,947] Trial 4 finished with value: 0.2540733925035495 and parameters: {'

In [8]:
df_tree = study_tree.trials_dataframe()
df_tree.to_csv("study_label-tree_post-normalized.csv")
df_tree

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.14408,2023-12-21 14:16:24.168385,2023-12-21 14:18:05.449640,0 days 00:01:41.281255,False,32,3,0.001114,COMPLETE
1,1,-860.601969,2023-12-21 14:18:05.451151,2023-12-21 14:19:41.128180,0 days 00:01:35.677029,True,48,2,3.2e-05,COMPLETE
2,2,0.612649,2023-12-21 14:19:41.129684,2023-12-21 14:21:34.107881,0 days 00:01:52.978197,True,304,5,0.000194,COMPLETE
3,3,0.219791,2023-12-21 14:21:34.110390,2023-12-21 14:22:57.487794,0 days 00:01:23.377404,False,32,3,0.005017,COMPLETE
4,4,0.254073,2023-12-21 14:22:57.489298,2023-12-21 14:24:22.947632,0 days 00:01:25.458334,False,80,4,0.001797,COMPLETE
5,5,0.087912,2023-12-21 14:24:22.949631,2023-12-21 14:25:47.737127,0 days 00:01:24.787496,False,96,2,0.000365,COMPLETE
6,6,-1005.406916,2023-12-21 14:25:47.738632,2023-12-21 14:27:12.173795,0 days 00:01:24.435163,True,32,5,4.4e-05,COMPLETE
7,7,0.135581,2023-12-21 14:27:12.175795,2023-12-21 14:28:53.787708,0 days 00:01:41.611913,True,448,3,1.2e-05,COMPLETE
8,8,0.390922,2023-12-21 14:28:53.788707,2023-12-21 14:30:32.361230,0 days 00:01:38.572523,False,448,1,0.000299,COMPLETE
9,9,0.358443,2023-12-21 14:30:32.362228,2023-12-21 14:32:03.667128,0 days 00:01:31.304900,True,240,4,9.6e-05,COMPLETE


## Default

In [9]:
# setting global parameters
dataset_path = "../../datasets/"
label = "default" # alt: tree or default
test_perc = 0.3
val_perc = 0.21
stability_count = 5
n_trials = 50

In [10]:
study_default = optuna.create_study(study_name="Best default label config", directions=['maximize'])
study_default.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 15:36:03,204] A new study created in memory with name: Best default label config


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-21 15:37:39,160] Trial 0 finished with value: 0.3574514377402941 and parameters: {'hidden_dim': 400, 'k': 1, 'learning_rate': 0.0042560567261239355, 'batcher': False}. Best is trial 0 with value: 0.3574514377402941.
[I 2023-12-21 15:39:14,840] Trial 1 finished with value: -0.6313774668485923 and parameters: {'hidden_dim': 192, 'k': 5, 'learning_rate': 3.662360678716699e-05, 'batcher': True}. Best is trial 0 with value: 0.3574514377402941.
[I 2023-12-21 15:40:44,139] Trial 2 finished with value: -132.92048838386273 and parameters: {'hidden_dim': 16, 'k': 4, 'learning_rate': 3.846858467753068e-05, 'batcher': False}. Best is trial 0 with value: 0.3574514377402941.
[I 2023-12-21 15:42:12,483] Trial 3 finished with value: -133.04816702256807 and parameters: {'hidden_dim': 48, 'k': 2, 'learning_rate': 1.3647819057827714e-05, 'batcher': False}. Best is trial 0 with value: 0.3574514377402941.
[I 2023-12-21 15:43:37,066] Trial 4 finished with value: -0.5318397086120491 and parameters

In [11]:
df_default = study_default.trials_dataframe()
df_default.to_csv("study_label-default_post-normalized.csv")
df_default

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_batcher,params_hidden_dim,params_k,params_learning_rate,state
0,0,0.357451,2023-12-21 15:36:03.210451,2023-12-21 15:37:39.160549,0 days 00:01:35.950098,False,400,1,0.004256,COMPLETE
1,1,-0.631377,2023-12-21 15:37:39.161549,2023-12-21 15:39:14.840340,0 days 00:01:35.678791,True,192,5,3.7e-05,COMPLETE
2,2,-132.920488,2023-12-21 15:39:14.842341,2023-12-21 15:40:44.138448,0 days 00:01:29.296107,False,16,4,3.8e-05,COMPLETE
3,3,-133.048167,2023-12-21 15:40:44.140468,2023-12-21 15:42:12.483503,0 days 00:01:28.343035,False,48,2,1.4e-05,COMPLETE
4,4,-0.53184,2023-12-21 15:42:12.484503,2023-12-21 15:43:37.066940,0 days 00:01:24.582437,True,32,5,0.000481,COMPLETE
5,5,0.03353,2023-12-21 15:43:37.068940,2023-12-21 15:45:09.065639,0 days 00:01:31.996699,True,288,1,0.00081,COMPLETE
6,6,0.171099,2023-12-21 15:45:09.066639,2023-12-21 15:46:37.503686,0 days 00:01:28.437047,True,144,3,0.00575,COMPLETE
7,7,-0.180327,2023-12-21 15:46:37.504687,2023-12-21 15:48:04.145321,0 days 00:01:26.640634,True,128,1,0.001161,COMPLETE
8,8,0.026678,2023-12-21 15:48:04.146321,2023-12-21 15:49:35.389529,0 days 00:01:31.243208,True,272,1,0.000318,COMPLETE
9,9,-0.088359,2023-12-21 15:49:35.392531,2023-12-21 15:51:15.790841,0 days 00:01:40.398310,False,512,2,9.7e-05,COMPLETE
