# Testing single node datasets in regard to node type P

Purpose: until now, datasets were represented by a combination of tree node types; this notebook tests datasets built from a single node type.

## Importing

In [1]:
import sys
import os

# Absolute path of the directory two levels above the current working
# directory (os.path.abspath resolves relative paths against the cwd).
parent_of_parent_dir = os.path.abspath(os.path.join(os.pardir, os.pardir))

# Put that directory first on the module search path
# (presumably so the local `etnn` package imported below resolves - confirm).
sys.path.insert(0, parent_of_parent_dir)

In [2]:
import optuna
import numpy as np
from sklearn.metrics import r2_score
import torch
from torch.utils.data import random_split, DataLoader

from etnn import TreeNode
from etnn.tools.training_tools import ConfigStore
from etnn.data.ferris_wheel import load_pure_ferris_wheel_dataset_single_node
from etnn.tools.training import train_epoch, eval_epoch
from etnn.nn.layer_framework import LayerManagementFramework
from etnn.tools.training_tools import seeding_all
from etnn.routines.run_config import choice_trainloader, choice_loss, choice_optim

## Control constants

In [3]:
# Directory handed to the dataset loader as `dataset_path` inside `objective`.
dataset_path = "../../datasets/"
# Fraction of the dataset split off as test data (third entry of the
# random_split fractions in `objective`; that split is discarded there).
test_perc = 0.3
# Fraction of the dataset used as validation data; the training fraction is
# 1 - test_perc - val_perc.
val_perc = 0.21
# Number of trailing per-epoch validation R2 scores averaged into the
# objective value returned to Optuna.
stability_count = 5
# Stored in the config as `label_type`; marked by the author as irrelevant here.
label = "tree-advanced" # although irrelevant

## Defining parameter search

In [4]:
def objective(trial):
    """Optuna objective: train one model and score it on the validation split.

    Hyperparameters sampled from ``trial``:
      * ``hidden_dim``    - integer in [16, 512] with step 16
      * ``learning_rate`` - float in [1e-5, 1e-2], sampled on a log scale
      * ``batcher``       - categorical True/False, stored as ``use_equal_batcher``

    Besides ``trial`` the function reads these notebook-level globals, which
    must be defined before a study is run: ``normalized``, ``post_normalized``,
    ``dataset_size``, ``node_type``, ``size_elem``, ``dataset_path``,
    ``test_perc``, ``val_perc``, ``stability_count`` and ``label``.

    The validation R2 score of every epoch is reported to the trial as an
    intermediate value.

    Returns:
        Mean of the last ``stability_count`` per-epoch validation R2 scores.
    """
    # Run configuration. A fixed, non-searched variant was used previously
    # (hidden_dim=16, k=2, ds_size=10_000, learning_rate=0.002,
    # use_equal_batcher=False, everything else as below).
    config = ConfigStore(
        in_dim=15,
        hidden_dim=trial.suggest_int("hidden_dim", 16, 512, step=16),
        out_dim=1,
        k=1,  # trial.suggest_int("k", 1, 5), # not required as for P k is by default set to 1
        dataset=-1 if normalized else 0,
        ds_size=dataset_size,
        num_gondolas=-1,
        num_part_pg=-1,
        loss_name='mse',
        optimizer_name='adam',
        num_max_epochs=30,  # real: 100
        learning_rate=trial.suggest_float("learning_rate", 1e-5, 1e-2, log=True),
        batch_size=1024,
        early_stop_tol=5,
        use_equal_batcher=trial.suggest_categorical("batcher", [True, False]),
        seed=420,
        label_type=label,
        final_label_factor=5/1000
    )

    # config.dataset == -1 encodes "normalized" (see the `dataset=` argument
    # above); the falsy branch deliberately stays the integer 0.
    if config.dataset == -1:
        normalize_flag = True
    else:
        normalize_flag = 0

    # Load the single-node dataset together with its index frame.
    dataset, df_index = load_pure_ferris_wheel_dataset_single_node(
        node_type=node_type,
        num_elem=size_elem,
        num_to_generate=config.ds_size,
        dataset_path=dataset_path,
        final_label_factor=config.final_label_factor,
        normalize=normalize_flag
    )

    if post_normalized:
        dataset.post_normalize()

    # Seeded train/validation/test split; the test part is not used here.
    split_rng = torch.Generator().manual_seed(config.seed)
    split_fractions = [1 - test_perc - val_perc, val_perc, test_perc]
    train_ds, val_ds, _ = random_split(
        dataset,
        split_fractions,
        generator=split_rng
    )

    # Data loaders: the training loader is chosen from the config, validation
    # uses a plain unshuffled loader with four times the training batch size.
    train_loader = choice_trainloader(config, df_index, train_ds)
    val_loader = DataLoader(
        val_ds,
        batch_size=4 * config.batch_size,
        shuffle=False
    )

    # Use the GPU when one is available.
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # Seed for reproducibility before the model is created.
    seeding_all(config.seed)

    # Model: a single node of type `node_type` holding `size_elem` "E" elements.
    tree = TreeNode(node_type, [TreeNode("E", size_elem)])
    model = LayerManagementFramework(
        in_dim=config.in_dim,
        tree=tree,
        hidden_dim=config.hidden_dim,
        out_dim=config.out_dim,
        k=config.k
    ).to(device)

    # Loss function and optimizer as selected by the config.
    criterion = choice_loss(config)
    optimizer = choice_optim(config, model)

    # Validation R2 score of every epoch, in order.
    r2_history = []

    for epoch in range(config.num_max_epochs):
        # One pass over the training data; its return values are not needed.
        train_epoch(model, train_loader, optimizer, device, criterion)

        _, val_true_y, val_pred_y = eval_epoch(
            model,
            val_loader,
            device,
            criterion
        )

        # Record the epoch's R2 score and report it as an intermediate value.
        epoch_r2 = r2_score(y_true=val_true_y, y_pred=val_pred_y)
        r2_history.append(epoch_r2)
        trial.report(epoch_r2, epoch)

    # Objective value: average of the last `stability_count` scores rather
    # than only the final one, for stability.
    recent_scores = np.array(r2_history[-stability_count:])
    return np.mean(recent_scores)

## Parameter search

### Data set size 10

In [5]:
# Search setup: node type "P" with 10 elements, 10 generated samples,
# neither normalization nor post-normalization.
node_type, size_elem = "P", 10
dataset_size, n_trials = 10, 50
normalized, post_normalized = False, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:34:58,403] A new study created in memory with name: Study node type P, normalized: False, post-normalized:False, dataset size: 10


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-21 09:34:59,017] Trial 0 finished with value: -112.16874099620547 and parameters: {'hidden_dim': 16, 'learning_rate': 0.00044497708732226286, 'batcher': True}. Best is trial 0 with value: -112.16874099620547.
[I 2023-12-21 09:34:59,207] Trial 1 finished with value: -15.677964438141998 and parameters: {'hidden_dim': 336, 'learning_rate': 0.0051539358664526936, 'batcher': False}. Best is trial 1 with value: -15.677964438141998.
[I 2023-12-21 09:34:59,390] Trial 2 finished with value: 0.5213741282107597 and parameters: {'hidden_dim': 128, 'learning_rate': 0.0010211360290853743, 'batcher': False}. Best is trial 2 with value: 0.5213741282107597.
[I 2023-12-21 09:34:59,574] Trial 3 finished with value: 0.16486744251813562 and parameters: {'hidden_dim': 464, 'learning_rate': 3.20271830339219e-05, 'batcher': False}. Best is trial 2 with value: 0.5213741282107597.
[I 2023-12-21 09:34:59,759] Trial 4 finished with value: -250.032712805468 and parameters: {'hidden_dim': 464, 'learning_

In [6]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [7]:
# Search setup: node type "P" with 10 elements, 10 generated samples,
# dataset loaded with normalization (objective passes normalize=True).
node_type, size_elem = "P", 10
dataset_size, n_trials = 10, 200
normalized, post_normalized = True, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:35:12,642] A new study created in memory with name: Study node type P, normalized: True, post-normalized:False, dataset size: 10


  0%|          | 0/200 [00:00<?, ?it/s]

[I 2023-12-21 09:35:12,844] Trial 0 finished with value: -1929.187428497077 and parameters: {'hidden_dim': 256, 'learning_rate': 1.3826041288895032e-05, 'batcher': True}. Best is trial 0 with value: -1929.187428497077.
[I 2023-12-21 09:35:13,065] Trial 1 finished with value: -1028.947086292185 and parameters: {'hidden_dim': 208, 'learning_rate': 0.00042434474668089783, 'batcher': True}. Best is trial 1 with value: -1028.947086292185.
[I 2023-12-21 09:35:13,364] Trial 2 finished with value: -57.58693085019072 and parameters: {'hidden_dim': 192, 'learning_rate': 0.00128164247258845, 'batcher': False}. Best is trial 2 with value: -57.58693085019072.
[I 2023-12-21 09:35:13,601] Trial 3 finished with value: -1622.0172400742742 and parameters: {'hidden_dim': 400, 'learning_rate': 0.00013483950059185566, 'batcher': True}. Best is trial 2 with value: -57.58693085019072.
[I 2023-12-21 09:35:13,794] Trial 4 finished with value: -1925.4441875618445 and parameters: {'hidden_dim': 208, 'learning_ra

In [8]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [9]:
# Search setup: node type "P" with 10 elements, 10 generated samples,
# post-normalization only (objective calls dataset.post_normalize()).
node_type, size_elem = "P", 10
dataset_size, n_trials = 10, 200
normalized, post_normalized = False, True

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:35:58,186] A new study created in memory with name: Study node type P, normalized: False, post-normalized:True, dataset size: 10


  0%|          | 0/200 [00:00<?, ?it/s]

[I 2023-12-21 09:35:58,637] Trial 0 finished with value: -142.47273299021845 and parameters: {'hidden_dim': 480, 'learning_rate': 1.4545152425751644e-05, 'batcher': False}. Best is trial 0 with value: -142.47273299021845.
[I 2023-12-21 09:35:58,961] Trial 1 finished with value: -142.58214530036943 and parameters: {'hidden_dim': 464, 'learning_rate': 2.1067181061366455e-05, 'batcher': False}. Best is trial 0 with value: -142.47273299021845.
[I 2023-12-21 09:35:59,177] Trial 2 finished with value: -142.89056556957462 and parameters: {'hidden_dim': 320, 'learning_rate': 1.443485590650907e-05, 'batcher': False}. Best is trial 0 with value: -142.47273299021845.
[I 2023-12-21 09:35:59,378] Trial 3 finished with value: -143.08186403089366 and parameters: {'hidden_dim': 160, 'learning_rate': 2.2960347377229155e-05, 'batcher': True}. Best is trial 0 with value: -142.47273299021845.
[I 2023-12-21 09:35:59,588] Trial 4 finished with value: -1.25092337820127 and parameters: {'hidden_dim': 256, 'le

In [10]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

### Data set size 100

In [6]:
# Search setup: node type "P" with 10 elements, 100 generated samples,
# neither normalization nor post-normalization.
node_type, size_elem = "P", 10
dataset_size, n_trials = 100, 50
normalized, post_normalized = False, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:47:02,776] A new study created in memory with name: Study node type P, normalized: False, post-normalized:False, dataset size: 100


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2023-12-21 09:47:03,660] Trial 0 finished with value: -0.8100970147017688 and parameters: {'hidden_dim': 240, 'learning_rate': 5.510179978238913e-05, 'batcher': False}. Best is trial 0 with value: -0.8100970147017688.
[I 2023-12-21 09:47:04,489] Trial 1 finished with value: 0.03713013424811633 and parameters: {'hidden_dim': 48, 'learning_rate': 0.0004669132101364149, 'batcher': False}. Best is trial 1 with value: 0.03713013424811633.
[I 2023-12-21 09:47:05,322] Trial 2 finished with value: -0.2705316693317733 and parameters: {'hidden_dim': 240, 'learning_rate': 0.0009624167067946804, 'batcher': True}. Best is trial 1 with value: 0.03713013424811633.
[I 2023-12-21 09:47:06,150] Trial 3 finished with value: -15.672862973585387 and parameters: {'hidden_dim': 256, 'learning_rate': 0.005336621963136312, 'batcher': True}. Best is trial 1 with value: 0.03713013424811633.
[I 2023-12-21 09:47:06,963] Trial 4 finished with value: 0.3833166041857895 and parameters: {'hidden_dim': 48, 'learning

In [7]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [8]:
# Search setup: node type "P" with 10 elements, 100 generated samples,
# dataset loaded with normalization (objective passes normalize=True).
node_type, size_elem = "P", 10
dataset_size, n_trials = 100, 200
normalized, post_normalized = True, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:47:58,788] A new study created in memory with name: Study node type P, normalized: True, post-normalized:False, dataset size: 100


  0%|          | 0/200 [00:00<?, ?it/s]

[I 2023-12-21 09:47:59,637] Trial 0 finished with value: -2.130355252288214 and parameters: {'hidden_dim': 400, 'learning_rate': 0.0024277438833659854, 'batcher': False}. Best is trial 0 with value: -2.130355252288214.
[I 2023-12-21 09:48:00,359] Trial 1 finished with value: -174.52146354531905 and parameters: {'hidden_dim': 192, 'learning_rate': 9.898023436606619e-05, 'batcher': True}. Best is trial 0 with value: -2.130355252288214.
[I 2023-12-21 09:48:01,118] Trial 2 finished with value: -93.26806215134562 and parameters: {'hidden_dim': 384, 'learning_rate': 0.00021854628458147424, 'batcher': True}. Best is trial 0 with value: -2.130355252288214.
[I 2023-12-21 09:48:01,885] Trial 3 finished with value: -2.1530518526958464 and parameters: {'hidden_dim': 496, 'learning_rate': 0.0043143021494922405, 'batcher': False}. Best is trial 0 with value: -2.130355252288214.
[I 2023-12-21 09:48:02,636] Trial 4 finished with value: -175.50760009217805 and parameters: {'hidden_dim': 416, 'learning_

In [9]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [12]:
# Search setup: node type "P" with 10 elements, 100 generated samples,
# post-normalization only (objective calls dataset.post_normalize()).
node_type, size_elem = "P", 10
dataset_size, n_trials = 100, 200
normalized, post_normalized = False, True

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:53:55,114] A new study created in memory with name: Study node type P, normalized: False, post-normalized:True, dataset size: 100


  0%|          | 0/200 [00:00<?, ?it/s]

[I 2023-12-21 09:53:56,254] Trial 0 finished with value: -2.0832689078870397 and parameters: {'hidden_dim': 512, 'learning_rate': 0.001160868330702806, 'batcher': False}. Best is trial 0 with value: -2.0832689078870397.
[I 2023-12-21 09:53:57,166] Trial 1 finished with value: -200.5162725529696 and parameters: {'hidden_dim': 64, 'learning_rate': 1.8465899807707633e-05, 'batcher': True}. Best is trial 0 with value: -2.0832689078870397.
[I 2023-12-21 09:53:58,136] Trial 2 finished with value: -12.92295694013917 and parameters: {'hidden_dim': 368, 'learning_rate': 0.0005917873133721105, 'batcher': True}. Best is trial 0 with value: -2.0832689078870397.
[I 2023-12-21 09:53:59,109] Trial 3 finished with value: -2.732267256998926 and parameters: {'hidden_dim': 240, 'learning_rate': 0.00345098935923933, 'batcher': True}. Best is trial 0 with value: -2.0832689078870397.
[I 2023-12-21 09:54:00,101] Trial 4 finished with value: -200.17739890716132 and parameters: {'hidden_dim': 64, 'learning_rat

In [13]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

### Data set size 1000

In [14]:
# Search setup: node type "P" with 10 elements, 1000 generated samples,
# neither normalization nor post-normalization.
node_type, size_elem = "P", 10
dataset_size, n_trials = 1000, 50
normalized, post_normalized = False, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 09:58:43,648] A new study created in memory with name: Study node type P, normalized: False, post-normalized:False, dataset size: 1000


  0%|          | 0/50 [00:00<?, ?it/s]


100%|██████████| 1000/1000 [00:00<00:00, 142882.10it/s]

  0%|          | 0/1000 [00:00<?, ?it/s][A
  1%|          | 10/1000 [00:00<00:10, 93.19it/s][A
  2%|▏         | 21/1000 [00:04<03:56,  4.14it/s][A
  9%|▊         | 86/1000 [00:04<00:36, 24.71it/s][A
 15%|█▌        | 151/1000 [00:04<00:16, 51.88it/s][A
 20%|█▉        | 197/1000 [00:04<00:10, 75.67it/s][A
 24%|██▍       | 245/1000 [00:04<00:07, 106.82it/s][A
 35%|███▍      | 346/1000 [00:04<00:03, 196.16it/s][A
 45%|████▍     | 449/1000 [00:04<00:01, 299.14it/s][A
 55%|█████▌    | 553/1000 [00:05<00:01, 409.86it/s][A
 65%|██████▍   | 649/1000 [00:05<00:00, 506.77it/s][A
 75%|███████▌  | 750/1000 [00:05<00:00, 608.08it/s][A
 85%|████████▌ | 852/1000 [00:05<00:00, 698.53it/s][A
100%|██████████| 1000/1000 [00:05<00:00, 180.78it/s][A


[I 2023-12-21 09:58:56,332] Trial 0 finished with value: 0.4563118555960946 and parameters: {'hidden_dim': 48, 'learning_rate': 0.005241252490972763, 'batcher': False}. Best is trial 0 with value: 0.4563118555960946.
[I 2023-12-21 09:59:03,100] Trial 1 finished with value: -4.435503860340775 and parameters: {'hidden_dim': 16, 'learning_rate': 0.0030901439016735666, 'batcher': False}. Best is trial 0 with value: 0.4563118555960946.
[I 2023-12-21 09:59:10,189] Trial 2 finished with value: -6.377707500708668 and parameters: {'hidden_dim': 336, 'learning_rate': 0.006764601283068002, 'batcher': True}. Best is trial 0 with value: 0.4563118555960946.
[I 2023-12-21 09:59:17,332] Trial 3 finished with value: -80491.58740974138 and parameters: {'hidden_dim': 336, 'learning_rate': 0.009892288079408697, 'batcher': True}. Best is trial 0 with value: 0.4563118555960946.
[I 2023-12-21 09:59:24,535] Trial 4 finished with value: 0.4216928999731996 and parameters: {'hidden_dim': 464, 'learning_rate': 0.

In [15]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [16]:
# Search setup: node type "P" with 10 elements, 1000 generated samples,
# dataset loaded with normalization (objective passes normalize=True).
node_type, size_elem = "P", 10
dataset_size, n_trials = 1000, 100
normalized, post_normalized = True, False

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 10:07:21,234] A new study created in memory with name: Study node type P, normalized: True, post-normalized:False, dataset size: 1000


  0%|          | 0/100 [00:00<?, ?it/s]


100%|██████████| 1000/1000 [00:00<00:00, 200043.12it/s]

  0%|          | 0/1000 [00:00<?, ?it/s][A
  1%|          | 11/1000 [00:00<00:09, 108.92it/s][A
  2%|▏         | 22/1000 [00:04<03:33,  4.58it/s] [A
  8%|▊         | 76/1000 [00:04<00:40, 22.67it/s][A
 12%|█▏        | 117/1000 [00:04<00:21, 40.49it/s][A
 15%|█▌        | 150/1000 [00:04<00:14, 57.64it/s][A
 18%|█▊        | 181/1000 [00:04<00:10, 76.99it/s][A
 24%|██▍       | 239/1000 [00:04<00:05, 128.00it/s][A
 30%|███       | 305/1000 [00:04<00:03, 195.15it/s][A
 37%|███▋      | 372/1000 [00:04<00:02, 267.69it/s][A
 44%|████▍     | 442/1000 [00:04<00:01, 344.62it/s][A
 51%|█████     | 508/1000 [00:05<00:01, 408.02it/s][A
 58%|█████▊    | 576/1000 [00:05<00:00, 467.77it/s][A
 64%|██████▍   | 642/1000 [00:05<00:00, 514.25it/s][A
 71%|███████   | 706/1000 [00:05<00:00, 524.38it/s][A
 77%|███████▋  | 767/1000 [00:05<00:00, 546.18it/s][A
 83%|████████▎ | 832/1000 [00:05<00:00, 574.11it/s][A
 90%|████████▉ | 899/1000

[I 2023-12-21 10:07:33,847] Trial 0 finished with value: -2.7744807732550725 and parameters: {'hidden_dim': 176, 'learning_rate': 0.005412181723365831, 'batcher': True}. Best is trial 0 with value: -2.7744807732550725.
[I 2023-12-21 10:07:40,011] Trial 1 finished with value: -122.90004759365483 and parameters: {'hidden_dim': 48, 'learning_rate': 0.0013228071907257697, 'batcher': True}. Best is trial 0 with value: -2.7744807732550725.
[I 2023-12-21 10:07:46,663] Trial 2 finished with value: -3.170641363691481 and parameters: {'hidden_dim': 240, 'learning_rate': 0.004864912467629095, 'batcher': False}. Best is trial 0 with value: -2.7744807732550725.
[I 2023-12-21 10:07:54,005] Trial 3 finished with value: -21.35289195311581 and parameters: {'hidden_dim': 256, 'learning_rate': 0.0005729431096605534, 'batcher': False}. Best is trial 0 with value: -2.7744807732550725.
[I 2023-12-21 10:08:00,121] Trial 4 finished with value: -40.42019436242909 and parameters: {'hidden_dim': 32, 'learning_ra

In [17]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

In [22]:
# Search setup: node type "P" with 10 elements, 1000 generated samples,
# post-normalization only (objective calls dataset.post_normalize()).
node_type, size_elem = "P", 10
dataset_size, n_trials = 1000, 150
normalized, post_normalized = False, True

# New study (no storage argument) maximizing the value returned by `objective`.
study = optuna.create_study(
    study_name=(
        f"Study node type {node_type}, normalized: {normalized}, "
        f"post-normalized:{post_normalized}, dataset size: {dataset_size}"
    ),
    directions=["maximize"],
)
study.optimize(objective, n_trials=n_trials, show_progress_bar=True)

[I 2023-12-21 10:57:56,291] A new study created in memory with name: Study node type P, normalized: False, post-normalized:True, dataset size: 1000


  0%|          | 0/150 [00:00<?, ?it/s]

[I 2023-12-21 10:58:05,309] Trial 0 finished with value: -204.07519942980753 and parameters: {'hidden_dim': 432, 'learning_rate': 8.758221957163055e-05, 'batcher': False}. Best is trial 0 with value: -204.07519942980753.
[I 2023-12-21 10:58:13,560] Trial 1 finished with value: -216.83649527696934 and parameters: {'hidden_dim': 416, 'learning_rate': 1.3484110998970076e-05, 'batcher': True}. Best is trial 0 with value: -204.07519942980753.
[I 2023-12-21 10:58:22,297] Trial 2 finished with value: -12.311722032440752 and parameters: {'hidden_dim': 368, 'learning_rate': 0.00038112070901573386, 'batcher': True}. Best is trial 2 with value: -12.311722032440752.
[I 2023-12-21 10:58:32,153] Trial 3 finished with value: -3.199915176107344 and parameters: {'hidden_dim': 464, 'learning_rate': 0.0008147262883802487, 'batcher': False}. Best is trial 3 with value: -3.199915176107344.
[I 2023-12-21 10:58:44,358] Trial 4 finished with value: -26.336554905359765 and parameters: {'hidden_dim': 96, 'learn

In [23]:
# Export every trial of the current study to CSV. The output directory is
# created first because DataFrame.to_csv does not create missing parent
# directories and would otherwise fail after the search has already finished.
os.makedirs("csvs", exist_ok=True)
study.trials_dataframe().to_csv(
    f"csvs/single-node_t-{node_type}_n-{size_elem}_normalized-{normalized}"
    f"_post-normalized-{post_normalized}_dataset-size-{dataset_size}.csv"
)

### Data set size 10,000: results are in the `single_nodetest` notebook