# Data Prep

In [20]:
## Run the commands below if any of these libraries are missing or you have created a new environment.

# import sys
# !{sys.executable} -m pip install numpy pandas scikit-learn matplotlib optuna hyperopt jupyter

In [21]:
# !{sys.executable} -m pip install tensorflow

In [22]:
# !{sys.executable} -m pip install -U "ray[data,train,tune,serve]"

In [23]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.metrics import f1_score, matthews_corrcoef

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy, CategoricalCrossentropy

import psutil
import time
import logging
from datetime import datetime
import random
import os
import shutil
import itertools

import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.air import session
from ray.tune.integration.keras import TuneReportCallback
from ray.tune.search.optuna import OptunaSearch
from ray.tune.tuner import Tuner, TuneConfig
from ray.train import RunConfig
from ray.tune import Trainable

import helper

# Start the Ray runtime; ignore_reinit_error lets this cell be re-run safely.
ray.init(ignore_reinit_error=True)

# Report how many GPUs TensorFlow detects, then turn on device-placement logging.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))
tf.debugging.set_log_device_placement(True)

2024-07-22 18:21:29,044	INFO worker.py:1621 -- Calling ray.init() again after it has already been called.


Num GPUs Available:  1


In [None]:
import os

# Per-dataset result pickles that may be left over from an earlier run.
files_to_delete = [
    "results/output_A2.pkl",
    "results/output_A3.pkl",
    "results/output_A4.pkl",
    "results/output_A12.pkl",
    "results/output_A21.pkl",
]

for file in files_to_delete:
    # Report missing files instead of failing, then move on to the next one.
    if not os.path.exists(file):
        print(f"The file {file} does not exist.")
        continue
    os.remove(file)

In [24]:
def delete_folder_contents(folder_path):
    """Empty a directory while keeping the directory itself and its .gitignore.

    Files and symlinks are unlinked, sub-directories are removed recursively.
    A failure on one entry is printed and does not stop the remaining entries.
    If `folder_path` is missing or is not a directory, a message is printed
    and nothing is deleted.
    """
    if not (os.path.exists(folder_path) and os.path.isdir(folder_path)):
        print(f'The folder {folder_path} does not exist or is not a directory.')
        return

    for entry in os.listdir(folder_path):
        # The .gitignore placeholder is the one entry that must survive.
        if entry == '.gitignore':
            continue

        target = os.path.join(folder_path, entry)
        try:
            if os.path.isfile(target) or os.path.islink(target):
                os.unlink(target)
            elif os.path.isdir(target):
                shutil.rmtree(target)
        except Exception as e:
            # Best effort: report the problem and carry on with the next entry.
            print(f'Failed to delete {target}. Reason: {e}')

# Empty both output folders of a previous run before starting a new one.
for folder_path in ('results', 'ray_results'):
    delete_folder_contents(folder_path)

In [25]:
seed = 42

# Apply the same seed to the stdlib RNG, NumPy, TensorFlow and the Keras utilities.
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.keras.utils.set_random_seed(seed)

## Simple FNN

In [26]:
# Neural Network Definition
def create_model(num_classes, activation='relu', learning_rate=0.001, hidden_layers=[30, 15], dropout_rate=0.5):
    """Build and compile a feed-forward softmax classifier for 30 input features.

    Args:
        num_classes: Number of units in the softmax output layer.
        activation: Activation used by every hidden Dense layer.
        learning_rate: Learning rate passed to the Adam optimizer.
        hidden_layers: Sequence of hidden-layer widths; the first entry is the
            width of the input-facing layer.
        dropout_rate: Rate of every Dropout layer.

    Returns:
        The compiled Sequential model (categorical cross-entropy loss,
        accuracy metric).
    """
    network = Sequential()

    # Input-facing layer; the feature count is fixed at 30.
    network.add(Dense(hidden_layers[0], input_shape=(30,), activation=activation))

    # Dropout follows the first layer only when deeper layers exist; each
    # deeper layer is then followed by its own Dropout.
    deeper_widths = hidden_layers[1:]
    if len(deeper_widths) > 0:
        network.add(Dropout(dropout_rate))
    for width in deeper_widths:
        network.add(Dense(width, activation=activation))
        network.add(Dropout(dropout_rate))

    network.add(Dense(num_classes, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    network.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return network

In [27]:
def train(model, X_train, y_train, num_epochs, batch_size):
    """Fit `model` silently on the training data and return the same model object."""
    fit_options = {"epochs": num_epochs, "batch_size": batch_size, "verbose": 0}
    model.fit(X_train, y_train, **fit_options)
    return model

def evaluate(model, X_test, y_test):
    """Score a trained model on a held-out set.

    Args:
        model: Trained model exposing Keras-style `evaluate` and `predict`.
            `evaluate` is expected to return exactly `(loss, accuracy)`.
        X_test: Feature matrix of the held-out samples.
        y_test: One-hot label matrix; the class index is recovered with an
            argmax over axis 1.

    Returns:
        Tuple `(accuracy, f1_macro, f1_micro, mcc)`.
    """
    # Only the accuracy is used; the loss is discarded.
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)

    # verbose=0 keeps predict() as quiet as evaluate(); otherwise every fold
    # of every trial prints a progress bar into the logs.
    y_pred = model.predict(X_test, verbose=0)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_test_classes = np.argmax(y_test, axis=1)

    f1_macro = f1_score(y_test_classes, y_pred_classes, average='macro')
    f1_micro = f1_score(y_test_classes, y_pred_classes, average='micro')
    mcc = matthews_corrcoef(y_test_classes, y_pred_classes)

    return accuracy, f1_macro, f1_micro, mcc

In [28]:
def evaluate_model_on_dataset_one_split(config, split, split_index):
    """Train on every fold except one and evaluate on the held-out fold.

    Args:
        config: Hyperparameter dict with the keys 'activation',
            'learning_rate', 'hidden_layers', 'dropout_rate', 'epochs' and
            'batch_size'.
        split: Collection of fold DataFrames addressable as `split[0]` ..
            `split[len(split) - 1]`. Each fold has a 'track' label column;
            every other column is used as a feature.
        split_index: Position of the fold that is held out for testing.

    Returns:
        Tuple `(accuracy, f1_macro, f1_micro, mcc, num_classes)`.
    """
    test_set = split[split_index]
    train_set = pd.concat(
        [split[i] for i in range(len(split)) if i != split_index],
        axis=0,
        ignore_index=True,
    )

    # One label vocabulary shared by both sets. Taking the columns from the
    # test fold alone breaks as soon as that fold is missing a class: the
    # leftover class column stays in the features and reshape() silently
    # scrambles the matrices.
    classes = pd.concat([train_set['track'], test_set['track']]).unique()
    num_classes = len(classes)

    def one_hot(labels):
        # Same column order for train and test; classes absent from `labels`
        # become all-zero columns.
        return (
            pd.get_dummies(labels)
            .astype(float)
            .reindex(columns=classes, fill_value=0.0)
            .values
        )

    # Features and labels are taken positionally from the same frame, so no
    # index-based join is needed. A join would multiply rows if folds shared
    # index labels.
    X_train = train_set.drop(columns='track').astype(float).values.reshape(-1, 30)
    y_train = one_hot(train_set['track']).reshape(-1, num_classes)
    X_test = test_set.drop(columns='track').astype(float).values.reshape(-1, 30)
    y_test = one_hot(test_set['track']).reshape(-1, num_classes)

    model = create_model(
        num_classes,
        activation=config['activation'],
        learning_rate=config['learning_rate'],
        hidden_layers=config['hidden_layers'],
        dropout_rate=config['dropout_rate'],
    )

    # Train on the combined training folds, then score on the held-out fold.
    model = train(model, X_train, y_train, config['epochs'], config['batch_size'])
    accuracy, f1_macro, f1_micro, mcc = evaluate(model, X_test, y_test)

    return accuracy, f1_macro, f1_micro, mcc, num_classes

## 5-Fold Cross-Validation

In [29]:
# Module-level accumulators for per-fold and per-configuration metrics.
df_data_spike_1_split, df_data_spike_full_split = pd.DataFrame(), pd.DataFrame()
data_spike_exec_1_split_dict, data_spike_exec_full_split_dict = {}, {}

# Best configuration of each hyperparameter search, in call order.
best_params_list_getting = list()

def custom_trial_dirname(trial):
    """Return the directory name for a trial: 'trial_' followed by its trial_id."""
    return "trial_{}".format(trial.trial_id)

In [30]:
def train_and_evaluate(config, splits, splits_name):
    """Ray Tune trainable: cross-validate one hyperparameter configuration.

    Each of the five folds is held out once. Per-fold metrics and the
    min/max/mean/std summary are appended to
    `output_<splits_name>.pkl` and `output_full_<splits_name>.pkl` in
    `helper.results_dir`, and the summary metrics are reported to Tune.

    Args:
        config: Hyperparameter dict sampled by Tune.
        splits: The five fold DataFrames, addressable as `splits[0..4]`.
        splits_name: Dataset tag used in record keys and output file names.
    """
    global df_data_spike_1_split

    trial_id = session.get_trial_id()
    config_repr = str(config)

    accuracies = []
    f1_macro_scores = []
    f1_micro_scores = []
    mcc_scores = []
    num_classes = 0

    # Records produced by THIS call only. Persisting the ever-growing module
    # level accumulators would append earlier rows again whenever this
    # function runs more than once in the same process.
    fold_records = {}

    for i in range(5):
        accuracy, f1_macro, f1_micro, mcc, num_classes = evaluate_model_on_dataset_one_split(config, splits, i)
        accuracies.append(accuracy)
        f1_macro_scores.append(f1_macro)
        f1_micro_scores.append(f1_micro)
        mcc_scores.append(mcc)

        fold_records[f"{splits_name}_{i + 1}_{trial_id}"] = {
            "number of classes": num_classes,
            "accuracy": accuracy,
            "macro f1": f1_macro,
            "micro_f1": f1_micro,
            "mcc": mcc,
            "config": config_repr
        }

    summary_key = f"{splits_name}_{trial_id}"
    summary_record = {
        "number of classes": num_classes,
        "min_accuracy": min(accuracies),
        "max_accuracy": max(accuracies),
        "min_f1_macro": min(f1_macro_scores),
        "max_f1_macro": max(f1_macro_scores),
        "min_f1_micro": min(f1_micro_scores),
        "max_f1_micro": max(f1_micro_scores),
        "min_mcc": min(mcc_scores),
        "max_mcc": max(mcc_scores),
        "mean_accuracy": np.mean(accuracies),
        "mean_f1_macro": np.mean(f1_macro_scores),
        "mean_f1_micro": np.mean(f1_micro_scores),
        "mean_mcc": np.mean(mcc_scores),
        "std_accuracy": np.std(accuracies),
        "std_f1_macro": np.std(f1_macro_scores),
        "std_f1_micro": np.std(f1_micro_scores),
        "std_mcc": np.std(mcc_scores),
        "config": config_repr
    }

    # Keep the module-level accumulators up to date for code that inspects them.
    data_spike_exec_1_split_dict.update(fold_records)
    data_spike_exec_full_split_dict[summary_key] = summary_record

    # Append the summary row to the per-configuration file.
    full_split_path = os.path.join(helper.results_dir, f'output_full_{splits_name}.pkl')
    _append_records_to_pickle(
        {summary_key: summary_record},
        full_split_path,
        f"No existing full split data found at {full_split_path}, creating new file.",
    )

    # Append the five per-fold rows to the per-fold file.
    split_path = os.path.join(helper.results_dir, f'output_{splits_name}.pkl')
    df_data_spike_1_split = _append_records_to_pickle(
        fold_records,
        split_path,
        f"No existing 1 split data found at {split_path}, creating new file",
    )

    # NOTE(review): pause kept from the original; presumably it lets the
    # file writes settle before the trial reports. Confirm it is still needed.
    time.sleep(5)

    # Report MCC as well, so that every metric stored on disk is also
    # visible to Tune.
    session.report({
        "min_accuracy": summary_record["min_accuracy"],
        "max_accuracy": summary_record["max_accuracy"],
        "min_f1_macro": summary_record["min_f1_macro"],
        "max_f1_macro": summary_record["max_f1_macro"],
        "min_f1_micro": summary_record["min_f1_micro"],
        "max_f1_micro": summary_record["max_f1_micro"],
        "mean_accuracy": summary_record["mean_accuracy"],
        "mean_f1_macro": summary_record["mean_f1_macro"],
        "mean_f1_micro": summary_record["mean_f1_micro"],
        "min_mcc": summary_record["min_mcc"],
        "max_mcc": summary_record["max_mcc"],
        "mean_mcc": summary_record["mean_mcc"]
    })


def _append_records_to_pickle(records, path, missing_message):
    """Append `records` (key -> row dict) to the pickled DataFrame at `path`.

    Prints `missing_message` when no file exists yet. Returns the frame that
    was written.
    """
    new_rows = pd.DataFrame.from_dict(records, orient='index')
    if os.path.exists(path):
        combined = pd.concat([pd.read_pickle(path), new_rows], axis=0)
    else:
        print(missing_message)
        combined = new_rows
    combined.to_pickle(path)
    return combined

In [31]:
def generate_hidden_layers_config(min_layers=2, max_layers=5, min_nodes=5, max_nodes=50, step=5):
    """Enumerate every hidden-layer layout within the given depth and width bounds.

    Widths run from `min_nodes` to `max_nodes` (inclusive) in increments of
    `step`; depths run from `min_layers` to `max_layers` (inclusive).

    Returns:
        List of tuples, one width per layer, ordered by depth and then in
        `itertools.product` order.
    """
    widths = range(min_nodes, max_nodes + 1, step)
    return [
        layout
        for depth in range(min_layers, max_layers + 1)
        for layout in itertools.product(widths, repeat=depth)
    ]

def five_fold_cross_validation(splits, splits_name):
    """Run a Ray Tune hyperparameter search over cross-validated trials.

    Every trial runs `train_and_evaluate` on `splits`. The search draws 32
    samples with Optuna under an ASHA scheduler and maximises
    "mean_accuracy". The best configuration is printed and appended to
    `best_params_list_getting`.

    Args:
        splits: Fold DataFrames handed to every trial.
        splits_name: Dataset tag handed to every trial.

    Returns:
        The analysis object returned by `tune.run`.
    """
    global best_params_list_getting

    layer_layouts = generate_hidden_layers_config()

    search_space = {
        "activation": tune.choice(["relu", "tanh", "sigmoid"]),
        "learning_rate": tune.loguniform(1e-4, 1e-2),
        "batch_size": tune.choice([32, 64, 128]),
        "hidden_layers": tune.choice(layer_layouts),
        "epochs": tune.choice([10, 20, 30, 40, 50]),
        "dropout_rate": tune.uniform(0.2, 0.5)
    }

    # The scheduler and the search algorithm optimise the same objective.
    target_metric, target_mode = "mean_accuracy", "max"
    asha = ASHAScheduler(
        metric=target_metric,
        mode=target_mode,
        max_t=10,
        grace_period=1,
        reduction_factor=2
    )
    optuna_search = OptunaSearch(metric=target_metric, mode=target_mode)

    # Bind the dataset to the trainable so that Tune only supplies `config`.
    trainable = tune.with_parameters(train_and_evaluate, splits=splits, splits_name=splits_name)

    analysis = tune.run(
        trainable,
        resources_per_trial={"cpu": 12, "gpu": 1, "accelerator_type:RTX": 1},
        config=search_space,
        scheduler=asha,
        search_alg=optuna_search,
        num_samples=32,
        verbose=1,
        storage_path=helper.ray_results_dir,
        trial_dirname_creator=custom_trial_dirname
    )

    winner = analysis.get_best_config(metric=target_metric, mode=target_mode)
    print("Best hyperparameters found were: ", winner)
    best_params_list_getting.append(winner)

    return analysis

In [32]:
# Hyperparameter search on the A2 splits; "A2" names the result pickles written by the trials.
analysis = five_fold_cross_validation(helper.a2_splits, "A2")

0,1
Current time:,2024-07-22 20:50:13
Running for:,02:28:42.79
Memory:,5.8/15.4 GiB

Trial name,status,loc,activation,batch_size,dropout_rate,epochs,hidden_layers,learning_rate,acc,iter,total time (s),min_accuracy,max_accuracy,min_f1_macro
train_and_evaluate_25abde00,TERMINATED,172.27.13.81:81174,sigmoid,32,0.393344,10,"(10, 50, 30, 5, 15)",0.00364854,0.557944,1,62.3261,0.495327,0.794393,0.33125
train_and_evaluate_98a6ea08,TERMINATED,172.27.13.81:83122,sigmoid,128,0.298417,50,"(20, 30, 15, 15, 15)",0.00883034,0.852717,1,60.9206,0.796296,0.878505,0.796226
train_and_evaluate_a08ebbcb,TERMINATED,172.27.13.81:88204,sigmoid,64,0.367652,10,"(35, 40, 50, 30, 45)",0.00100164,0.513084,1,67.3011,0.495327,0.560748,0.33125
train_and_evaluate_b9904f24,TERMINATED,172.27.13.81:90202,sigmoid,64,0.235047,40,"(20, 30, 15, 20, 5)",0.00359456,0.837833,1,62.9505,0.759259,0.878505,0.757177
train_and_evaluate_3e2c8d37,TERMINATED,172.27.13.81:94497,sigmoid,32,0.353068,40,"(20, 45, 30, 25, 10)",0.000713396,0.720232,1,79.9112,0.495327,0.831776,0.33125
train_and_evaluate_abf20170,TERMINATED,172.27.13.81:98908,sigmoid,32,0.256644,20,"(40, 5, 15, 35, 30)",0.000186114,0.496262,1,67.6297,0.495327,0.5,0.33125
train_and_evaluate_10861ca1,TERMINATED,172.27.13.81:101673,tanh,32,0.273406,40,"(25, 45, 40, 35, 45)",0.0055603,0.850848,1,81.5581,0.796296,0.897196,0.795172
train_and_evaluate_ca97ac48,TERMINATED,172.27.13.81:106142,tanh,32,0.382854,10,"(10, 50, 25, 5)",0.000831896,0.705452,1,55.4275,0.583333,0.813084,0.581576
train_and_evaluate_2c1cc29a,TERMINATED,172.27.13.81:108020,tanh,128,0.403316,30,"(5, 45, 5, 40, 10)",0.00220911,0.744514,1,68.3658,0.685185,0.831776,0.684753
train_and_evaluate_f91a061e,TERMINATED,172.27.13.81:111574,sigmoid,32,0.222823,50,"(10, 30, 15, 10, 15)",0.000481668,0.766978,1,74.4481,0.666667,0.831776,0.65483


2024-07-22 18:22:39,397	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (10, 50, 30, 5, 15)}
2024-07-22 18:23:44,248	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (20, 30, 15, 15, 15)}
2024-07-22 18:24:55,027	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (35, 40, 50, 30, 45)}
2024-07-22 18:26:03,398	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (20, 30, 15, 20, 5)}
2024-07-22 18:27:30,156	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (20, 45, 30, 25, 10)}
2024-07-22 18:28:43,554	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (40, 5, 15, 35, 30)}
2024-07-22 18

Best hyperparameters found were:  {'activation': 'relu', 'learning_rate': 0.001226746141092834, 'batch_size': 64, 'hidden_layers': (30, 20, 25, 45, 20), 'epochs': 50, 'dropout_rate': 0.25587746853049803}


In [33]:
# Hyperparameter search on the A3 splits; "A3" names the result pickles written by the trials.
analysis = five_fold_cross_validation(helper.a3_splits, "A3")

0,1
Current time:,2024-07-22 21:32:04
Running for:,00:41:48.89
Memory:,5.8/15.4 GiB

Trial name,status,loc,activation,batch_size,dropout_rate,epochs,hidden_layers,learning_rate,acc,iter,total time (s),min_accuracy,max_accuracy,min_f1_macro
train_and_evaluate_f48fa8d1,TERMINATED,172.27.13.81:215983,tanh,64,0.412832,50,"(45, 50, 20, 50, 5)",0.000550521,0.774493,1,61.1242,0.724638,0.8,0.720886
train_and_evaluate_bd3dd1a3,TERMINATED,172.27.13.81:221074,relu,128,0.464273,40,"(5, 35, 5, 25, 5)",0.00554311,0.679213,1,69.8364,0.492754,0.811594,0.330097
train_and_evaluate_7533afaf,TERMINATED,172.27.13.81:225381,tanh,64,0.251443,10,"(20, 25, 50, 15, 45)",0.000600224,0.748861,1,93.9647,0.642857,0.811594,0.6422
train_and_evaluate_55abae56,TERMINATED,172.27.13.81:227403,relu,64,0.4846,10,"(35, 50, 50, 25)",0.00388583,0.673085,1,48.1883,0.550725,0.785714,0.462157
train_and_evaluate_579a3bf3,TERMINATED,172.27.13.81:229253,tanh,128,0.325123,40,"(5, 40, 50, 5, 10)",0.000479617,0.69648,1,81.0533,0.594203,0.768116,0.583621
train_and_evaluate_dc476184,TERMINATED,172.27.13.81:233639,sigmoid,128,0.352743,50,"(5, 50, 20, 45)",0.000139499,0.497101,1,68.0634,0.492754,0.507246,0.330097
train_and_evaluate_f5dd0474,TERMINATED,172.27.13.81:238755,relu,128,0.303992,20,"(20, 25, 45, 35, 40)",0.000269082,0.583892,1,86.8796,0.492754,0.652174,0.330097
train_and_evaluate_99cd9389,TERMINATED,172.27.13.81:241594,sigmoid,128,0.477646,20,"(40, 45, 20, 30, 20)",0.000208238,0.505797,1,62.1592,0.5,0.507246,0.333333
train_and_evaluate_4c5c8f00,TERMINATED,172.27.13.81:244285,sigmoid,128,0.437218,40,"(25, 50, 40, 40, 15)",0.00110775,0.502899,1,62.26,0.492754,0.507246,0.330097
train_and_evaluate_8b10105c,TERMINATED,172.27.13.81:248607,relu,32,0.429775,30,"(45, 40, 30, 5)",0.00168868,0.806335,1,60.5708,0.753623,0.855072,0.753623


2024-07-22 20:51:21,029	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (45, 50, 20, 50, 5)}
2024-07-22 20:52:34,039	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (5, 35, 5, 25, 5)}
2024-07-22 20:54:11,854	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (20, 25, 50, 15, 45)}
2024-07-22 20:55:03,492	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (35, 50, 50, 25)}
2024-07-22 20:56:28,252	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (5, 40, 50, 5, 10)}
2024-07-22 20:57:40,032	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (5, 50, 20, 45)}
2024-07-22 20:59:10,409	I

Best hyperparameters found were:  {'activation': 'tanh', 'learning_rate': 0.002673992605245546, 'batch_size': 64, 'hidden_layers': (30, 45, 40, 20, 40), 'epochs': 50, 'dropout_rate': 0.25974001156704213}


In [34]:
# Hyperparameter search on the A4 splits; "A4" names the result pickles written by the trials.
analysis = five_fold_cross_validation(helper.a4_splits, "A4")

0,1
Current time:,2024-07-22 22:14:10
Running for:,00:42:04.87
Memory:,5.8/15.4 GiB

Trial name,status,loc,activation,batch_size,dropout_rate,epochs,hidden_layers,learning_rate,acc,iter,total time (s),min_accuracy,max_accuracy,min_f1_macro
train_and_evaluate_ee520a9d,TERMINATED,172.27.13.81:334237,sigmoid,64,0.24957,30,"(50, 50, 35, 15, 45)",0.00914258,0.6282,1,71.3984,0.586294,0.653944,0.567145
train_and_evaluate_b685f2d9,TERMINATED,172.27.13.81:337807,relu,128,0.289763,30,"(10, 45, 15, 30, 45)",0.000360956,0.540727,1,63.2946,0.472081,0.569975,0.446686
train_and_evaluate_a9c37310,TERMINATED,172.27.13.81:341310,tanh,64,0.495531,10,"(45, 35, 40, 30, 5)",0.000197084,0.42068,1,66.9437,0.362944,0.491094,0.29737
train_and_evaluate_d24a9143,TERMINATED,172.27.13.81:343256,relu,32,0.263293,50,"(30, 15, 15, 45, 40)",0.0010308,0.613446,1,106.204,0.569975,0.679389,0.476406
train_and_evaluate_b7ee5927,TERMINATED,172.27.13.81:348600,relu,128,0.453707,40,"(45, 20, 30, 5, 25)",0.000616836,0.349426,1,71.3021,0.333333,0.378173,0.166667
train_and_evaluate_31444e30,TERMINATED,172.27.13.81:352928,tanh,128,0.464311,50,"(10, 15, 35, 5, 15)",0.000150414,0.38051,1,74.7267,0.296954,0.43257,0.2875
train_and_evaluate_90967c34,TERMINATED,172.27.13.81:358069,tanh,128,0.496042,30,"(35, 20, 20, 5, 35)",0.0010838,0.466457,1,62.6155,0.411168,0.501272,0.358443
train_and_evaluate_55fcff3d,TERMINATED,172.27.13.81:361578,relu,64,0.471338,20,"(35, 30, 25, 10, 50)",0.000252773,0.341285,1,62.6964,0.284987,0.375635,0.165711
train_and_evaluate_a02c2db3,TERMINATED,172.27.13.81:364271,tanh,64,0.460786,50,"(40, 25, 10, 25, 45)",0.00422136,0.579365,1,85.9891,0.548223,0.636132,0.476205
train_and_evaluate_5bfc69da,TERMINATED,172.27.13.81:369473,relu,64,0.383785,50,"(40, 30, 15, 35)",0.00029993,0.535658,1,68.6618,0.431472,0.59542,0.407123


2024-07-22 21:33:23,033	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (50, 50, 35, 15, 45)}
2024-07-22 21:34:30,124	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (10, 45, 15, 30, 45)}
2024-07-22 21:35:40,947	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (45, 35, 40, 30, 5)}
2024-07-22 21:37:30,416	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (30, 15, 15, 45, 40)}
2024-07-22 21:38:45,936	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (45, 20, 30, 5, 25)}
2024-07-22 21:40:03,776	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (10, 15, 35, 5, 15)}
2024-07-22 21

Best hyperparameters found were:  {'activation': 'sigmoid', 'learning_rate': 0.003353256725980568, 'batch_size': 64, 'hidden_layers': (35, 50, 50, 10), 'epochs': 40, 'dropout_rate': 0.3225671724019045}


In [35]:
# Hyperparameter search on the A12 splits; "A12" names the result pickles written by the trials.
analysis = five_fold_cross_validation(helper.a12_splits, "A12")

0,1
Current time:,2024-07-22 22:53:55
Running for:,00:39:43.95
Memory:,5.8/15.4 GiB

Trial name,status,loc,activation,batch_size,dropout_rate,epochs,hidden_layers,learning_rate,acc,iter,total time (s),min_accuracy,max_accuracy,min_f1_macro
train_and_evaluate_1ab59eca,TERMINATED,172.27.13.81:457219,relu,128,0.29265,50,"(20, 30, 50, 35, 15)",0.000410873,0.186342,1,73.6544,0.165803,0.209845,0.047619
train_and_evaluate_6f39c04f,TERMINATED,172.27.13.81:462387,tanh,32,0.416186,10,"(15, 30, 35, 15, 10)",0.000106417,0.164085,1,57.4598,0.15285,0.186529,0.128781
train_and_evaluate_90be14ca,TERMINATED,172.27.13.81:464263,sigmoid,32,0.32356,20,"(30, 15, 40, 10, 30)",0.00873625,0.191516,1,74.6592,0.170543,0.212435,0.0485651
train_and_evaluate_d0519f93,TERMINATED,172.27.13.81:466999,sigmoid,32,0.24527,20,"(30, 30, 15, 35, 10)",0.00696362,0.231858,1,64.1698,0.196891,0.26615,0.122734
train_and_evaluate_1bb342ec,TERMINATED,172.27.13.81:469686,tanh,64,0.409069,30,"(40, 25, 30, 50, 35)",0.000316896,0.234985,1,69.3215,0.207254,0.246114,0.146746
train_and_evaluate_0b9ef4ad,TERMINATED,172.27.13.81:473227,tanh,32,0.491014,50,"(10, 5, 40, 20, 30)",0.00550822,0.232401,1,104.546,0.204663,0.272021,0.116797
train_and_evaluate_b178ddce,TERMINATED,172.27.13.81:478506,sigmoid,32,0.260832,20,"(20, 35, 45, 25, 35)",0.000346371,0.177027,1,72.7584,0.165803,0.194301,0.0474074
train_and_evaluate_5a39d1ca,TERMINATED,172.27.13.81:481280,tanh,64,0.462692,40,"(45, 10, 10, 15, 10)",0.000344892,0.215839,1,87.1165,0.209845,0.225389,0.163437
train_and_evaluate_ece64c58,TERMINATED,172.27.13.81:485683,sigmoid,128,0.28856,40,"(20, 20, 50, 5, 25)",0.00391505,0.237569,1,64.2215,0.225389,0.25323,0.117946
train_and_evaluate_f5d15ace,TERMINATED,172.27.13.81:490003,tanh,32,0.28499,40,"(10, 25, 5, 40, 10)",0.00565898,0.247927,1,102.431,0.23057,0.272021,0.171615


2024-07-22 22:15:28,773	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (20, 30, 50, 35, 15)}
2024-07-22 22:16:30,135	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (15, 30, 35, 15, 10)}
2024-07-22 22:17:48,434	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (30, 15, 40, 10, 30)}
2024-07-22 22:18:55,906	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (30, 30, 15, 35, 10)}
2024-07-22 22:20:09,164	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (40, 25, 30, 50, 35)}
2024-07-22 22:21:57,392	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (10, 5, 40, 20, 30)}
2024-07-22 

Best hyperparameters found were:  {'activation': 'relu', 'learning_rate': 0.0017102815105530136, 'batch_size': 128, 'hidden_layers': (40, 25, 45, 50), 'epochs': 40, 'dropout_rate': 0.22797009619569603}


In [36]:
# Hyperparameter search on the A21 splits; "A21" names the result pickles written by the trials.
analysis = five_fold_cross_validation(helper.a21_splits, "A21")

0,1
Current time:,2024-07-22 23:30:29
Running for:,00:36:32.65
Memory:,5.8/15.4 GiB

Trial name,status,loc,activation,batch_size,dropout_rate,epochs,hidden_layers,learning_rate,acc,iter,total time (s),min_accuracy,max_accuracy,min_f1_macro
train_and_evaluate_c10369c8,TERMINATED,172.27.13.81:588036,sigmoid,32,0.323953,30,"(15, 45, 10, 25, 5)",0.000263225,0.492273,1,63.7445,0.266667,0.580952,0.210526
train_and_evaluate_a2f8cc1e,TERMINATED,172.27.13.81:591542,tanh,64,0.393489,50,"(15, 25, 5, 30, 50)",0.000318363,0.585606,1,80.8977,0.504762,0.733333,0.335443
train_and_evaluate_73d84b35,TERMINATED,172.27.13.81:596731,sigmoid,32,0.382577,40,"(15, 40, 15, 30, 20)",0.000182211,0.585606,1,61.9649,0.504762,0.733333,0.335443
train_and_evaluate_07941559,TERMINATED,172.27.13.81:601038,relu,64,0.319628,10,"(50, 5, 5, 45, 25)",0.000680146,0.577987,1,58.9655,0.504762,0.695238,0.335443
train_and_evaluate_9528de1d,TERMINATED,172.27.13.81:602938,sigmoid,32,0.384691,50,"(35, 45, 35, 20, 35)",0.000392624,0.585606,1,74.3781,0.504762,0.733333,0.335443
train_and_evaluate_455a3bf3,TERMINATED,172.27.13.81:608179,relu,32,0.278024,20,"(15, 50, 30, 10, 20)",0.000196942,0.591285,1,64.7132,0.504762,0.714286,0.335443
train_and_evaluate_a4805dcc,TERMINATED,172.27.13.81:610912,tanh,128,0.301384,30,"(15, 5, 20, 25, 5)",0.000203308,0.579892,1,56.3111,0.514286,0.714286,0.356448
train_and_evaluate_b86851f7,TERMINATED,172.27.13.81:614354,sigmoid,32,0.382055,20,"(40, 10, 45, 35, 30)",0.00259291,0.585606,1,62.6462,0.504762,0.733333,0.335443
train_and_evaluate_da652be8,TERMINATED,172.27.13.81:617133,sigmoid,128,0.212914,40,"(5, 40, 50, 30, 25)",0.00148402,0.585606,1,72.4104,0.504762,0.733333,0.335443
train_and_evaluate_a85721e3,TERMINATED,172.27.13.81:621502,tanh,64,0.296338,10,"(20, 15, 30, 30, 30)",0.00191438,0.587457,1,48.9443,0.504762,0.685714,0.335443


2024-07-22 22:55:04,247	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (15, 45, 10, 25, 5)}
2024-07-22 22:56:28,286	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (15, 25, 5, 30, 50)}
2024-07-22 22:57:33,559	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (15, 40, 15, 30, 20)}
2024-07-22 22:58:36,447	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (50, 5, 5, 45, 25)}
2024-07-22 22:59:53,906	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (35, 45, 35, 20, 35)}
2024-07-22 23:01:02,302	INFO tensorboardx.py:308 -- Removed the following hyperparameter values when logging to tensorboard: {'hidden_layers': (15, 50, 30, 10, 20)}
2024-07-22 23:

Best hyperparameters found were:  {'activation': 'tanh', 'learning_rate': 0.004398572300594925, 'batch_size': 128, 'hidden_layers': (40, 25, 25, 45, 50), 'epochs': 50, 'dropout_rate': 0.2001488958418336}


In [37]:
# Merge the per-fold metrics of every dataset into one CSV.
# The pickles are read from helper.results_dir, the directory the trials
# write them to, rather than from a separately hard-coded relative path.
df_dict_A2 = pd.read_pickle(os.path.join(helper.results_dir, "output_A2.pkl"))
df_dict_A3 = pd.read_pickle(os.path.join(helper.results_dir, "output_A3.pkl"))
df_dict_A4 = pd.read_pickle(os.path.join(helper.results_dir, "output_A4.pkl"))
df_dict_A12 = pd.read_pickle(os.path.join(helper.results_dir, "output_A12.pkl"))
df_dict_A21 = pd.read_pickle(os.path.join(helper.results_dir, "output_A21.pkl"))

df_dict = pd.concat([df_dict_A2, df_dict_A3, df_dict_A4, df_dict_A12, df_dict_A21])

# Drop repeated column labels, then rows whose values are fully identical.
df_dict = df_dict.loc[:, ~df_dict.columns.duplicated()].copy()
df_dict = df_dict.drop_duplicates()

# to_csv does not create missing directories.
os.makedirs('outputs', exist_ok=True)
df_dict.to_csv('outputs/Classic_FFN_one_split_metrics_system.csv')

In [38]:
# Merge the per-configuration summary metrics of every dataset into one CSV.
# The pickles are read from helper.results_dir, the directory the trials
# write them to, rather than from a separately hard-coded relative path.
df_dict_full_A2 = pd.read_pickle(os.path.join(helper.results_dir, "output_full_A2.pkl"))
df_dict_full_A3 = pd.read_pickle(os.path.join(helper.results_dir, "output_full_A3.pkl"))
df_dict_full_A4 = pd.read_pickle(os.path.join(helper.results_dir, "output_full_A4.pkl"))
df_dict_full_A12 = pd.read_pickle(os.path.join(helper.results_dir, "output_full_A12.pkl"))
df_dict_full_A21 = pd.read_pickle(os.path.join(helper.results_dir, "output_full_A21.pkl"))

df_full_dict = pd.concat([df_dict_full_A2, df_dict_full_A3, df_dict_full_A4, df_dict_full_A12, df_dict_full_A21])

# Drop repeated column labels, then rows whose values are fully identical.
df_full_dict = df_full_dict.loc[:, ~df_full_dict.columns.duplicated()].copy()
df_full_dict = df_full_dict.drop_duplicates()

# to_csv does not create missing directories.
os.makedirs("outputs", exist_ok=True)
df_full_dict.to_csv("outputs/Classic_FFN_full_split_metrics_system.csv")



In [39]:
# Write the summary metrics of each dataset to its own CSV file.
per_dataset_exports = {
    "outputs/Classic_FFN_full_split_metrics_A2.csv": df_dict_full_A2,
    "outputs/Classic_FFN_full_split_metrics_A3.csv": df_dict_full_A3,
    "outputs/Classic_FFN_full_split_metrics_A4.csv": df_dict_full_A4,
    "outputs/Classic_FFN_full_split_metrics_A12.csv": df_dict_full_A12,
    "outputs/Classic_FFN_full_split_metrics_A21.csv": df_dict_full_A21,
}
for csv_path, metrics_frame in per_dataset_exports.items():
    metrics_frame.to_csv(csv_path)