# Data Prep

In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

import os
import itertools

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, matthews_corrcoef
from sklearn.preprocessing import LabelBinarizer

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, ReLU, Input
from tensorflow.keras.optimizers import Adam
from math import floor

import psutil
import time
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

import ray
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.air import session
from ray.tune.integration.keras import TuneReportCallback
from ray.tune.search.optuna import OptunaSearch

import helper

# Initialize Ray
ray.init(ignore_reinit_error=True)

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
import torch

# Neural Network Definition
import torch.nn as nn
import torch.nn.functional as F

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#tf.debugging.set_log_device_placement(True)

2024-08-19 11:18:40.006624: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-19 11:18:40.085142: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-19 11:18:40.180158: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-19 11:18:40.209634: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-19 11:18:40.377690: I tensorflow/core/platform/cpu_feature_guar

Num GPUs Available:  1


I0000 00:00:1724059139.371282   28022 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724059139.847836   28022 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1724059139.848076   28022 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


In [2]:
files_to_delete = ["results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A2.pkl", "results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A3.pkl", "results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A4.pkl", "results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A12.pkl", "results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A21.pkl"]

for file in files_to_delete:
    if os.path.exists(file):
        os.remove(file)
    else:
        print(f"The file {file} does not exist.")

The file results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A2.pkl does not exist.
The file results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A3.pkl does not exist.
The file results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A4.pkl does not exist.
The file results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A12.pkl does not exist.
The file results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/output_A21.pkl does not exist.


## Feature-Based MTL model

In [3]:
# TODO activation for now only relu
class FeatureBasedMTLModel(nn.Module):
    def __init__(self, hidden_layers_shared=[50,30], hidden_layers_outputs=[[10],[10],[10],[10],[10]], activation='relu', dropout=0.5):
        super(FeatureBasedMTLModel, self).__init__()

        self.activation = activation
        self.dropout = dropout

        # shared layers
        self.shared_layers = nn.ModuleList()
        input_dim = 30
        for hidden_layer in hidden_layers_shared:
            self.shared_layers.append(nn.Linear(input_dim, hidden_layer))
            self.shared_layers.append(nn.Dropout(self.dropout))  # Add dropout layer
            input_dim = hidden_layer

        # output layers
        self.output_a2 = self.create_output_layers(hidden_layers_outputs[0], input_dim, 2)
        self.output_a3 = self.create_output_layers(hidden_layers_outputs[1], input_dim, 2)
        self.output_a4 = self.create_output_layers(hidden_layers_outputs[2], input_dim, 3)
        self.output_a12 = self.create_output_layers(hidden_layers_outputs[3], input_dim, 6)
        self.output_a21 = self.create_output_layers(hidden_layers_outputs[4], input_dim, 2)


    def create_output_layers(self, hidden_layers, input_dim, output_dim):
        layers = nn.ModuleList()
        for hidden_layer in hidden_layers:
            layers.append(nn.Linear(input_dim, hidden_layer))
            layers.append(nn.Dropout(self.dropout)) 
            input_dim = hidden_layer
        layers.append(nn.Linear(input_dim, output_dim))
        return layers

    def forward(self, x: torch.Tensor, task_id: str):
        # shared layers
        for layer in self.shared_layers:
            if self.activation == 'relu':
                x = F.relu(layer(x))
            elif self.activation == 'tanh':
                x = F.tanh(layer(x))
            elif self.activation == 'sigmoid':
                x = F.sigmoid(layer(x))
            else:
                raise ValueError(f'Invalid activation: {self.activation}')

        
        # output layers
        if task_id == 'a2':
            return self.forward_output_layers(x, self.output_a2)
        elif task_id == 'a3':
            return self.forward_output_layers(x, self.output_a3)
        elif task_id == 'a4':
            return self.forward_output_layers(x, self.output_a4)
        elif task_id == 'a12':
            return self.forward_output_layers(x, self.output_a12)
        elif task_id == 'a21':
            return self.forward_output_layers(x, self.output_a21)
        else:
            raise ValueError(f'Invalid task_id: {task_id}')
        

    def forward_output_layers(self, x, layers):
        for layer in layers[:-1]:
            if self.activation == 'relu':
                x = F.relu(layer(x))
            elif self.activation == 'tanh':
                x = F.tanh(layer(x))
            elif self.activation == 'sigmoid':
                x = F.sigmoid(layer(x))
            else:
                raise ValueError(f'Invalid activation: {self.activation}')
            
        x = layers[-1](x)
        return F.softmax(x, dim=1)


In [4]:

def create_batches(X_train, y_train, batch_size):
    X_train_batches = {'a2': {}, 'a3': {}, 'a4': {}, 'a12': {}, 'a21': {}}
    y_train_batches = {'a2': {}, 'a3': {}, 'a4': {}, 'a12': {}, 'a21': {}}

    for dataset in helper.dataset_list:

        for i in range(0, floor(len(X_train[dataset])/batch_size)):
                X_train_batches[dataset][str(i)] =  X_train[dataset].iloc[i* batch_size:i*batch_size+batch_size]
                y_train_batches[dataset][str(i)] =  y_train[dataset].iloc[i* batch_size:i* batch_size+batch_size]

    return X_train_batches, y_train_batches

# batchsize in this case refers to the size of batch per dataset
def train(model, X_train, y_train, num_epochs, batch_size, learning_rate):
        
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

        max_number_of_batches = max(len(X_train[dataset]) // batch_size for dataset in helper.dataset_list)

        X_train, y_train = create_batches(X_train, y_train, batch_size)

        for epoch in range(num_epochs):
                optimizer.zero_grad()

                losses_per_epoch = {'a2': 0, 'a3': 0, 'a4': 0, 'a12': 0, 'a21': 0}

                for batch_number in range(0, max_number_of_batches):

                        for dataset in helper.dataset_list:

                                if len(X_train[dataset]) > batch_number:

                                        X_train_tensor = torch.tensor(X_train[dataset][str(batch_number)].values, dtype=torch.float32)
                                        X_train_tensor.to(device)
                                        y_train_tensor= torch.tensor(y_train[dataset][str(batch_number)].values, dtype=torch.float32)
                                        y_train_tensor.to(device)
                                        outputs = model(X_train_tensor, task_id = dataset)

                                        
                                        loss = criterion(outputs, y_train_tensor)

                                        if dataset == 'a12':
                                                losses_per_epoch[dataset] += loss
                                        else:
                                                losses_per_epoch[dataset] += loss

                for dataset in helper.dataset_list:
                        losses_per_epoch[dataset] /= len(X_train[dataset])

                # for now sum of average loss per dataset
                loss = sum(losses_per_epoch.values())
                loss.backward()
                optimizer.step()

        return model


def mcc(fn, fp, tn, tp):
    return (tp * tn - fp * fn) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

def evaluate(model, X_test, y_test):

    eval_dict = {'a2': {}, 'a3': {}, 'a4': {}, 'a12': {}, 'a21': {}}
    num_classes = {'a2': 2, 'a3': 2, 'a4': 3, 'a12': 6, 'a21': 2}
    for dataset in helper.dataset_list:
        X_test_tensor = torch.tensor(X_test[dataset].values, dtype=torch.float32)
        X_test_tensor.to(device)
        #y_test_tensor = torch.tensor(y_test[dataset].values, dtype=torch.float32)
        y_pred_tensor = model(X_test_tensor, task_id = dataset)
        y_pred = y_pred_tensor.detach().numpy()
        y_pred = np.argmax(y_pred, axis=1)

        y_test_np = y_test[dataset].to_numpy()
        y_test_np = np.argmax(y_test_np, axis=1)


        # Calculate classification metrics using one-hot encoded targets
        eval_dict[dataset]['accuracy'] = accuracy_score(y_test_np, y_pred)
        eval_dict[dataset]['micro_f1'] = f1_score(y_test_np, y_pred, average='micro')
        eval_dict[dataset]['macro_f1'] = f1_score(y_test_np, y_pred, average='macro')

        eval_dict[dataset]['mcc'] = matthews_corrcoef(y_test_np, y_pred)

    return eval_dict

In [5]:
def evaluate_model_on_dataset_one_split(split_index, config):
    

    X_train, X_test, y_train, y_test =  helper.get_joined_train_test_folds(split_index)

    model = FeatureBasedMTLModel(activation=config['activation'], hidden_layers_shared=config['shared_hidden_layers'], hidden_layers_outputs=[
        config['a2_output_hidden_layers'], config['a3_output_hidden_layers'], config['a4_output_hidden_layers'], config['a12_output_hidden_layers'], config['a21_output_hidden_layers']
    ], dropout=config['dropout_rate'])

    # Train the model and collect performance data
    model = train(model, X_train, y_train, num_epochs=config['epochs'], batch_size=config['batch_size'], learning_rate=config['learning_rate'],)
    # Evaluate the model and collect performance data
    eval_dict = evaluate(model, X_test, y_test)

    return eval_dict


In [6]:
#evaluate_model_on_dataset_one_split(1, {
#    'activation': 'sigmoid',
#    'shared_hidden_layers': [50, 30],
#    'a2_output_hidden_layers': [10],
#    'a3_output_hidden_layers': [10],
#    'a4_output_hidden_layers': [10],
#    'a12_output_hidden_layers': [10],
#    'a21_output_hidden_layers': [10],
#    'epochs': 10,
#    'batch_size': 32,
#    'learning_rate': 0.001
#})

## 5-Fold-Cross Validation

In [7]:
df_data_spike_1_split = {
    'a2': pd.DataFrame(),
    'a3': pd.DataFrame(),
    'a4': pd.DataFrame(),
    'a12': pd.DataFrame(),
    'a21': pd.DataFrame()
}

df_data_spike_full_split = pd.DataFrame()

data_spike_exec_1_split_dict = dict()
data_spike_exec_full_split_dict = {
    'a2': {},
    'a3': {},
    'a4': {},
    'a12': {},
    'a21': {}
}


best_params_list_getting = []

def custom_trial_dirname(trial):
    return f"trial_{trial.trial_id}"

In [8]:
def train_and_evaluate(config):
    
    global data_spike_exec_1_split_dict
    global data_spike_exec_full_split_dict
    global df_data_spike_1_split
    global results_dir

    overall_result = {
    "a2": {
        "accuracy_scores": [],
        "f1_macro_scores": [],
        "f1_micro_scores": [],
        "mcc_scores": []
    },
    "a3": {
        "accuracy_scores": [],
        "f1_macro_scores": [],
        "f1_micro_scores": [],
        "mcc_scores": []
    },
    "a4": { 
        "accuracy_scores": [],
        "f1_macro_scores": [],
        "f1_micro_scores": [],
        "mcc_scores": []
    },
    "a12": {
        "accuracy_scores": [],
        "f1_macro_scores": [],
        "f1_micro_scores": [],
        "mcc_scores": []
    },
    "a21": {
        "accuracy_scores": [],
        "f1_macro_scores": [],
        "f1_micro_scores": [],
        "mcc_scores": []
    }
    }

    accuracy_scores = []
    f1_macro_scores = []
    f1_micro_scores = []
    mcc_scores = []

    data_spike_exec_1_split_dict = {}
    for dataset in overall_result.keys():
        data_spike_exec_1_split_dict[dataset] = pd.DataFrame()

    #session_id_for_df = session.get_trial_id()
    #print(type(session_id_for_df))
    
    # save individual results for each dataset
    for i in range(5):
        eval_dict = evaluate_model_on_dataset_one_split(i, config)
        # fill all nan values in eval_dict with 0
        for dataset in eval_dict.keys():
            for key in eval_dict[dataset].keys():
                if np.isnan(eval_dict[dataset][key]):
                    eval_dict[dataset][key] = 0
        
        for dataset in overall_result.keys():

            overall_result[dataset]['accuracy_scores'].append(eval_dict[dataset]['accuracy'])
            overall_result[dataset]['f1_macro_scores'].append(eval_dict[dataset]['macro_f1'])
            overall_result[dataset]['f1_micro_scores'].append(eval_dict[dataset]['micro_f1'])
            overall_result[dataset]['mcc_scores'].append(eval_dict[dataset]['mcc'])

            accuracy_scores.append(eval_dict[dataset]['accuracy'])
            f1_macro_scores.append(eval_dict[dataset]['macro_f1'])
            f1_micro_scores.append(eval_dict[dataset]['micro_f1'])
            mcc_scores.append(eval_dict[dataset]['mcc'])
            
            new_entry= pd.DataFrame({
            #data_spike_exec_1_split_dict[dataset][dataset + "_" + str(i+1) + "_" + session_id_for_df] = {
                #"name": [session_id_for_df],
                "accuracy": [eval_dict[dataset]['accuracy']],
                "macro f1": [eval_dict[dataset]['macro_f1']],
                "micro_f1": [eval_dict[dataset]['micro_f1']],
                "mcc": [eval_dict[dataset]['mcc']],
                "config": [str(config)],
                "config_activation": [str(config["activation"])],
                "config_learning_rate": [str(config["learning_rate"])],
                "config_number_shared_hidden_layers": [str(len(config["shared_hidden_layers"]))],
                "config_shared_hidden_layers": [str(config["shared_hidden_layers"])],
                "config_number_a2_hidden_layers": [str(len(config["a2_output_hidden_layers"]))],
                "config_a2_hidden_layers": [str(config["a2_output_hidden_layers"])],
                "config_number_a3_hidden_layers": [str(len(config["a3_output_hidden_layers"]))],
                "config_a3_hidden_layers": [str(config["a3_output_hidden_layers"])],
                "config_number_a4_hidden_layers": [str(len(config["a4_output_hidden_layers"]))],
                "config_a4_hidden_layers": [str(config["a4_output_hidden_layers"])],
                "config_number_a12_hidden_layers": [str(len(config["a12_output_hidden_layers"]))],
                "config_a12_hidden_layers": [str(config["a12_output_hidden_layers"])],
                "config_number_a21_hidden_layers": [str(len(config["a21_output_hidden_layers"]))],
                "config_a21_hidden_layers": [str(config["a21_output_hidden_layers"])],
                "config_epochs": [str(config["epochs"])],
                "config_batch_size": [str(config["batch_size"])],
                "config_dropout_rate": [str(config["dropout_rate"])],
            })

            if i == 0:
                df_data_spike_1_split[dataset] = new_entry
            else:
                df_data_spike_1_split[dataset]= pd.concat([df_data_spike_1_split[dataset], new_entry], ignore_index=True, axis=0)


    
    for dataset in overall_result.keys():

        #data_spike_exec_full_split_dict[dataset][dataset + "_" + session_id_for_df] = {
        new_data_spike_exec_full_split_dict = pd.DataFrame({
                #"name" : [session_id_for_df],

                "config_activation": [str(config["activation"])],
                "config_learning_rate": [str(config["learning_rate"])],
                "config_number_shared_hidden_layers": [str(len(config["shared_hidden_layers"]))],
                "config_shared_hidden_layers": [str(config["shared_hidden_layers"])],
                "config_number_a2_hidden_layers": [str(len(config["a2_output_hidden_layers"]))],
                "config_a2_hidden_layers": [str(config["a2_output_hidden_layers"])],
                "config_number_a3_hidden_layers": [str(len(config["a3_output_hidden_layers"]))],
                "config_a3_hidden_layers": [str(config["a3_output_hidden_layers"])],
                "config_number_a4_hidden_layers": [str(len(config["a4_output_hidden_layers"]))],
                "config_a4_hidden_layers": [str(config["a4_output_hidden_layers"])],
                "config_number_a12_hidden_layers": [str(len(config["a12_output_hidden_layers"]))],
                "config_a12_hidden_layers": [str(config["a12_output_hidden_layers"])],
                "config_number_a21_hidden_layers": [str(len(config["a21_output_hidden_layers"]))],
                "config_a21_hidden_layers": [str(config["a21_output_hidden_layers"])],
                "config_epochs": [str(config["epochs"])],
                "config_batch_size": [str(config["batch_size"])],
                "config_dropout_rate": [str(config["dropout_rate"])],

                "mean_accuracy": [np.mean(overall_result[dataset]['accuracy_scores'])],
                "std_accuracy": [np.std(overall_result[dataset]['accuracy_scores'])],
                "min_accuracy": [min(overall_result[dataset]['accuracy_scores'])],
                "max_accuracy": [max(overall_result[dataset]['accuracy_scores'])],

                "mean_f1_macro": [np.mean(overall_result[dataset]['f1_macro_scores'])],
                "min_f1_macro": [min(overall_result[dataset]['f1_macro_scores'])],
                "max_f1_macro": [max(overall_result[dataset]['f1_macro_scores'])],
                "std_f1_macro": [np.std(overall_result[dataset]['f1_macro_scores'])],

                "mean_f1_micro": [np.mean(overall_result[dataset]['f1_micro_scores'])],
                "min_f1_micro": [min(overall_result[dataset]['f1_micro_scores'])],
                "max_f1_micro": [max(overall_result[dataset]['f1_micro_scores'])],
                "std_f1_micro": [np.std(overall_result[dataset]['f1_micro_scores'])],

                "mean_mcc": [np.mean(overall_result[dataset]['mcc_scores'])],
                "std_mcc": [np.std(overall_result[dataset]['mcc_scores'])],
                "min_mcc": [min(overall_result[dataset]['mcc_scores'])],
                "max_mcc": [max(overall_result[dataset]['mcc_scores'])],

                "config": [str(config)]
                
        })

        full_split_path = os.path.join(helper.results_dir, f'Feature_based/optimize_model_for_a12_tune_for_avg_acc/{dataset}.pkl')
        if os.path.exists(full_split_path):
            df_existing_full = pd.read_pickle(full_split_path)
            df_data_spike_full_split = pd.concat([df_existing_full, new_data_spike_exec_full_split_dict], ignore_index=True, axis=0)
        else:
            print(f"No existing full split data found at {full_split_path}, creating new file.")
            df_data_spike_full_split = new_data_spike_exec_full_split_dict
        
        # Save the updated full split data to file
        df_data_spike_full_split.to_pickle(full_split_path)


        # Save the 1 split data using the full path
        split_path = os.path.join(helper.results_dir, f'Feature_based/optimize_model_for_a12_tune_for_avg_acc/{dataset}_single_split.pkl')
        if os.path.exists(split_path):
            df_existing_1_spike = pd.read_pickle(split_path)
            df_data_spike_1_split[dataset] = pd.concat([df_data_spike_1_split[dataset], df_existing_1_spike], ignore_index=True, axis=0)
        else:
            print(f"No existing 1 split data found at {split_path}, creating new file")

        # Save the updated 1 split data to file
        df_data_spike_1_split[dataset].to_pickle(split_path)

    if np.min(accuracy_scores) == 0:
        print(f"Zero accuracy detected in config: {config}")
        print(f"Accuracy scores: {accuracy_scores}")

    #session.report({
    #    "mean_accuracy_a12": np.mean(overall_result['a12']['accuracy_scores']),
    #    "min_mean_accuracy": np.min(accuracy_scores),
    #    "max_mean_accuracy": np.max(accuracy_scores),
    #    "mean_mean_accuracy": np.mean(accuracy_scores),
    #    "mean_mean_f1_macro": np.mean(f1_macro_scores),
    #    "mean_mean_f1_micro": np.mean(f1_micro_scores),
    #    "mean_mean_mcc": np.mean(mcc_scores)
    #})

In [9]:
def generate_hidden_layers_config(min_layers=1, max_layers=5, min_nodes=10, max_nodes=50, step=10):
    possible_layers = []

    for num_layers in range(min_layers, max_layers + 1):

        shared_layers = list(itertools.product(range(min_nodes, max_nodes + 1, step), repeat=num_layers))

        max_output_layers = max_layers - num_layers
        
        output_layers = []
        for num_output_layers in range(0, max_output_layers + 1):

            output_layers = list(itertools.product(range(min_nodes, max_nodes + 1, step), repeat=num_output_layers))

        possible_layers.append({'shared': shared_layers, 'output': {'a2': output_layers, 'a3': output_layers, 'a4': output_layers, 'a12': output_layers, 'a21': output_layers}})

    return possible_layers

In [12]:
def five_fold_cross_validation(num_layers_shared, num_layers_individual):

    global best_params_list_getting

    #shared_hidden_layers_options = list(itertools.product(range(10, 50 + 1, 20), repeat=num_layers_shared))
    #individual_hidden_layers_options = list(itertools.product(range(10, 50 + 1, 20), repeat=num_layers_individual))

    shared_hidden_layers_options = [30 for i in range(0, num_layers_shared)]
    #individual_hidden_layers_options = [10 for i in range(0, num_layers_individual)]
    individual_hidden_layers_options = [10,5]
    #config = {
    #    "activation": tune.choice(["relu"]),
    #    "learning_rate": tune.loguniform(1e-4, 1e-2),
    #    "batch_size": tune.choice([32, 64, 128]),
    #    "shared_hidden_layers": tune.choice(shared_hidden_layers_options),
    #    "a2_output_hidden_layers": tune.choice(individual_hidden_layers_options),
    #    "a3_output_hidden_layers": tune.choice(individual_hidden_layers_options),
    #    "a4_output_hidden_layers": tune.choice(individual_hidden_layers_options),
    #    "a12_output_hidden_layers": tune.choice(individual_hidden_layers_options),
    #    "a21_output_hidden_layers": tune.choice(individual_hidden_layers_options),
    #    "epochs": tune.choice([40, 50, 60]),
    #    "dropout_rate": tune.uniform(0.2, 0.5)
    #}


# {'activation': 'relu', 'learning_rate': 0.0010403297388100502, 'batch_size': 32, 'shared_hidden_layers': (40,), 'a2_output_hidden_layers': (50, 30, 10, 10), 'a3_output_hidden_layers': (20, 30, 10, 20), 
# #'a4_output_hidden_layers': (40, 30, 40, 20), 'a12_output_hidden_layers': (40, 50, 30, 50), 'a21_output_hidden_layers': (20, 20, 20, 20), 'epochs': 50, 'dropout_rate': 0.3364741105922103} with a mean accuracy of 0.6904200512645706

    config = {
        "activation": "relu",
        "learning_rate": 0.0010403297388100502,
        "batch_size": 32,
        "shared_hidden_layers": [40],
        "a2_output_hidden_layers": [50, 30, 10, 10],
        "a3_output_hidden_layers": [20, 30, 10, 20],
        "a4_output_hidden_layers": [40, 30, 40, 20],
        "a12_output_hidden_layers": [40, 50, 30, 50],
        "a21_output_hidden_layers": [20, 20, 20, 20],
        "epochs": 50,
        "dropout_rate": 0.3364741105922103
    }

    train_and_evaluate(config)
    
    #scheduler = ASHAScheduler(
    #    metric="mean_accuracy_a12",
    #    mode="max",
    #    max_t=10,
    #    grace_period=1,
    #    reduction_factor=2
    #)
    
    #search_alg = OptunaSearch(metric="mean_accuracy_a12", mode="max")
    
    #analysis = tune.run(
    #    tune.with_parameters(train_and_evaluate),
    #    resources_per_trial={"cpu": 12, "gpu": 1, "accelerator_type:RTX": 1},
    #    config=config,
    #    scheduler=scheduler,
    #    search_alg=search_alg,
    #    num_samples=1,
    #    verbose=1,
    #    storage_path=helper.ray_results_dir,
    #    trial_dirname_creator=custom_trial_dirname
    #)

    #best_config_data_ray_tune = analysis.get_best_config(metric="mean_accuracy_a12", mode="max")
    #print("Best hyperparameters found were: ", best_config_data_ray_tune)
    #best_params_list_getting.append(best_config_data_ray_tune)
    
    #return analysis

In [13]:
analysis = five_fold_cross_validation(0, 0)

#for i in range(0,6):
#    print(i)
#    for j in range(0,6):
#        analysis = five_fold_cross_validation(i, j)

In [14]:
df_dict_A2 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A2.pkl")
df_dict_A3 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A3.pkl")
df_dict_A4 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A4.pkl")
df_dict_A12 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A12.pkl")
df_dict_A21 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A21.pkl")

df_dict = pd.concat([df_dict_A2, df_dict_A3, df_dict_A4, df_dict_A12, df_dict_A21])

df_dict = df_dict.loc[:,~df_dict.columns.duplicated()].copy()
df_dict = df_dict.drop_duplicates()

df_dict.to_csv('outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/_one_split_metrics_system.csv')

In [15]:
df_dict_full_A2 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A2.pkl")
df_dict_full_A3 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A3.pkl")
df_dict_full_A4 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A4.pkl")
df_dict_full_A12 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A12.pkl")
df_dict_full_A21 = pd.read_pickle("results/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A21.pkl")

# remove any columns with "config!" in the name
df_dict_A3 = df_dict_full_A3.loc[:,~df_dict_full_A3.columns.str.contains('config')].copy()
df_dict_A4 = df_dict_full_A4.loc[:,~df_dict_full_A4.columns.str.contains('config')].copy()
df_dict_A12 = df_dict_full_A12.loc[:,~df_dict_full_A12.columns.str.contains('config')].copy()
df_dict_A21 = df_dict_full_A21.loc[:,~df_dict_full_A21.columns.str.contains('config')].copy()

# for a2 add the suffix '_a2' to all following columns: mean_accuracy	std_accuracy	min_accuracy	max_accuracy	mean_f1_macro	min_f1_macro	max_f1_macro	std_f1_macro	mean_f1_micro	min_f1_micro	max_f1_micro	std_f1_micro	mean_mcc	std_mcc	min_mcc	max_mcc
#df_dict_A2.columns = [str(col) + '_a2' for col in ['mean_accuracy', 'std_accuracy', 'min_accuracy', 'max_accuracy', 'mean_f1_macro', 'min_f1_macro', 'max_f1_macro', 'std_f1_macro', 'mean_f1_micro', 'min_f1_micro', 'max_f1_micro', 'std_f1_micro', 'mean_mcc', 'std_mcc', 'min_mcc', 'max_mcc']]

#df_full_dict = df_dict_A2.join(df_dict_A3.set_index('name'), on='name', rsuffix='_a3')
#df_full_dict = df_full_dict.join(df_dict_A4.set_index('name'), on='name', rsuffix='_a4')
#df_full_dict = df_full_dict.join(df_dict_A12.set_index('name'), on='name', rsuffix='_a12')
#df_full_dict = df_full_dict.join(df_dict_A21.set_index('name'), on='name', rsuffix='_a21')

df_full_dict = df_dict_A2.join(df_dict_A3, rsuffix='_a3')
df_full_dict = df_full_dict.join(df_dict_A4, rsuffix='_a4')
df_full_dict = df_full_dict.join(df_dict_A12, rsuffix='_a12')
df_full_dict = df_full_dict.join(df_dict_A21, rsuffix='_a21')

# calculate the mean mean accuracy 
df_full_dict['mean_mean_accuracy'] = (df_full_dict['mean_accuracy'] + df_full_dict['mean_accuracy_a3'] + df_full_dict['mean_accuracy_a4'] + df_full_dict['mean_accuracy_a12'] + df_full_dict['mean_accuracy_a21']) / 5


df_full_dict = df_full_dict.loc[:, ~df_full_dict.columns.duplicated()].copy()
df_full_dict = df_full_dict.drop_duplicates()
df_full_dict["loss_criterion"] = "if dataset == 'a12': losses_per_epoch[dataset] += loss * 32 else: losses_per_epoch[dataset] += loss for dataset in helper.dataset_list: losses_per_epoch[dataset] /= len(X_train[dataset]) loss = sum(losses_per_epoch.values())"

df_full_dict.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/full_split.csv")

In [16]:
df_dict_full_A2.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A2.csv")
df_dict_full_A3.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A3.csv")
df_dict_full_A4.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A4.csv")
df_dict_full_A12.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A12.csv")
df_dict_full_A21.to_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/A21.csv")


In [18]:
# print the configs and results for the hyperparemters wiht the highest mean accuracy
# read df_full_dict and print columns with configs in best_params_list_getting
df_full_dict = pd.read_csv("outputs/Feature_based/optimize_model_for_a12_tune_for_avg_acc/full_split.csv")

config_acc_dict = {}
#for good_param in best_params_list_getting:
#    # get the row witht the highest mean mean accuracy
#    mean_accuracy_a12 = df_full_dict.loc[df_full_dict['config'] == str(good_param)]['mean_accuracy_a12']
#    config_acc_dict[str(good_param)] = mean_accuracy_a12
#    print(f"Mean accuracy for a12 config {good_param} is {mean_accuracy_a12}")

# get the best config
# TODO get config where a12 has the highest mean accuracy

for i in range(0, len(df_full_dict)):
    mean_accuracy_a12 = df_full_dict.loc[i]['mean_accuracy_a12']
    config_acc_dict[df_full_dict.loc[i]['config']] = mean_accuracy_a12

best_config = max(config_acc_dict, key=config_acc_dict.get)
print(f"The best config is {best_config} with a mean accuracy for a12 of {config_acc_dict[best_config]}")

#print aggregate results for the best config
print("a2")
metrics = ['mean_accuracy', 'std_accuracy', 'mean_f1_macro', 'std_f1_macro', 'mean_f1_micro', 'std_f1_micro', 'mean_mcc', 'std_mcc']
print("a3")
metrics = ['mean_accuracy_a3', 'std_accuracy_a3', 'mean_f1_macro_a3', 'std_f1_macro_a3', 'mean_f1_micro_a3', 'std_f1_micro_a3', 'mean_mcc_a3', 'std_mcc_a3']
print("a4")
metrics = [ 'mean_accuracy_a4', 'std_accuracy_a4', 'mean_f1_macro_a4', 'std_f1_macro_a4', 'mean_f1_micro_a4', 'std_f1_micro_a4', 'mean_mcc_a4', 'std_mcc_a4']
print("a12")
metrics = ['mean_accuracy_a12', 'std_accuracy_a12', 'mean_f1_macro_a12', 'std_f1_macro_a12', 'mean_f1_micro_a12', 'std_f1_micro_a12', 'mean_mcc_a12', 'std_mcc_a12']
print("a21")
metrics = ['mean_accuracy_a21', 'std_accuracy_a21', 'mean_f1_macro_a21', 'std_f1_macro_a21', 'mean_f1_micro_a21', 'std_f1_micro_a21', 'mean_mcc_a21', 'std_mcc_a21']
print(df_full_dict[metrics].loc[df_full_dict['config'] == best_config])
    




The best config is {'activation': 'relu', 'learning_rate': 0.0005, 'batch_size': 128, 'shared_hidden_layers': [30, 30], 'a2_output_hidden_layers': [], 'a3_output_hidden_layers': [], 'a4_output_hidden_layers': [], 'a12_output_hidden_layers': [], 'a21_output_hidden_layers': [], 'epochs': 60, 'dropout_rate': 0.25} with a mean accuracy for a12 of 0.2427507999625122
a2
a3
a4
a12
a21
    mean_accuracy_a21  std_accuracy_a21  mean_f1_macro_a21  std_f1_macro_a21  \
12           0.574178          0.056345           0.417204          0.040371   
48           0.589416          0.063115           0.468233          0.047618   
84           0.579910          0.089089           0.405468          0.074473   

    mean_f1_micro_a21  std_f1_micro_a21  mean_mcc_a21  std_mcc_a21  
12           0.574178          0.056345      0.026965     0.087084  
48           0.589416          0.063115      0.089450     0.056594  
84           0.579910          0.089089      0.001716     0.127255  
