In [40]:
import csv
import pandas
import numpy as np
import matplotlib.pyplot as plt
import json

In [41]:
# Read the per-fold results from the CSV file
filename = 'best_model_2_fold_details.csv'
data = pandas.read_csv(filename)

# The 'Hyperparameters' column holds one JSON object per row. Parse it with
# json.loads instead of eval: eval would execute any Python expression that
# happens to be in the file, whereas json.loads can only build plain data.
hyperparameters = data['Hyperparameters'].apply(json.loads)

# Expand the parsed dicts into one column per key (keys become column names),
# aligned on the original row index, and append them to the frame. Building a
# single DataFrame from the list of dicts avoids creating one Series per row.
data = data.join(pandas.DataFrame(hyperparameters.tolist(), index=data.index))

# Convert all column names to lower case and replace spaces with _
data.columns = data.columns.str.lower().str.replace(' ', '_')
print(data.columns)

# The per-epoch loss histories are stored as list literals in string form;
# parse them into real Python lists (again without eval).
for loss_column in ('train_losses', 'val_losses'):
    data[loss_column] = data[loss_column].apply(json.loads)

# Snapshot of the parsed frame, taken before later cells add columns to `data`
data_backup = data.copy()

print(data.head(1))

Index(['fold', 'hyperparameters', 'train_losses', 'val_losses', 'val_accuracy',
       'val_precision', 'val_recall', 'val_f1', 'time_taken',
       'early_stop_epoch', 'epochs', 'lr', 'bn', 'batch_size', 'dropout_rate',
       'hidden_layers', 'activations', 'input_size', 'weight_decay',
       'optimiser', 'early_stopping'],
      dtype='object')
   fold                                    hyperparameters  \
0     1  {"epochs": 50, "lr": 0.0001, "bn": 0, "batch_s...   

                                        train_losses  \
0  [2.034430485841546, 1.8147283972984134, 1.7246...   

                                          val_losses  val_accuracy  \
0  [1.8129982696405291, 1.6970808465567286, 1.639...        0.4986   

   val_precision  val_recall    val_f1  time_taken  early_stop_epoch  ...  \
0       0.499073    0.500294  0.494809  158.646537               NaN  ...   

       lr  bn  batch_size      dropout_rate      hidden_layers  \
0  0.0001   0           4  [0, 0.1, 0.1, 0]  [128

In [42]:
# Store the last entry of each row's validation-loss history as its own column
data['final_val_loss'] = data['val_losses'].map(lambda fold_losses: fold_losses[-1])

In [45]:

# Every model occupies FOLDS_PER_MODEL consecutive rows of `data` (one row per
# fold). For each model, calculate the mean of the fold metrics, store it in
# `metrics_dict`, and print it as an '&'-separated row.
FOLDS_PER_MODEL = 10
METRIC_COLUMNS = ['val_accuracy', 'val_precision', 'val_recall', 'val_f1', 'final_val_loss', 'time_taken']

# Maps the model's position in the file (0, 1, 2, ...) to a dict holding its
# hyperparameter string and the mean of every column in METRIC_COLUMNS.
metrics_dict = {}
for model_index, i in enumerate(range(0, len(data), FOLDS_PER_MODEL)):
    # iloc slicing clips at the end of the frame, so a trailing group with
    # fewer than FOLDS_PER_MODEL rows is averaged over the rows it does have.
    model_folds = data.iloc[i:i + FOLDS_PER_MODEL]

    # The hyperparameters are read from the first fold of the group only;
    # the grouping relies on row order, not on matching hyperparameter values.
    model_hyperparameters = model_folds.iloc[0]['hyperparameters']

    # One column-wise mean over the fold rows, in METRIC_COLUMNS order
    fold_means = model_folds[METRIC_COLUMNS].mean()
    val_accuracy, val_precision, val_recall, val_f1, val_loss, time_taken = fold_means

    metrics_dict[model_index] = {'hyperparameters': model_hyperparameters, **fold_means.to_dict()}
    print(f'{val_accuracy:.3f} & {val_precision:.3f} & {val_recall:.3f} & {val_f1:.3f} & {val_loss:.3f} & {time_taken:.3f} & {model_hyperparameters}')
    


0.503 & 0.500 & 0.503 & 0.496 & 1.418 & 162.518 & {"epochs": 50, "lr": 0.0001, "bn": 0, "batch_size": 4, "dropout_rate": [0, 0.1, 0.1, 0], "hidden_layers": [128, 64, 32, 10], "activations": ["None", "ReLU", "ReLU", "softmax"], "input_size": 128, "weight_decay": 0, "optimiser": "Adam", "early_stopping": [100, 0.001]}
0.485 & 0.491 & 0.485 & 0.480 & 1.449 & 157.018 & {"epochs": 50, "lr": 0.0005, "bn": 0, "batch_size": 4, "dropout_rate": [0, 0.1, 0.1, 0], "hidden_layers": [128, 64, 32, 10], "activations": ["None", "ReLU", "ReLU", "softmax"], "input_size": 128, "weight_decay": 0, "optimiser": "Adam", "early_stopping": [100, 0.001]}
0.443 & 0.451 & 0.443 & 0.433 & 1.583 & 158.382 & {"epochs": 50, "lr": 0.001, "bn": 0, "batch_size": 4, "dropout_rate": [0, 0.1, 0.1, 0], "hidden_layers": [128, 64, 32, 10], "activations": ["None", "ReLU", "ReLU", "softmax"], "input_size": 128, "weight_decay": 0, "optimiser": "Adam", "early_stopping": [100, 0.001]}
0.098 & 0.010 & 0.100 & 0.018 & 2.309 & 161.518