In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

## DNN

In [2]:
data_dnn = pd.read_json('results_dnn.json').T

In [3]:
metrics_dnn = []

for i, row in data_dnn.iterrows():

    accuracy_fold_train = []
    accuracy_fold_test = []

    recall_fold_train = []
    recall_fold_test = []

    precision_fold_train = []
    precision_fold_test = []

    for iteration in row["results"]:
        
        loss = iteration["loss_values"]
        conf_mat_train = np.array(iteration["confusion_matrix_train"])
        conf_mat_test = np.array(iteration["confusion_matrix_val"])
        
        # Compute accuracy (7 classes)
        total_train = conf_mat_train.sum()
        total_test = conf_mat_test.sum()

        accuracy_train = conf_mat_train.diagonal().sum() / total_train if total_train > 0 else 0
        accuracy_test = conf_mat_test.diagonal().sum() / total_test if total_test > 0 else 0

        # Compute recall and precision (per class)
        recall_train = np.array([
            conf_mat_train[i, i] / conf_mat_train[i, :].sum() if conf_mat_train[i, :].sum() > 0 else 0
            for i in range(7)
        ])
        recall_test = np.array([
            conf_mat_test[i, i] / conf_mat_test[i, :].sum() if conf_mat_test[i, :].sum() > 0 else 0
            for i in range(7)
        ])

        precision_train = np.array([
            conf_mat_train[i, i] / conf_mat_train[:, i].sum() if conf_mat_train[:, i].sum() > 0 else 0
            for i in range(7)
        ])
        precision_test = np.array([
            conf_mat_test[i, i] / conf_mat_test[:, i].sum() if conf_mat_test[:, i].sum() > 0 else 0
            for i in range(7)
        ])

        accuracy_fold_train.append(accuracy_train)
        accuracy_fold_test.append(accuracy_test)

        recall_fold_train.append(recall_train)
        recall_fold_test.append(recall_test)

        precision_fold_train.append(precision_train)
        precision_fold_test.append(precision_test)

    # Compute mean across folds
    accuracy_train = np.mean(accuracy_fold_train)
    accuracy_test = np.mean(accuracy_fold_test)

    recall_train = np.mean(recall_fold_train, axis=0)
    recall_test = np.mean(recall_fold_test, axis=0)

    precision_train = np.mean(precision_fold_train, axis=0)
    precision_test = np.mean(precision_fold_test, axis=0)

    average_recall_train = np.mean(recall_train)
    average_recall_test = np.mean(recall_test)

    average_precision_train = np.mean(precision_train)
    average_precision_test = np.mean(precision_test)

    f1_train_average = 2 * (average_precision_train * average_recall_train) / (average_precision_train + average_recall_train)
    f1_test_average = 2 * (average_precision_test * average_recall_test) / (average_precision_test + average_recall_test) 

    row = row.drop("results")
    row_dict = row.to_dict()

    metrics_dnn.append({
        **row_dict,
        "iteration": i,
        "accuracy_train": accuracy_train,
        "accuracy_test": accuracy_test,
        "recall_train": recall_train.tolist(),  # Convert to list for DataFrame compatibility
        "recall_test": recall_test.tolist(),
        "averaged_recall_train": average_recall_train,
        "averaged_recall_test": average_recall_test,
        "precision_train": precision_train.tolist(),
        "precision_test": precision_test.tolist(),
        "averaged_precision_train": average_precision_train,
        "averaged_precision_test": average_precision_test,
        "f1_train_average": f1_train_average,
        "f1_test_average": f1_test_average
    })

metrics_dnn = pd.DataFrame(metrics_dnn)
#metrics_dnn


## CNN

In [49]:
def get_metrics(data : pd.DataFrame):

    metrics_cnn = []

    for i, row in data.iterrows():

        accuracy_fold_train = []
        accuracy_fold_test = []

        recall_fold_train = []
        recall_fold_test = []

        precision_fold_train = []
        precision_fold_test = []

        confusion_matrix_train = []
        confusion_matrix_test = []

        all_losses = []

        for iteration in row["results"]:
            
            loss = iteration["loss_values"]

            all_losses.append(loss)

            conf_mat_train = np.array(iteration["confusion_matrix_train"])
            conf_mat_test = np.array(iteration["confusion_matrix_val"])
            
            # Compute accuracy (7 classes)
            total_train = conf_mat_train.sum()
            total_test = conf_mat_test.sum()

            accuracy_train = conf_mat_train.diagonal().sum() / total_train if total_train > 0 else 0
            accuracy_test = conf_mat_test.diagonal().sum() / total_test if total_test > 0 else 0

            # Compute recall and precision (per class)
            recall_train = np.array([
                conf_mat_train[i, i] / conf_mat_train[i, :].sum() if conf_mat_train[i, :].sum() > 0 else 0
                for i in range(7)
            ])
            recall_test = np.array([
                conf_mat_test[i, i] / conf_mat_test[i, :].sum() if conf_mat_test[i, :].sum() > 0 else 0
                for i in range(7)
            ])

            precision_train = np.array([
                conf_mat_train[i, i] / conf_mat_train[:, i].sum() if conf_mat_train[:, i].sum() > 0 else 0
                for i in range(7)
            ])
            precision_test = np.array([
                conf_mat_test[i, i] / conf_mat_test[:, i].sum() if conf_mat_test[:, i].sum() > 0 else 0
                for i in range(7)
            ])

            try:
                confusion_matrix_train += conf_mat_train
                confusion_matrix_test += conf_mat_test
            except:
                confusion_matrix_train = conf_mat_train
                confusion_matrix_test = conf_mat_test

            accuracy_fold_train.append(accuracy_train)
            accuracy_fold_test.append(accuracy_test)

            recall_fold_train.append(recall_train)
            recall_fold_test.append(recall_test)

            precision_fold_train.append(precision_train)
            precision_fold_test.append(precision_test)

        # Compute mean across folds
        accuracy_train = np.mean(accuracy_fold_train)
        accuracy_test = np.mean(accuracy_fold_test)

        recall_train = np.mean(recall_fold_train, axis=0)
        recall_test = np.mean(recall_fold_test, axis=0)

        precision_train = np.mean(precision_fold_train, axis=0)
        precision_test = np.mean(precision_fold_test, axis=0)

        average_recall_train = np.mean(recall_train)
        average_recall_test = np.mean(recall_test)

        average_precision_train = np.mean(precision_train)
        average_precision_test = np.mean(precision_test)

        f1_train_average = 2 * (average_precision_train * average_recall_train) / (average_precision_train + average_recall_train)
        f1_test_average = 2 * (average_precision_test * average_recall_test) / (average_precision_test + average_recall_test) 

        row = row.drop("results")
        row_dict = row.to_dict()

        metrics_cnn.append({
            **row_dict,
            "iteration": i,
            "accuracy_train": accuracy_train,
            "accuracy_test": accuracy_test,
            "recall_train": recall_train.tolist(),  # Convert to list for DataFrame compatibility
            "recall_test": recall_test.tolist(),
            "averaged_recall_train": average_recall_train,
            "averaged_recall_test": average_recall_test,
            "precision_train": precision_train.tolist(),
            "precision_test": precision_test.tolist(),
            "averaged_precision_train": average_precision_train,
            "averaged_precision_test": average_precision_test,
            "f1_train_average": f1_train_average,
            "f1_test_average": f1_test_average,
            "confusion_matrix_train": confusion_matrix_train,
            "confusion_matrix_test": confusion_matrix_test,
            "loss": all_losses
        })

    metrics_cnn = pd.DataFrame(metrics_cnn)
    return metrics_cnn


In [5]:
def plot_confusion_matrix(df):
    confusion_matrix_train = np.array(df["confusion_matrix_train"].tolist())
    confusion_matrix_test = np.array(df["confusion_matrix_test"].tolist())
    
    labels = [[str(val) for val in row] for row in confusion_matrix_train]
    
    fig_train = go.Figure(data=go.Heatmap(
        z=confusion_matrix_train,
        x=["Class {}".format(i) for i in range(7)],
        y=["Class {}".format(i) for i in range(7)],
        colorscale='Viridis',
        text=labels,
        texttemplate="%{text}",
        showscale=True
    ))

    fig_train.update_layout(
        title='Confusion Matrix (Train)',
        xaxis_title='Predicted Class',
        yaxis_title='True Class',
        width=800,
        height=600
    )
    
    labels = [[str(val) for val in row] for row in confusion_matrix_test]
    
    fig_test = go.Figure(data=go.Heatmap(
        z=confusion_matrix_test,
        x=["Class {}".format(i) for i in range(7)],
        y=["Class {}".format(i) for i in range(7)],
        colorscale='Viridis',
        text=labels,
        texttemplate="%{text}",
        showscale=True
    ))

    fig_test.update_layout(
        title='Confusion Matrix (Test)',
        xaxis_title='Predicted Class',
        yaxis_title='True Class',
        width=800,
        height=600
    )

    fig_train.show()
    fig_test.show()

In [6]:
data_cnn = pd.read_json('results_cnn_1.json').T

In [7]:
metrics_cnn = get_metrics(data_cnn)

In [8]:
# get 50 best performances in terms of test accuracy
best_cnn = metrics_cnn.sort_values(by="accuracy_test", ascending=False).head(50)
data_cnn_50_best = data_cnn.loc[best_cnn.index]
data_cnn_50_best.to_csv("data_cnn_50_best.csv")

## Impact of the different Hyper-parameters

In [9]:
metrics_dnn["model"] = "DNN"
metrics_cnn["model"] = "CNN"

metrics_all = pd.concat([metrics_cnn, metrics_dnn], ignore_index=True)
metrics_all["learning_rate"] = metrics_all["learning_rate"].astype(str)
metrics_all["batch_size"] = metrics_all["batch_size"].astype(str)

In [10]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Define hyperparameters and metrics
hyperparameters = ['activation_function', 'n_layers', 'batch_size', 
                'learning_rate', 'loss_function', 'optimizer',
                'pooling', 'n_conv_layers']

metrics = ['accuracy_train', 'accuracy_test']

metrics_to_str = {
    'accuracy_train': 'Accuracy (Train)',
    'accuracy_test': 'Accuracy (Validation)'
}

# Grid layout settings
params_per_row = 2  # Two parameters per row
n_rows = -(-len(hyperparameters) // params_per_row)  # Ceiling division for rows

# Create subplots
fig = make_subplots(
    rows=n_rows, cols=params_per_row * 2,  # Each parameter has 2 charts (train & test)
    subplot_titles=[f"{param} - {metrics_to_str[metric]}" for param in hyperparameters for metric in metrics],
    shared_yaxes=False,
    horizontal_spacing=0.05, vertical_spacing=0.05
)

# Loop through each hyperparameter
for i, param in enumerate(hyperparameters):
    row = i // params_per_row + 1  # Compute row index

    df_grouped = metrics_all.groupby([param, "model"])[metrics].mean().reset_index()

    # Add plots for accuracy_train and accuracy_test
    for j, metric in enumerate(metrics):

        col = i % params_per_row * 2 + j + 1  # Compute column index

        for model, color in zip(["DNN", "CNN"], ["#636EFA", "#EF553B"]):

            df_subset = df_grouped[df_grouped["model"] == model]
            
            fig.add_trace(
                go.Bar(
                    x=df_subset[param], 
                    y=df_subset[metric], 
                    name=f"{model}",
                    marker_color=color,
                    legendgroup=model,
                    showlegend=(i == 0 and j == 0)  # Show legend only once
                ),
                row=row, col=col
            )

# Update layout
fig.update_layout(
    title_text="Impact of Hyperparameters on Accuracy (CNN vs. DNN)",
    height=300 * n_rows, width=1400,
    showlegend=True
)

# Show figure
#fig.show()

a = 1


## Overall Performance

In [11]:
# get the configuration with the best test accuracy

best_cnn = metrics_cnn.sort_values(by="accuracy_test", ascending=False).head(1)
best_dnn = metrics_dnn.sort_values(by="accuracy_test", ascending=False).head(1)

In [12]:
best_cnn_params = best_cnn.drop(columns=["iteration", 'accuracy_train', 'accuracy_test', 'recall_train',
       'recall_test', 'averaged_recall_train', 'averaged_recall_test',
       'precision_train', 'precision_test', 'averaged_precision_train',
       'averaged_precision_test', 'f1_train_average', 'f1_test_average',
       "model"])

print(best_cnn.columns)
best_cnn_params

Index(['n_epochs', 'activation_function', 'pooling', 'n_conv_layers',
       'conv_out_channels', 'conv_kernel_size', 'conv_padding', 'n_layers',
       'batch_size', 'learning_rate', 'loss_function', 'optimizer',
       'iteration', 'accuracy_train', 'accuracy_test', 'recall_train',
       'recall_test', 'averaged_recall_train', 'averaged_recall_test',
       'precision_train', 'precision_test', 'averaged_precision_train',
       'averaged_precision_test', 'f1_train_average', 'f1_test_average',
       'confusion_matrix_train', 'confusion_matrix_test', 'model'],
      dtype='object')


Unnamed: 0,n_epochs,activation_function,pooling,n_conv_layers,conv_out_channels,conv_kernel_size,conv_padding,n_layers,batch_size,learning_rate,loss_function,optimizer,confusion_matrix_train,confusion_matrix_test
103,50,relu,MaxPool,2,16,3,1,3,64,0.1,Cross Entropy,SGD,"[[2753, 12, 13, 16, 5, 1, 0], [43, 2685, 20, 2...","[[438, 86, 59, 59, 40, 15, 3], [133, 339, 59, ..."


In [13]:
# bar plot with train and test accuracy

fig = go.Figure()

fig.add_trace(go.Bar(
    x=["CNN", "DNN"],
    y=[best_cnn["accuracy_train"].values[0], best_dnn["accuracy_train"].values[0]],
    name='Train',
    marker_color='#636EFA'
))

fig.add_trace(go.Bar(
    x=["CNN", "DNN"],
    y=[best_cnn["accuracy_test"].values[0], best_dnn["accuracy_test"].values[0]],
    name='Validation',
    marker_color='#EF553B'
))

fig.update_layout(
    title='Train and Test Accuracy for the Best CNN and DNN Models',
    barmode='group',
    xaxis_title='Model',
    yaxis_title='Accuracy',
    showlegend=True
)

fig.show()

In [14]:
# bar plot with train and test accuracy

fig = go.Figure()

fig.add_trace(go.Bar(
    x=["CNN", "DNN"],
    y=[best_cnn["f1_train_average"].values[0], best_dnn["f1_train_average"].values[0]],
    name='Train',
    marker_color='#636EFA'
))

fig.add_trace(go.Bar(
    x=["CNN", "DNN"],
    y=[best_cnn["f1_test_average"].values[0], best_dnn["f1_test_average"].values[0]],
    name='Validation',
    marker_color='#EF553B'
))

fig.update_layout(
    title='Train and Test Accuracy for the Best CNN and DNN Models',
    barmode='group',
    xaxis_title='Model',
    yaxis_title='Accuracy',
    showlegend=True
)

fig.show()

# FIXING ATTEMPTS

## Early Stopping

In [15]:
data_cnn_early_stopping = pd.read_json('results_cnn_early.json').T
#data_cnn_early_stopping

In [16]:
metrics_early = get_metrics(data_cnn_early_stopping)

In [17]:
max_f1_train = metrics_early["f1_train_average"].max()
max_f1_test = metrics_early["f1_test_average"].max()

max_f1_test, max_f1_train

(np.float64(0.6309427591407378), np.float64(0.8425808460199662))

## No Data Augmentation

In [18]:
data_no_aug_cnn = pd.read_json('../results_original/results_cnn.json').T

#data_no_aug_cnn

In [19]:
import ast

results_all = []
j = 0

for i, row in data_no_aug_cnn.iterrows():

    conf_mat_train = np.array(row["confusion_matrix_train"])
    conf_mat_test = np.array(row["confusion_matrix_val"])

    total_train = conf_mat_train.sum()
    total_test = conf_mat_test.sum()

    accuracy_train = conf_mat_train.diagonal().sum() / total_train if total_train > 0 else 0
    accuracy_test = conf_mat_test.diagonal().sum() / total_test if total_test > 0 else 0

    # Compute recall and precision (per class)
    recall_train = np.array([
        conf_mat_train[i, i] / conf_mat_train[i, :].sum() if conf_mat_train[i, :].sum() > 0 else 0
        for i in range(7)
    ])
    recall_test = np.array([
        conf_mat_test[i, i] / conf_mat_test[i, :].sum() if conf_mat_test[i, :].sum() > 0 else 0
        for i in range(7)
    ])

    precision_train = np.array([
        conf_mat_train[i, i] / conf_mat_train[:, i].sum() if conf_mat_train[:, i].sum() > 0 else 0
        for i in range(7)
    ])
    precision_test = np.array([
        conf_mat_test[i, i] / conf_mat_test[:, i].sum() if conf_mat_test[:, i].sum() > 0 else 0
        for i in range(7)
    ])

    average_recall_train = np.mean(recall_train)
    average_precision_train = np.mean(precision_train)

    average_recall_test = np.mean(recall_test)
    average_precision_test = np.mean(precision_test)

    f1_score_train = 2 * (average_precision_train * average_recall_train) / (average_precision_train + average_recall_train)
    f1_score_test  = 2 * (average_precision_test  * average_recall_test)  / (average_precision_test  + average_recall_test)

    results_all.append({
        "iteration": j,
        "accuracy_train": accuracy_train,
        "accuracy_test": accuracy_test,
        "recall_train": recall_train.tolist(),
        "recall_test": recall_test.tolist(),
        "averaged_recall_train": average_precision_train,
        "averaged_recall_test": average_recall_test,
        "precision_train": precision_train,
        "precision_test": precision_test,
        "averaged_precision_train": average_precision_train,
        "averaged_precision_test": average_precision_test,
        "f1_score_train": f1_score_train,
        "f1_score_test": f1_score_test,
        "confusion_matrix_train": conf_mat_train,
        "confusion_matrix_test": conf_mat_test,
    })
    
    j += 1

metrics_cnn_no_aug = pd.DataFrame(results_all)
#metrics_cnn_no_aug

In [20]:
max_f1_train = metrics_cnn_no_aug["f1_score_train"].max()
max_f1_test = metrics_cnn_no_aug["f1_score_test"].max()

max_f1_test, max_f1_train

(np.float64(0.5139272775997877), np.float64(0.9076493479959407))

In [21]:
params_max_f1 = metrics_cnn_no_aug.loc[metrics_cnn_no_aug["f1_score_test"].idxmax()]

plot_confusion_matrix(params_max_f1)

## Only undersampling

In [22]:
data_under_cnn = pd.read_json('./results_cnn_under.json').T

In [23]:
metrics_under_cnn = get_metrics(data_under_cnn)

In [24]:
max_f1_train = metrics_under_cnn["f1_train_average"].max()
max_f1_test = metrics_under_cnn["f1_test_average"].max()

max_f1_test, max_f1_train

(np.float64(0.4991124052880161), np.float64(0.8951487107489473))

In [25]:
params_max_f1 = metrics_under_cnn.loc[metrics_under_cnn["f1_test_average"].idxmax()]

plot_confusion_matrix(params_max_f1)

## Early Stopping

In [26]:
data_early_cnn = pd.read_json('./results_cnn_early.json').T

In [27]:
metrics_early_cnn = get_metrics(data_early_cnn)

In [28]:
max_f1_train = metrics_early_cnn["f1_train_average"].max()
max_f1_test = metrics_early_cnn["f1_test_average"].max()

max_f1_test, max_f1_train

(np.float64(0.6309427591407378), np.float64(0.8425808460199662))

In [29]:
params_max_f1 = metrics_early_cnn.loc[metrics_early_cnn["f1_test_average"].idxmax()]

plot_confusion_matrix(params_max_f1)

## Weight Decay

In [30]:
data_decay_cnn = pd.read_json('./results_cnn_decay.json').T

In [31]:
metrics_decay_cnn = get_metrics(data_decay_cnn)

In [32]:
max_f1_test = metrics_decay_cnn["f1_test_average"].max()
max_f1_train = metrics_decay_cnn["f1_train_average"].max()

max_f1_test, max_f1_train

(np.float64(0.6473054780411176), np.float64(0.9513362135318266))

In [33]:
params_max_f1 = metrics_decay_cnn.loc[metrics_decay_cnn["f1_test_average"].idxmax()]

plot_confusion_matrix(params_max_f1)

## Additional tests

None of the previous solutions worked

In [62]:
data_add_cnn = pd.read_json('./results_cnn_additional.json').T

In [63]:
metrics_add_cnn = get_metrics(data_add_cnn)

In [64]:
max_f1_test = metrics_add_cnn["f1_test_average"].max()
max_f1_train = metrics_add_cnn["f1_train_average"].max()

max_f1_test, max_f1_train

(np.float64(0.6590269167182123), np.float64(0.94321155589057))

In [65]:
params_max_f1 = metrics_add_cnn.loc[metrics_add_cnn["f1_test_average"].idxmax()]

In [66]:
plot_confusion_matrix(params_max_f1)

In [68]:
# params for best f1 score
best_f1_test = metrics_add_cnn["f1_test_average"].max()
best_f1_test_row = metrics_add_cnn.loc[metrics_add_cnn["f1_test_average"].idxmax()]
best_f1_test_row = metrics_add_cnn[metrics_add_cnn["iteration"] == 74]
best_f1_test_row

Unnamed: 0,n_epochs,activation_function,pooling,n_conv_layers,conv_out_channels,conv_kernel_size,conv_padding,n_layers,batch_size,learning_rate,...,averaged_recall_test,precision_train,precision_test,averaged_precision_train,averaged_precision_test,f1_train_average,f1_test_average,confusion_matrix_train,confusion_matrix_test,loss
24,50,relu,MaxPool,3,64,3,1,1,64,0.1,...,0.462296,"[0.6108312894422758, 0.545198596514317, 0.5536...","[0.43093616291686515, 0.3961221681655118, 0.35...",0.564877,0.414621,0.588188,0.437163,"[[2669, 57, 19, 34, 16, 5, 0], [1163, 1521, 15...","[[540, 47, 28, 44, 34, 6, 1], [315, 267, 26, 3...","[[239.17402505874634, 236.48553848266602, 221...."


In [69]:
# do a line plot with all the losses - different color for each fold

all_losses = best_f1_test_row["loss"]

fig = go.Figure()

for i, loss in enumerate(all_losses):
    fig.add_trace(go.Scatter(
        x=list(range(len(loss))),
        y=loss,
        mode='lines+markers',
        name=f'Fold {i+1}'
    ))

fig.update_layout(
    title='Losses for Each Fold',
    xaxis_title='Epochs',
    yaxis_title='Loss',
    legend_title='Folds'
)

fig.show()