Step 1: Import Necessary Libraries

In [88]:
import os
import json
import pandas as pd
import numpy as np
from glob import glob
import re

Step 2: Define a Function to Extract Information from File Names

In [89]:
def extract_info_from_filename(filename):
    """Split a result file name into its data, model and type fields.

    The name has to begin with ``<data>_<model>_<type>_maxepochs`` and contain
    ``.json`` somewhere after that, where each field is one of the
    alternatives spelled out in the pattern below.

    Parameters
    ----------
    filename : str
        File name to parse (the directory part must already be stripped,
        because the pattern is matched from the first character).

    Returns
    -------
    tuple
        ``(data, model, type)`` as strings when the name matches, otherwise
        ``(None, None, None)``.
    """
    pattern = r'(?P<data>CIFAR10|CIFAR100|MNIST|Texture|Defect|Concrete)_(?P<model>ResNet18|ResNet50|VGG16)_(?P<type>MP|RBF|Sausage|ENN|ENN_no_initialization)_maxepochs.*\.json'
    parsed = re.match(pattern, filename)
    if parsed is None:
        return None, None, None
    # group() with several names returns the captures as one tuple
    return parsed.group('data', 'model', 'type')

Step 3: Read JSON Files and Extract Data

In [90]:
def read_json_files(file_path_pattern, key = 'test_accuracy'):
    """Collect the list stored under ``key`` from every matching JSON file.

    Parameters
    ----------
    file_path_pattern : str
        Glob pattern selecting the JSON files to read.
    key : str, optional
        Key of the loaded JSON content whose list of values is collected
        (default ``'test_accuracy'``).

    Returns
    -------
    list of dict
        One record per file whose base name is recognised by
        ``extract_info_from_filename``. Each record has the keys ``'data'``,
        ``'model'``, ``'type'`` and ``'values'``, where ``'values'`` is the
        list found under ``key`` with NaN entries removed. Files whose names
        are not recognised are skipped.

    Raises
    ------
    KeyError
        If the JSON object of a recognised file has no ``key`` entry.
    """
    def _is_nan(value):
        # NaN never compares equal to anything (not even itself), so
        # ``value != np.nan`` is always True and cannot be used as a filter.
        # The isinstance guard keeps non-float entries away from np.isnan.
        return isinstance(value, float) and np.isnan(value)

    files = glob(file_path_pattern)
    data_list = []

    for file in files:
        with open(file, 'r') as f:
            content = json.load(f)

        data, model, type_ = extract_info_from_filename(os.path.basename(file))
        if data and model and type_:
            cleaned = [x for x in content[key] if not _is_nan(x)]
            data_list.append({
                'data': data,
                'model': model,
                'type': type_,
                'values': cleaned  # NaN-free copy of content[key]
            })

    return data_list

Step 4: Compute Average and Standard Error

In [91]:
def compute_statistics(data_list):
    """Summarise each record's value list by its mean and standard error.

    Parameters
    ----------
    data_list : list of dict
        Records that each hold a ``'values'`` sequence next to their other
        keys.

    Returns
    -------
    pandas.DataFrame
        One row per record with the record's other keys as columns, followed
        by ``'mean'`` and ``'stderr'``. The standard error is the sample
        standard deviation (``ddof=1``) divided by the square root of the
        number of values. The ``'values'`` column is not part of the result.
    """
    def _standard_error(samples):
        return np.std(samples, ddof=1) / np.sqrt(len(samples))

    stats = pd.DataFrame(data_list)
    samples_column = stats['values']
    stats['mean'] = samples_column.apply(np.mean)
    stats['stderr'] = samples_column.apply(_standard_error)
    return stats.drop(columns='values')

Step 5: Organize Data into a Pandas DataFrame

In [92]:
def format_values(df):
    """Return a new frame where 'mean' and 'stderr' are merged into text.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``'mean'`` and ``'stderr'`` columns. It is not
        modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the remaining columns of ``df`` followed by a
        ``'formatted'`` column holding ``"<mean> ± <stderr>"`` strings
        (5 and 8 decimal places respectively). ``'mean'`` and ``'stderr'``
        are dropped.
    """
    # Zip the two columns instead of applying row-wise: no per-row Series is
    # built, and a frame with no rows simply yields an empty list.
    formatted = [
        f"{mean:.5f} ± {stderr:.8f}"
        for mean, stderr in zip(df['mean'], df['stderr'])
    ]
    # drop/assign return new frames, so the caller's DataFrame keeps its
    # columns and the function can be called on it again.
    return df.drop(columns=['mean', 'stderr']).assign(formatted=formatted)

def create_final_dataframe(df):
    """Build the presentation table indexed by (type, model, data).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'type'``, ``'model'`` and ``'data'`` columns plus
        whatever ``format_values`` requires. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The output of ``format_values`` with ``'type'``, ``'model'`` and
        ``'data'`` moved into a three-level index.
    """
    # Work on a copy and use the non-inplace set_index: the caller's frame
    # keeps its columns, so re-running this call on the same input works.
    formatted_df = format_values(df.copy())
    return formatted_df.set_index(['type', 'model', 'data'])

Putting It All Together: first, for the 20-epoch results

In [93]:
# Glob pattern selecting every JSON file in the 20-epoch results folder.
# NOTE(review): absolute, machine-specific path — adjust it to where the
# result files live before re-running.
file_path_pattern = 'E:/Work/DS/Project/CNN Experiment/20_epochs_results/*.json'

# Run the pipeline: read the per-file value lists, compute mean and standard
# error per file, then format and index the table.
data_list = read_json_files(file_path_pattern)
df = compute_statistics(data_list)
final_df = create_final_dataframe(df)

In [94]:
# Print the 20-epoch summary table built in the previous cell
print(final_df)

                                                    formatted
type                  model    data                          
ENN                   ResNet18 CIFAR100  0.64179 ± 0.00097624
ENN_no_initialization ResNet18 CIFAR100  0.61671 ± 0.00047845
MP                    ResNet18 CIFAR100  0.68459 ± 0.00040243
RBF                   ResNet18 CIFAR100  0.25989 ± 0.00238836
Sausage               ResNet18 CIFAR100  0.44679 ± 0.00526269
...                                                       ...
RBF                   ResNet50 Texture   0.18466 ± 0.00566700
ENN                   VGG16    Texture   0.96112 ± 0.00120860
ENN_no_initialization VGG16    Texture   0.92779 ± 0.00092905
MP                    VGG16    Texture   0.98591 ± 0.00128911
RBF                   VGG16    Texture   0.05990 ± 0.00166731

[78 rows x 1 columns]


Then, for the 200-epoch results

In [95]:
# Glob pattern selecting every JSON file in the 200-epoch results folder.
# NOTE(review): absolute, machine-specific path — adjust it to where the
# result files live before re-running.
file_path_pattern = 'E:/Work/DS/Project/CNN Experiment/200_epochs_results/*.json'

# Same pipeline as for the 20-epoch results; file_path_pattern and data_list
# are reassigned here, while the results go to the *_200 names.
data_list = read_json_files(file_path_pattern)
df_200 = compute_statistics(data_list)
final_df_200 = create_final_dataframe(df_200)

In [96]:
# Print the 200-epoch summary table built in the previous cell
print(final_df_200)

                                                   formatted
type                  model    data                         
ENN                   ResNet18 CIFAR10  0.87909 ± 0.00065700
ENN_no_initialization ResNet18 CIFAR10  0.87818 ± 0.00043153
MP                    ResNet18 CIFAR10  0.88582 ± 0.00099625
RBF                   ResNet18 CIFAR10  0.88011 ± 0.00037118
Sausage               ResNet18 CIFAR10  0.87510 ± 0.00035174
