Step 1: Import Necessary Libraries

In [19]:
import os
import json
import pandas as pd
import numpy as np
from glob import glob
import re

Step 2: Define a Function to Extract Information from File Names

In [20]:
def extract_info_from_filename(filename):
    """Pull the dataset, model and layer type out of a result file name.

    The name has to begin with ``<data>_<model>_<type>_maxepochs`` and contain
    ``.json`` somewhere after that prefix; only the dataset, model and type
    spellings listed in the pattern are accepted.

    Parameters
    ----------
    filename : str
        Bare file name (no directory part), matched from its first character.

    Returns
    -------
    tuple
        ``(data, model, type)`` as strings when the name matches, otherwise
        ``(None, None, None)``.
    """
    filename_regex = r'(?P<data>CIFAR10|CIFAR100|MNIST|Texture|Defect|Concrete)_(?P<model>ResNet18|ResNet50|VGG16)_(?P<type>MP|RBF|Sausage|ENN|ENN_no_initialization)_maxepochs.*\.json'
    parsed = re.match(filename_regex, filename)

    # Guard clause: anything that does not follow the naming scheme is reported
    # as a triple of Nones so callers can unpack the result unconditionally.
    if parsed is None:
        return None, None, None

    return parsed.group('data', 'model', 'type')

Step 3: Read JSON Files and Extract Data

In [21]:
def read_json_files(file_path_pattern, key = 'test_accuracy'):
    """Collect one record per recognised result file matched by a glob pattern.

    Parameters
    ----------
    file_path_pattern : str
        Glob pattern selecting the JSON result files.
    key : str, optional
        Key of the JSON object whose list of numbers is read
        (default ``'test_accuracy'``).

    Returns
    -------
    list of dict
        One dict per file whose name is understood by
        ``extract_info_from_filename``, with the keys ``'data'``, ``'model'``,
        ``'type'`` and ``'values'``. ``'values'`` holds the entries of
        ``content[key]`` multiplied by 100, with missing entries (NaN / null)
        removed.

    Raises
    ------
    KeyError
        If a recognised file does not contain ``key``.
    """
    data_list = []

    for file in glob(file_path_pattern):
        data, model, type_ = extract_info_from_filename(os.path.basename(file))
        if not (data and model and type_):
            # Unrecognised file name: skip it without parsing its content.
            continue

        with open(file, 'r') as f:
            content = json.load(f)

        # NaN never compares equal to anything, itself included, so the test
        # `x != np.nan` is True for every x and filters nothing. pd.isna
        # detects NaN as well as None (JSON null).
        values = [x * 100 for x in content[key] if not pd.isna(x)]

        data_list.append({
            'data': data,
            'model': model,
            'type': type_,
            'values': values
        })

    return data_list

Step 4: Compute Average and Standard Error

In [22]:
def compute_statistics(data_list):
    """Summarise each record's ``'values'`` list as a mean and a standard error.

    Parameters
    ----------
    data_list : list of dict
        Records that each carry a ``'values'`` sequence of numbers next to
        their descriptive fields.

    Returns
    -------
    pandas.DataFrame
        One row per record: the descriptive columns followed by ``'mean'`` and
        ``'stderr'``; the raw ``'values'`` column is not kept.
    """
    def _standard_error(samples):
        # Sample standard deviation (ddof=1) divided by sqrt(sample count).
        return np.std(samples, ddof=1) / np.sqrt(len(samples))

    records = pd.DataFrame(data_list)
    samples_per_row = records['values']

    return records.drop(columns='values').assign(
        mean=samples_per_row.apply(np.mean),
        stderr=samples_per_row.apply(_standard_error),
    )

Step 5: Organize Data into a Pandas DataFrame

In [23]:
def format_values(df):
    """Replace the ``'mean'`` and ``'stderr'`` columns by one display column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``'mean'`` and ``'stderr'`` columns. It is not
        modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and remaining columns, plus a
        ``'formatted'`` column holding ``"<mean> ± <stderr>"`` (5 and 8
        decimal places respectively).
    """
    # Build the strings column-wise instead of with a row-wise apply; this
    # yields the same text per row and also works for a frame without rows.
    formatted = [
        f"{mean:.5f} ± {stderr:.8f}"
        for mean, stderr in zip(df['mean'], df['stderr'])
    ]

    # drop() and assign() both return new frames, so the caller's frame keeps
    # its 'mean' and 'stderr' columns and can be formatted again later.
    return df.drop(columns=['mean', 'stderr']).assign(formatted=formatted)

def create_final_dataframe(df):
    """Pivot the statistics into a (type, model) x dataset table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns ``'type'``, ``'model'``, ``'data'``, ``'mean'``
        and ``'stderr'``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Rows indexed by ``(type, model)``; columns are a two-level index of
        ``'formatted'`` and the dataset name, so
        ``result['formatted'][<dataset>]`` selects one dataset.

    Notes
    -----
    ``unstack`` needs each ``(type, model, data)`` combination to occur at
    most once; pandas raises ``ValueError`` on duplicate index entries.
    """
    # Hand a copy to the formatting step and use a non-inplace set_index so the
    # caller's frame is left exactly as it was, whatever format_values does
    # with its argument. This keeps the function safe to call repeatedly.
    formatted_df = format_values(df.copy())

    # unstack(-1) moves the last index level ('data') into the columns.
    return formatted_df.set_index(['type', 'model', 'data']).unstack(-1)

Putting It All Together: Results After 20 Epochs

In [24]:
# Glob pattern selecting every JSON result file of the 20-epoch runs.
# NOTE(review): absolute path on a local W: drive - adjust when running elsewhere.
file_path_pattern = 'W:/DS/Project/CNN Experiment/20_epochs_results/*.json'

# Pipeline: read the files (default key 'test_accuracy'), reduce each file's
# values to mean and standard error, then pivot into the (type, model) x dataset table.
data_list = read_json_files(file_path_pattern)
df = compute_statistics(data_list)
final_df = create_final_dataframe(df)

In [25]:
# Display the "mean ± stderr" column of the Concrete dataset only,
# one row per (type, model) combination.
print(final_df['formatted']['Concrete'])

type                   model   
ENN                    ResNet18    87.60595 ± 0.05141063
                       ResNet50    89.96682 ± 0.03159691
                       VGG16       86.34596 ± 0.07773647
ENN_no_initialization  ResNet18    86.40197 ± 0.03966666
                       ResNet50    89.30687 ± 0.05529375
                       VGG16       85.78410 ± 0.06811712
MP                     ResNet18    88.00942 ± 0.04789291
                       ResNet50    90.91574 ± 0.03215573
                       VGG16       87.09511 ± 0.08323884
RBF                    ResNet18    83.60659 ± 0.04229395
                       ResNet50    85.46304 ± 0.13302931
                       VGG16       51.97988 ± 2.76441997
Sausage                ResNet18    87.28061 ± 0.15049557
Name: Concrete, dtype: object


Then, the Results After 200 Epochs (CIFAR100 only)

In [28]:
# Glob pattern selecting the JSON result files of the 200-epoch CIFAR100 runs.
# This reassigns file_path_pattern (and data_list below), replacing the 20-epoch values.
# NOTE(review): absolute path on a local W: drive - adjust when running elsewhere.
file_path_pattern = 'W:/DS/Project/CNN Experiment/200_epochs_results/CIFAR100/*.json'

# Same pipeline as for the 20-epoch results, stored under the *_200 names.
# NOTE(review): key = '' makes read_json_files look up content[''] - confirm that the
# 200-epoch JSON files really store their values under an empty-string key.
data_list = read_json_files(file_path_pattern,key = '')
df_200 = compute_statistics(data_list)
final_df_200 = create_final_dataframe(df_200)

In [29]:
# Display the complete 200-epoch table: "mean ± stderr" per (type, model) row
# and dataset column.
print(final_df_200)

                                            formatted
data                                         CIFAR100
type                  model                          
ENN                   ResNet18  66.36618 ± 0.07328341
ENN_no_initialization ResNet18  65.91547 ± 0.09277658
MP                    ResNet18  68.43867 ± 0.06562700
RBF                   ResNet18  57.90431 ± 0.16477138
Sausage               ResNet18  60.96755 ± 0.20065431
