Step 1: Import Necessary Libraries

In [13]:
import os
import json
import pandas as pd
import numpy as np
from glob import glob
import re

Step 2: Define a Function to Extract Information from File Names

In [14]:
def extract_info_from_filename(filename):
    """Parse the ``data``, ``model`` and ``type`` fields out of a result file name.

    The name has to begin with ``<data>_<model>_<type>_maxepochs`` and contain
    ``.json`` somewhere after that prefix; each field must be one of the
    alternatives spelled out in the pattern below.

    Parameters
    ----------
    filename : str
        File name to parse. Matching starts at the first character, so a
        leading directory part prevents a match.

    Returns
    -------
    tuple
        ``(data, model, type)`` as strings when the name matches, otherwise
        ``(None, None, None)``.
    """
    pattern = r'(?P<data>CIFAR10|CIFAR100|MNIST|Texture|Defect|Concrete)_(?P<model>ResNet18|ResNet50|VGG16)_(?P<type>MP|RBF|Sausage|ENN|ENN_no_initialization)_maxepochs.*\.json'
    parsed = re.match(pattern, filename)

    # Guard clause: anything that does not follow the naming scheme yields Nones.
    if parsed is None:
        return None, None, None

    # Asking for several groups at once returns them as a tuple, in this order.
    return parsed.group('data', 'model', 'type')

Step 3: Read JSON Files and Extract Data

In [15]:
def read_json_files(file_path_pattern, key = 'test_accuracy'):
    """Collect the values stored under ``key`` from every matching JSON file.

    Files whose base name is not recognised by ``extract_info_from_filename``
    are skipped without being opened.

    Parameters
    ----------
    file_path_pattern : str
        Glob pattern selecting the JSON files to read.
    key : str, default 'test_accuracy'
        Top-level key of each JSON document holding the list of numbers.

    Returns
    -------
    list of dict
        One record per accepted file with the keys ``'data'``, ``'model'``,
        ``'type'`` (parsed from the file name) and ``'values'`` (the numbers
        found under ``key``, multiplied by 100, with missing entries removed).

    Raises
    ------
    KeyError
        If an accepted file has no ``key`` entry.
    """
    data_list = []

    # glob() yields paths in an arbitrary, OS-dependent order; sorting keeps the
    # row order of the resulting table reproducible across machines and runs.
    for file in sorted(glob(file_path_pattern)):
        data, model, type_ = extract_info_from_filename(os.path.basename(file))
        if not (data and model and type_):
            # Name does not follow the expected scheme: skip before reading it.
            continue

        with open(file, 'r') as f:
            content = json.load(f)

        # NaN compares unequal to everything (itself included), so a test such as
        # `x != np.nan` is always True and filters nothing. pd.isna() detects NaN
        # and also JSON null (None), which could not be multiplied anyway.
        # The factor 100 presumably converts fractions to percent - TODO confirm.
        cleaned = [x * 100 for x in content[key] if not pd.isna(x)]

        data_list.append({
            'data': data,
            'model': model,
            'type': type_,
            'values': cleaned,  # scaled entries of content[key]
        })

    return data_list

Step 4: Compute Average and Standard Error

In [16]:
def compute_statistics(data_list):
    """Summarise each record's ``'values'`` list as a mean and a standard error.

    Parameters
    ----------
    data_list : list of dict
        Records that each carry a ``'values'`` sequence of numbers; all other
        keys are passed through as columns.

    Returns
    -------
    pandas.DataFrame
        One row per record, with the ``'values'`` column replaced by ``'mean'``
        and ``'stderr'``. The standard error is the sample standard deviation
        (``ddof=1``) divided by the square root of the number of values.
    """
    def _standard_error(samples):
        # Sample standard deviation scaled by sqrt(n).
        return np.std(samples, ddof=1) / np.sqrt(len(samples))

    runs = pd.DataFrame(data_list)
    samples = runs['values']

    summary = runs.assign(
        mean=[np.mean(entry) for entry in samples],
        stderr=[_standard_error(entry) for entry in samples],
    )
    return summary.drop(columns='values')

Step 5: Organize Data into a Pandas DataFrame

In [17]:
def format_values(df):
    """Replace the ``'mean'`` / ``'stderr'`` columns by one ``'formatted'`` column.

    Each row is rendered as ``"<mean> ± <stderr>"`` with 5 and 8 decimal places
    respectively.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing numeric ``'mean'`` and ``'stderr'`` columns. It is not
        modified, so the calling cell can be re-run safely.

    Returns
    -------
    pandas.DataFrame
        New frame with the remaining columns of ``df`` followed by
        ``'formatted'``.
    """
    # Build the strings from the two columns directly; unlike a row-wise
    # DataFrame.apply this also behaves for a frame with zero rows.
    formatted = [
        f"{mean:.5f} ± {stderr:.8f}"
        for mean, stderr in zip(df['mean'], df['stderr'])
    ]

    # drop() without inplace returns a new frame, leaving the caller's untouched.
    return df.drop(columns=['mean', 'stderr']).assign(formatted=formatted)

def create_final_dataframe(df):
    """Build the display table: formatted values indexed by type, model and data.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``'data'``, ``'model'``, ``'type'``, ``'mean'`` and
        ``'stderr'`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame whose index levels are ``('type', 'model', 'data')`` and whose
        remaining content is whatever ``format_values`` produces.
    """
    # Hand a copy downstream so the caller's frame keeps its columns and the
    # calling cell can be executed repeatedly without a KeyError.
    formatted_df = format_values(df.copy())

    # Non-inplace set_index returns a fresh, indexed frame.
    return formatted_df.set_index(['type', 'model', 'data'])

Putting It All Together: first, the 20-epoch results

In [18]:
# Glob pattern selecting every JSON result file of the 20-epoch experiment.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
file_path_pattern = 'E:/Work/DS/Project/CNN Experiment/20_epochs_results/*.json'

# Pipeline: read the values from each file -> mean and standard error per file
# -> formatted table indexed by (type, model, data).
data_list = read_json_files(file_path_pattern)
df = compute_statistics(data_list)
final_df = create_final_dataframe(df)

In [26]:
# Display the 20-epoch table from row position 65 onward only
# (in the captured output below these are the Texture rows).
print(final_df.iloc[65:])

                                                    formatted
type                  model    data                          
ENN                   ResNet18 Texture  90.47408 ± 0.12478700
ENN_no_initialization ResNet18 Texture  86.75426 ± 0.09660118
MP                    ResNet18 Texture  96.64062 ± 0.07184673
RBF                   ResNet18 Texture  34.21165 ± 1.08173665
                               Texture  46.87500 ± 0.38582471
ENN                   ResNet50 Texture  95.51373 ± 0.10600686
ENN_no_initialization ResNet50 Texture  94.68513 ± 0.08080532
MP                    ResNet50 Texture  98.74527 ± 0.04365319
RBF                   ResNet50 Texture  18.46591 ± 0.56670020
ENN                   VGG16    Texture  96.11150 ± 0.12086039
ENN_no_initialization VGG16    Texture  92.77936 ± 0.09290469
MP                    VGG16    Texture  98.59138 ± 0.12891063
RBF                   VGG16    Texture   5.98958 ± 0.16673124


Then, the 200-epoch results

In [20]:
# Glob pattern selecting every JSON result file of the 200-epoch experiment.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
file_path_pattern = 'E:/Work/DS/Project/CNN Experiment/200_epochs_results/*.json'

# Same pipeline as for the 20-epoch results; `file_path_pattern` and `data_list`
# are reassigned here, so they no longer refer to the 20-epoch inputs.
data_list = read_json_files(file_path_pattern)
df_200 = compute_statistics(data_list)
final_df_200 = create_final_dataframe(df_200)

In [21]:
# Display the complete 200-epoch table.
print(final_df_200)

                                                    formatted
type                  model    data                          
ENN                   ResNet18 CIFAR10  87.90899 ± 0.06570042
ENN_no_initialization ResNet18 CIFAR10  87.81801 ± 0.04315299
MP                    ResNet18 CIFAR10  88.58173 ± 0.09962500
RBF                   ResNet18 CIFAR10  88.01082 ± 0.03711763
Sausage               ResNet18 CIFAR10  87.51002 ± 0.03517446
