In [8]:
import pandas as pd
import scipy.stats as stats

# Names of the accuracy columns that get summarised below.
metrics = [
    'Train Strict Accuracy (%)',
    'Train ±1 Grade Accuracy (%)',
    'Val Strict Accuracy (%)',
    'Val ±1 Grade Accuracy (%)',
]

# Read the accuracy results CSV into a DataFrame.
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# Function to compute statistics
def compute_stats(group, columns=None):
    """Return the mean and 95% margin of error for each metric column.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group; must contain every requested column.
    columns : iterable of str, optional
        Columns to summarise. Defaults to the module-level ``metrics`` list,
        so ``groupby(...).apply(compute_stats)`` keeps working unchanged.

    Returns
    -------
    pandas.Series
        For each column ``c``: ``'{c}_mean'`` (sample mean) and ``'{c}_moe'``
        (half-width of the two-sided 95% Student-t confidence interval).
        The margin is NaN when a column has fewer than two non-missing
        values, since the sample standard deviation is undefined there.
    """
    if columns is None:
        columns = metrics

    stats_dict = {}
    for col in columns:
        # Count only non-missing values: mean() and std() skip NaN, so using
        # the raw group length would understate the standard error and use
        # the wrong degrees of freedom whenever a value is missing.
        values = group[col].dropna()
        n = len(values)
        mean = values.mean()
        if n < 2:
            margin_of_error = float('nan')
        else:
            se = values.std(ddof=1) / (n ** 0.5)
            margin_of_error = stats.t.ppf(0.975, df=n - 1) * se  # 95% confidence
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics.
# The metric columns are selected before apply() so that the grouping column
# is not passed to compute_stats; pandas deprecates apply() operating on the
# grouping columns and emits a warning otherwise.
summary_df = df.groupby('Model Type')[metrics].apply(compute_stats).reset_index()

# Show result
summary_df


  summary_df = df.groupby('Model Type').apply(compute_stats).reset_index()


Unnamed: 0,Model Type,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,deepset,45.3132,0.499891,82.4844,0.537254,43.224,0.491957,80.1616,0.570644
1,deepset_xy,45.2,0.566716,82.6224,0.667899,43.1248,0.389139,80.4776,0.742478
2,deepset_xy_additive,44.98,0.569359,82.1016,0.703006,42.894,0.557656,80.2792,0.659472
3,ensemble,62.2356,0.38548,88.7416,0.310326,46.658,0.393341,82.7796,0.317394
4,set_transformer,61.5636,0.563835,88.5128,0.531193,44.0144,0.644098,79.6228,0.502016
5,set_transformer_additive,55.314,1.071493,85.7508,0.998768,43.938,0.784336,79.9212,0.879119
6,set_transformer_xy,61.4564,0.718747,88.6068,0.556844,44.5292,0.565317,79.9184,0.493622


In [9]:
# Split the summary into two tables that share the 'Model Type' key:
# one holding the "*_mean" columns, the other the "*_moe" columns.
mean_columns = ['Model Type', *(name for name in summary_df.columns if name.endswith('_mean'))]
moe_columns = ['Model Type', *(name for name in summary_df.columns if name.endswith('_moe'))]

mean_df = summary_df.loc[:, mean_columns]
moe_df = summary_df.loc[:, moe_columns]

# Print the means table followed by the margin-of-error table.
print(mean_df, moe_df, sep='\n')


                 Model Type  Train Strict Accuracy (%)_mean  \
0                   deepset                         45.3132   
1                deepset_xy                         45.2000   
2       deepset_xy_additive                         44.9800   
3                  ensemble                         62.2356   
4           set_transformer                         61.5636   
5  set_transformer_additive                         55.3140   
6        set_transformer_xy                         61.4564   

   Train ±1 Grade Accuracy (%)_mean  Val Strict Accuracy (%)_mean  \
0                           82.4844                       43.2240   
1                           82.6224                       43.1248   
2                           82.1016                       42.8940   
3                           88.7416                       46.6580   
4                           88.5128                       44.0144   
5                           85.7508                       43.9380   
6           