In [3]:
import pandas as pd
import scipy.stats as stats

# Metric columns of the accuracy CSV that get summarised per model:
# Train Strict Accuracy (%), Train ±1 Grade Accuracy (%),
# Val Strict Accuracy (%), Val ±1 Grade Accuracy (%)
metrics = [
    'Train Strict Accuracy (%)',
    'Train ±1 Grade Accuracy (%)',
    'Val Strict Accuracy (%)',
    'Val ±1 Grade Accuracy (%)',
]

# Read the accuracy results into a DataFrame
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# Function to compute statistics
def compute_stats(group, columns=None, confidence=0.95):
    """Return the mean and t-based margin of error for each metric column.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to one group (e.g. one model type).
    columns : list of str, optional
        Columns to summarise. Defaults to the module-level ``metrics`` list.
    confidence : float, optional
        Two-sided confidence level of the interval, strictly between 0 and 1.
        Defaults to 0.95.

    Returns
    -------
    pandas.Series
        For every column ``c``, the entries ``f'{c}_mean'`` and ``f'{c}_moe'``.
        The margin of error is NaN when fewer than two non-null values exist.

    Raises
    ------
    ValueError
        If ``confidence`` is not strictly between 0 and 1.
    """
    if not 0.0 < confidence < 1.0:
        raise ValueError("confidence must be strictly between 0 and 1")
    if columns is None:
        columns = metrics

    # Two-sided interval: e.g. 95% confidence -> 0.975 quantile of Student's t.
    upper_quantile = (1.0 + confidence) / 2.0

    stats_dict = {}
    for col in columns:
        # mean()/std() ignore NaN, so the sample size must be counted per
        # column on the non-null values, not taken from the row count.
        values = group[col].dropna()
        n = len(values)
        mean = values.mean()
        if n > 1:
            se = values.std(ddof=1) / (n ** 0.5)
            margin_of_error = stats.t.ppf(upper_quantile, df=n - 1) * se
        else:
            # Sample std is undefined for fewer than two observations.
            margin_of_error = float('nan')
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics.
# Only the metric columns are handed to compute_stats: letting apply() operate
# on the grouping column as well is deprecated in recent pandas releases and
# emits a warning. The resulting table is unchanged.
summary_df = (
    df.groupby('Model Type')[metrics]
    .apply(compute_stats)
    .reset_index()
)

# Show result
summary_df


  summary_df = df.groupby('Model Type').apply(compute_stats).reset_index()


Unnamed: 0,Model Type,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,deepset,44.7572,0.745096,82.364,0.713165,43.434,0.597653,81.1976,0.574212
1,deepset_xy,45.2524,0.316722,82.5496,0.481535,44.064,0.347306,81.488,0.38684
2,deepset_xy_additive,45.3484,0.487568,82.7312,0.639078,43.6208,0.396184,81.7916,0.481967
3,set_transformer,62.2508,0.583968,88.786,0.484073,44.712,0.53447,80.0632,0.426763
4,set_transformer_additive,55.3992,0.667866,85.4996,0.634512,44.2012,0.565585,80.108,0.625994
5,set_transformer_xy,62.046,0.613601,88.9012,0.556067,44.8648,0.50329,81.0464,0.488083


In [4]:
# Separate mean and margin of error tables; both keep 'Model Type' as the key column
mean_columns = ['Model Type'] + [col for col in summary_df.columns if col.endswith('_mean')]
moe_columns = ['Model Type'] + [col for col in summary_df.columns if col.endswith('_moe')]

mean_df = summary_df[mean_columns]
moe_df = summary_df[moe_columns]

# Show both tables with the notebook's rich display instead of print(), so
# wide frames render as tables rather than wrapped plain text.
# `display` is provided by the IPython/Jupyter kernel without an import.
display(mean_df, moe_df)


                 Model Type  Train Strict Accuracy (%)_mean  \
0                   deepset                         44.7572   
1                deepset_xy                         45.2524   
2       deepset_xy_additive                         45.3484   
3           set_transformer                         62.2508   
4  set_transformer_additive                         55.3992   
5        set_transformer_xy                         62.0460   

   Train ±1 Grade Accuracy (%)_mean  Val Strict Accuracy (%)_mean  \
0                           82.3640                       43.4340   
1                           82.5496                       44.0640   
2                           82.7312                       43.6208   
3                           88.7860                       44.7120   
4                           85.4996                       44.2012   
5                           88.9012                       44.8648   

   Val ±1 Grade Accuracy (%)_mean  
0                         81.1976  
1 