In [3]:
import pandas as pd
import scipy.stats as stats

# Load the uploaded CSV file
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# List of metric columns
# Train Strict Accuracy (%),Train ±1 Grade Accuracy (%),Val Strict Accuracy (%),Val ±1 Grade Accuracy (%)
metrics = ['Train Strict Accuracy (%)', 'Train ±1 Grade Accuracy (%)', 'Val Strict Accuracy (%)', 'Val ±1 Grade Accuracy (%)']

# Function to compute statistics
def compute_stats(group):
    stats_dict = {}
    n = len(group)
    for col in metrics:
        mean = group[col].mean()
        std = group[col].std(ddof=1)
        se = std / (n ** 0.5)
        margin_of_error = stats.t.ppf(0.975, df=n-1) * se  # 95% confidence
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics
summary_df = df.groupby('Model Type').apply(compute_stats).reset_index()

# Show result
summary_df


  summary_df = df.groupby('Model Type').apply(compute_stats).reset_index()


Unnamed: 0,Model Type,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,deepset,45.220385,0.515632,82.442692,0.559715,43.830769,0.331075,80.530385,0.403533
1,deepset_xy,45.737308,0.440418,83.170769,0.364212,44.220769,0.263279,81.046154,0.260195
2,deepset_xy_additive,45.341923,0.545748,82.782692,0.45854,43.936154,0.412658,80.993462,0.372606
3,set_transformer,61.971538,0.504959,88.579615,0.457362,43.758077,0.511609,79.823846,0.584574
4,set_transformer_additive,55.976154,0.713977,86.426923,0.605894,43.999615,0.715821,80.798077,0.709687
5,set_transformer_xy,62.136538,0.566442,88.7,0.54181,44.486923,0.51115,80.496154,0.581073


In [3]:
# Separate mean and margin of error tables
mean_columns = ['model'] + [col for col in summary_df.columns if col.endswith('_mean')]
moe_columns = ['model'] + [col for col in summary_df.columns if col.endswith('_moe')]

mean_df = summary_df[mean_columns]
moe_df = summary_df[moe_columns]

# Show both tables
print(mean_df)
print(moe_df)


                      model  strict_train_mean  ±1_train_mean  \
0                   deepset            45.5730        82.7335   
1                deepset_xy            45.1460        82.4730   
2       deepset_xy_additive            45.4365        82.5090   
3           set_transformer            61.8350        88.3905   
4  set_transformer_additive            56.0750        85.7315   
5        set_transformer_xy            62.1485        88.6370   

   strict_test_mean  ±1_test_mean  
0           44.2875       80.7695  
1           43.6720       80.4900  
2           43.6000       80.8585  
3           43.6735       80.0775  
4           44.1180       80.1155  
5           44.2755       80.2835  
                      model  strict_train_moe  ±1_train_moe  strict_test_moe  \
0                   deepset          0.469456      0.467788         0.432078   
1                deepset_xy          0.532350      0.613021         0.459216   
2       deepset_xy_additive          0.614571      0