In [2]:
import pandas as pd
import scipy.stats as stats

# Read the per-run accuracy results from the CSV file into a DataFrame
file_path = "accuracy.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Names of the accuracy columns that are summarised for each model
metrics = [
    'strict_train',
    '±1_train',
    'strict_test',
    '±1_test',
]

# Function to compute statistics
def compute_stats(group, columns=None, confidence=0.95):
    """Summarise metric columns of ``group`` as a mean and a t-based margin of error.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to summarise (e.g. one group handed over by ``groupby(...).apply``).
        It must contain every column listed in ``columns``.
    columns : iterable of str, optional
        Columns to summarise. Defaults to the notebook-level ``metrics`` list.
    confidence : float, default 0.95
        Two-sided confidence level used for the margin of error; must lie
        strictly between 0 and 1.

    Returns
    -------
    pandas.Series
        For every column ``c``: ``f'{c}_mean'`` (sample mean) and ``f'{c}_moe'``
        (half-width of the confidence interval for the mean). The margin of
        error is NaN when a column has fewer than two non-missing values.

    Raises
    ------
    ValueError
        If ``confidence`` is not strictly between 0 and 1.
    """
    if columns is None:
        columns = metrics
    if not 0.0 < confidence < 1.0:
        raise ValueError(f"confidence must be between 0 and 1, got {confidence!r}")

    # Upper-tail quantile of a two-sided interval (0.975 for 95% confidence).
    upper_quantile = (1.0 + confidence) / 2.0

    stats_dict = {}
    for col in columns:
        values = group[col]
        # mean() and std() skip missing values, so the sample size must be the
        # count of non-missing values rather than the number of rows.
        n = int(values.count())
        mean = values.mean()
        if n < 2:
            # The sample standard deviation is undefined with fewer than two
            # observations, so no margin of error can be reported.
            margin_of_error = float('nan')
        else:
            std = values.std(ddof=1)
            se = std / (n ** 0.5)
            margin_of_error = stats.t.ppf(upper_quantile, df=n - 1) * se
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics.
# The metric columns are selected before apply() so that the grouping column
# is not part of the frames handed to compute_stats; applying a function to the
# grouping columns is deprecated in recent pandas and emits a warning.
# reset_index() turns the 'model' index back into a regular column.
summary_df = df.groupby('model')[metrics].apply(compute_stats).reset_index()

# Show result
summary_df


  summary_df = df.groupby('model').apply(compute_stats).reset_index()


Unnamed: 0,model,strict_train_mean,strict_train_moe,±1_train_mean,±1_train_moe,strict_test_mean,strict_test_moe,±1_test_mean,±1_test_moe
0,deepset,45.573,0.469456,82.7335,0.467788,44.2875,0.432078,80.7695,0.399948
1,deepset_xy,45.146,0.53235,82.473,0.613021,43.672,0.459216,80.49,0.450575
2,deepset_xy_additive,45.4365,0.614571,82.509,0.636206,43.6,0.496121,80.8585,0.578289
3,set_transformer,61.835,0.652561,88.3905,0.566662,43.6735,0.559073,80.0775,0.534577
4,set_transformer_additive,56.075,1.012272,85.7315,1.396382,44.118,1.026983,80.1155,1.298084
5,set_transformer_xy,62.1485,0.836538,88.637,0.738697,44.2755,0.565937,80.2835,0.495926


In [3]:
# Split the summary into two tables: one with the means, one with the
# margins of error. Both keep the 'model' column as the first column.
mean_columns = ['model', *filter(lambda name: name.endswith('_mean'), summary_df.columns)]
moe_columns = ['model', *filter(lambda name: name.endswith('_moe'), summary_df.columns)]

mean_df = summary_df.loc[:, mean_columns]
moe_df = summary_df.loc[:, moe_columns]

# Print the means table followed by the margin-of-error table
print(mean_df, moe_df, sep='\n')


                      model  strict_train_mean  ±1_train_mean  \
0                   deepset            45.5730        82.7335   
1                deepset_xy            45.1460        82.4730   
2       deepset_xy_additive            45.4365        82.5090   
3           set_transformer            61.8350        88.3905   
4  set_transformer_additive            56.0750        85.7315   
5        set_transformer_xy            62.1485        88.6370   

   strict_test_mean  ±1_test_mean  
0           44.2875       80.7695  
1           43.6720       80.4900  
2           43.6000       80.8585  
3           43.6735       80.0775  
4           44.1180       80.1155  
5           44.2755       80.2835  
                      model  strict_train_moe  ±1_train_moe  strict_test_moe  \
0                   deepset          0.469456      0.467788         0.432078   
1                deepset_xy          0.532350      0.613021         0.459216   
2       deepset_xy_additive          0.614571      0