In [1]:
import pandas as pd
import scipy.stats as stats

# Read the accuracy results CSV into a DataFrame
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# Names of the metric columns that are summarised per model below
metrics = [
    'Train Strict Accuracy (%)',
    'Train ±1 Grade Accuracy (%)',
    'Val Strict Accuracy (%)',
    'Val ±1 Grade Accuracy (%)',
]

# Per-group summary statistics (mean and t-based margin of error)
def compute_stats(group, columns=None, confidence=0.95):
    """Return the mean and margin of error of each metric column in ``group``.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group, as passed by ``groupby(...).apply``.
    columns : iterable of str, optional
        Columns to summarise. When omitted, the notebook-level ``metrics``
        list is used, so existing ``apply(compute_stats)`` calls keep working.
    confidence : float, optional
        Two-sided confidence level of the margin of error (default 0.95).

    Returns
    -------
    pandas.Series
        For every column, a ``'<col>_mean'`` and a ``'<col>_moe'`` entry, in
        column order. The margin of error is NaN when the column has fewer
        than two non-missing values.
    """
    if columns is None:
        columns = metrics

    # Upper quantile of the two-sided interval (0.975 for 95% confidence).
    upper_quantile = 0.5 + confidence / 2

    stats_dict = {}
    for col in columns:
        # Count only non-missing observations: mean() and std() skip NaN, so
        # the standard error and the degrees of freedom must use the same n.
        values = group[col].dropna()
        n = len(values)
        if n > 1:
            se = values.std(ddof=1) / (n ** 0.5)
            margin_of_error = stats.t.ppf(upper_quantile, df=n - 1) * se
        else:
            # The sample standard deviation is undefined for n < 2.
            margin_of_error = float('nan')
        stats_dict[f'{col}_mean'] = values.mean()
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Summarise the runs of each model. Only the metric columns are handed to
# compute_stats: letting apply() operate on the grouping column as well is
# deprecated in pandas >= 2.2 and emits a DeprecationWarning.
summary_df = (
    df.groupby('model')[metrics]
    .apply(compute_stats)
    .reset_index()
)

# Show result
summary_df


  summary_df = df.groupby('model').apply(compute_stats).reset_index()


Unnamed: 0,model,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,adaboost_deepset,46.3196,0.243856,83.1732,0.270388,45.0108,0.329086,81.644,0.313509
1,adaboost_deepset_xy,46.0512,0.24895,83.2572,0.2223,44.6568,0.293504,81.7788,0.247591
2,adaboost_deepset_xy_additive,45.0944,0.29319,81.99,0.234241,43.5204,0.274934,79.7612,0.265092
3,adaboost_set_transformer,52.344,0.258163,84.362,0.170473,46.7664,0.381572,81.554,0.269084
4,adaboost_set_transformer_additive,48.6624,0.236636,82.7288,0.174092,45.5652,0.4237,81.0816,0.255623
5,adaboost_set_transformer_xy,52.2236,0.258204,84.4076,0.124561,46.3528,0.289357,81.5948,0.231832
6,deepset,45.2488,0.502625,82.5204,0.530232,42.8332,0.418352,80.1412,0.55652
7,deepset_xy,45.4456,0.429451,82.778,0.60201,42.9728,0.398788,80.5656,0.650936
8,deepset_xy_additive,44.9888,0.657854,82.1444,0.857217,42.8624,0.516832,80.2008,0.866328
9,gbm_ensemble_all,80.7364,0.224393,95.5372,0.111441,48.8112,0.316469,82.9604,0.24624


In [3]:
# Split the summary into two tables that share the 'model' column:
# one holding the '_mean' columns, one holding the '_moe' columns.
mean_columns = ['model', *(name for name in summary_df.columns if name.endswith('_mean'))]
moe_columns = ['model', *(name for name in summary_df.columns if name.endswith('_moe'))]

mean_df = summary_df.loc[:, mean_columns]
moe_df = summary_df.loc[:, moe_columns]

# Print the means first, then the margins of error
print(mean_df, moe_df, sep='\n')


                                      model  Train Strict Accuracy (%)_mean  \
0                          adaboost_deepset                         46.5112   
1                       adaboost_deepset_xy                         46.5608   
2              adaboost_deepset_xy_additive                         45.0604   
3                  adaboost_set_transformer                         52.3672   
4         adaboost_set_transformer_additive                         48.6384   
5               adaboost_set_transformer_xy                         52.3704   
6                                   deepset                         45.2884   
7                                deepset_xy                         45.1928   
8                       deepset_xy_additive                         45.9040   
9                          gbm_ensemble_all                         80.8196   
10                     gbm_ensemble_deepset                         64.5548   
11             gbm_ensemble_set_transformer         