In [1]:
import pandas as pd
import scipy.stats as stats

# Read the accuracy results CSV into a DataFrame.
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# Metric columns that get summarised per model further down.
metrics = [
    'Train Strict Accuracy (%)',
    'Train ±1 Grade Accuracy (%)',
    'Val Strict Accuracy (%)',
    'Val ±1 Grade Accuracy (%)',
]

# Function to compute statistics
def compute_stats(group, columns=None):
    """Summarise metric columns of one group as mean and 95% margin of error.

    For every column the sample mean and the half-width of a two-sided 95%
    Student-t confidence interval (``t * std / sqrt(n)``) are computed.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows belonging to a single group; must contain every requested column.
    columns : iterable of str, optional
        Columns to summarise. Defaults to the module-level ``metrics`` list.

    Returns
    -------
    pandas.Series
        Entries ``'<col>_mean'`` and ``'<col>_moe'`` for each column, in the
        order the columns were given. The margin of error is NaN when a
        column has fewer than two non-missing values.
    """
    if columns is None:
        columns = metrics

    stats_dict = {}
    for col in columns:
        # pandas' mean/std skip missing values, so the sample size has to be
        # the non-missing count of this column rather than the row count;
        # otherwise the standard error and degrees of freedom are wrong.
        values = group[col].dropna()
        n = len(values)
        mean = values.mean()
        if n > 1:
            se = values.std(ddof=1) / (n ** 0.5)
            margin_of_error = stats.t.ppf(0.975, df=n - 1) * se  # 95% confidence
        else:
            # A sample standard deviation is undefined for fewer than two points.
            margin_of_error = float('nan')
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics.
# The metric columns are selected before .apply so that the grouping column
# is not passed into compute_stats; recent pandas versions warn when
# DataFrameGroupBy.apply operates on the grouping columns.
summary_df = df.groupby('model')[metrics].apply(compute_stats).reset_index()

# Show result
summary_df




  summary_df = df.groupby('model').apply(compute_stats).reset_index()


Unnamed: 0,model,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,adaboost_deepset,46.5112,0.269235,83.3948,0.262226,45.2036,0.25954,81.8328,0.302236
1,adaboost_deepset_xy,46.5608,0.296124,83.2304,0.224685,45.2584,0.33746,81.7448,0.224924
2,adaboost_deepset_xy_additive,45.0604,0.381091,82.1764,0.256519,43.4828,0.344927,80.0752,0.346653
3,adaboost_set_transformer,52.3672,0.221731,84.4244,0.177115,46.4956,0.33425,81.8804,0.275733
4,adaboost_set_transformer_additive,48.6384,0.321844,82.99,0.208607,45.4504,0.36319,81.1464,0.278215
5,adaboost_set_transformer_xy,52.3704,0.244377,84.2836,0.129828,46.2616,0.317191,81.6816,0.238921
6,deepset,45.2884,0.566852,82.412,0.562115,42.9456,0.553541,80.262,0.580433
7,deepset_xy,45.1928,0.667841,82.252,0.72108,42.82,0.579493,80.0612,0.785272
8,deepset_xy_additive,45.904,0.428412,83.0652,0.530568,43.6404,0.420729,81.148,0.6245
9,gbm_ensemble_all,80.8196,0.204053,95.6636,0.113307,48.7776,0.336583,83.2968,0.269167


In [3]:
# Split the summary into two tables that share the 'model' column:
# one holding the means, the other the margins of error.
mean_columns = ['model', *(name for name in summary_df.columns if name.endswith('_mean'))]
moe_columns = ['model', *(name for name in summary_df.columns if name.endswith('_moe'))]

mean_df = summary_df.loc[:, mean_columns]
moe_df = summary_df.loc[:, moe_columns]

# Print the means table first, then the margin-of-error table.
print(mean_df, moe_df, sep='\n')


                                      model  Train Strict Accuracy (%)_mean  \
0                          adaboost_deepset                         46.5112   
1                       adaboost_deepset_xy                         46.5608   
2              adaboost_deepset_xy_additive                         45.0604   
3                  adaboost_set_transformer                         52.3672   
4         adaboost_set_transformer_additive                         48.6384   
5               adaboost_set_transformer_xy                         52.3704   
6                                   deepset                         45.2884   
7                                deepset_xy                         45.1928   
8                       deepset_xy_additive                         45.9040   
9                          gbm_ensemble_all                         80.8196   
10                     gbm_ensemble_deepset                         64.5548   
11             gbm_ensemble_set_transformer         