In [1]:
import pandas as pd
import scipy.stats as stats

# Load the uploaded CSV file
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# List of metric columns
# Train Strict Accuracy (%),Train ±1 Grade Accuracy (%),Val Strict Accuracy (%),Val ±1 Grade Accuracy (%)
metrics = ['Train Strict Accuracy (%)', 'Train ±1 Grade Accuracy (%)', 'Val Strict Accuracy (%)', 'Val ±1 Grade Accuracy (%)']

# Function to compute statistics
def compute_stats(group):
    stats_dict = {}
    n = len(group)
    for col in metrics:
        mean = group[col].mean()
        std = group[col].std(ddof=1)
        se = std / (n ** 0.5)
        margin_of_error = stats.t.ppf(0.975, df=n-1) * se  # 95% confidence
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics
summary_df = df.groupby('model').apply(compute_stats).reset_index()

# Show result
summary_df


  summary_df = df.groupby('model').apply(compute_stats).reset_index()


Unnamed: 0,model,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,adaboost_deepset,46.6756,0.195036,83.1616,0.306025,45.3028,0.287746,81.6776,0.340809
1,adaboost_deepset_xy,46.5544,0.246838,83.2344,0.281032,44.9216,0.344714,81.6512,0.319823
2,adaboost_deepset_xy_additive,45.2216,0.314519,82.1484,0.289448,43.7476,0.296899,80.1752,0.338182
3,adaboost_set_transformer,52.322,0.229925,84.3028,0.216796,46.2004,0.348486,81.2416,0.225027
4,adaboost_set_transformer_additive,48.4848,0.374896,82.7244,0.249601,45.2052,0.458691,80.9644,0.31412
5,adaboost_set_transformer_xy,52.2008,0.191205,84.2544,0.22882,46.52,0.363246,81.6124,0.313124
6,deepset,44.912,0.336905,82.534,0.486828,42.5172,0.326852,80.2384,0.510738
7,deepset_xy,45.1156,0.490029,82.2384,0.437476,42.7144,0.448679,80.0676,0.501075
8,deepset_xy_additive,44.6536,0.490924,81.7728,0.695937,42.3984,0.446249,79.8896,0.718629
9,gbm_ensemble_all,80.8496,0.185236,95.6164,0.123402,49.1048,0.362258,83.186,0.256367


In [None]:
# Separate mean and margin of error tables
mean_columns = ['model'] + [col for col in summary_df.columns if col.endswith('_mean')]
moe_columns = ['model'] + [col for col in summary_df.columns if col.endswith('_moe')]

mean_df = summary_df[mean_columns]
moe_df = summary_df[moe_columns]

# Show both tables
print(mean_df)
print(moe_df)
