In [2]:
import pandas as pd
import scipy.stats as stats

# Load the uploaded CSV file
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# List of metric columns
# Train Strict Accuracy (%),Train ±1 Grade Accuracy (%),Val Strict Accuracy (%),Val ±1 Grade Accuracy (%)
metrics = ['Train Strict Accuracy (%)', 'Train ±1 Grade Accuracy (%)', 'Val Strict Accuracy (%)', 'Val ±1 Grade Accuracy (%)']

# Function to compute statistics
def compute_stats(group):
    stats_dict = {}
    n = len(group)
    for col in metrics:
        mean = group[col].mean()
        std = group[col].std(ddof=1)
        se = std / (n ** 0.5)
        margin_of_error = stats.t.ppf(0.975, df=n-1) * se  # 95% confidence
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics
summary_df = df.groupby('model').apply(compute_stats).reset_index()

# Show result
summary_df


  summary_df = df.groupby('model').apply(compute_stats).reset_index()


Unnamed: 0,model,Train Strict Accuracy (%)_mean,Train Strict Accuracy (%)_moe,Train ±1 Grade Accuracy (%)_mean,Train ±1 Grade Accuracy (%)_moe,Val Strict Accuracy (%)_mean,Val Strict Accuracy (%)_moe,Val ±1 Grade Accuracy (%)_mean,Val ±1 Grade Accuracy (%)_moe
0,deepset,45.3448,0.446483,82.5324,0.523432,42.9268,0.393669,80.2176,0.532586
1,deepset_xy,45.262,0.438182,82.3904,0.452115,42.7492,0.370957,80.1216,0.533701
2,deepset_xy_additive,44.7052,0.680271,82.114,0.611853,42.4848,0.65306,80.0308,0.611661
3,geometric_mean_ensemble_all,61.9792,0.34275,89.0788,0.19945,47.0268,0.287692,83.1328,0.19878
4,geometric_mean_ensemble_deepset,45.4032,0.275113,82.9556,0.322471,43.2768,0.263605,80.6568,0.351699
5,geometric_mean_ensemble_set_transformer,67.2724,0.344816,91.3444,0.314599,47.43,0.449835,83.1532,0.237826
6,median_ensemble_all,53.8788,0.416098,85.906,0.238836,45.28,0.215616,82.3236,0.228046
7,median_ensemble_deepset,45.3936,0.278516,82.8452,0.328186,43.0836,0.263557,80.5688,0.347501
8,median_ensemble_set_transformer,65.6756,0.368123,90.6568,0.345709,46.5412,0.469414,82.4448,0.26153
9,set_transformer,61.8472,0.628318,88.4284,0.608357,44.1116,0.60177,79.2936,0.508973


In [2]:
# Separate mean and margin of error tables
mean_columns = ['Model Type'] + [col for col in summary_df.columns if col.endswith('_mean')]
moe_columns = ['Model Type'] + [col for col in summary_df.columns if col.endswith('_moe')]

mean_df = summary_df[mean_columns]
moe_df = summary_df[moe_columns]

# Show both tables
print(mean_df)
print(moe_df)


                                 Model Type  Train Strict Accuracy (%)_mean  \
0                                   deepset                         45.1656   
1                                deepset_xy                         45.0200   
2                       deepset_xy_additive                         45.1896   
3               geometric_mean_ensemble_all                         61.9432   
4           geometric_mean_ensemble_deepset                         45.3976   
5   geometric_mean_ensemble_set_transformer                         67.1820   
6                       median_ensemble_all                         53.8304   
7                   median_ensemble_deepset                         45.3876   
8           median_ensemble_set_transformer                         65.6644   
9                           set_transformer                         61.5816   
10                 set_transformer_additive                         55.8380   
11                       set_transformer_xy         