In [None]:
import pandas as pd
import scipy.stats as stats

# Read the accuracy results table from the CSV under ../result
file_path = "../result/accuracy.csv"
df = pd.read_csv(file_path)

# Columns of `df` that are summarised per model further down
metrics = [
    'Train Strict Accuracy (%)',
    'Train ±1 Grade Accuracy (%)',
    'Val Strict Accuracy (%)',
    'Val ±1 Grade Accuracy (%)',
]

# Function to compute statistics
def compute_stats(group, columns=None, confidence=0.95):
    """Summarise metric columns of one group as mean and t-based margin of error.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows of a single group, as passed by ``groupby(...).apply``.
    columns : iterable of str, optional
        Columns to summarise. When omitted, the module-level ``metrics``
        list is used, so ``compute_stats(group)`` keeps working as before.
    confidence : float, default 0.95
        Two-sided confidence level for the Student-t critical value.

    Returns
    -------
    pandas.Series
        For every column, a ``'<col>_mean'`` and a ``'<col>_moe'`` entry.
        The margin of error is NaN when a column has fewer than two
        non-missing values (a sample standard deviation is undefined then).
    """
    cols = metrics if columns is None else columns
    # Upper quantile of a two-sided interval, e.g. 0.975 for 95% confidence.
    upper_q = (1 + confidence) / 2

    stats_dict = {}
    for col in cols:
        values = group[col]
        # mean()/std() skip NaN, so the sample size must be the non-null
        # count, not len(group); otherwise the standard error and the
        # degrees of freedom are too optimistic when values are missing.
        n = int(values.count())
        mean = values.mean()
        if n < 2:
            margin_of_error = float("nan")
        else:
            se = values.std(ddof=1) / (n ** 0.5)
            margin_of_error = stats.t.ppf(upper_q, df=n - 1) * se
        stats_dict[f'{col}_mean'] = mean
        stats_dict[f'{col}_moe'] = margin_of_error
    return pd.Series(stats_dict)

# Group by model and compute statistics.
# Only the metric columns are handed to compute_stats: passing the grouping
# column into apply() is deprecated in pandas 2.2+, and compute_stats never
# reads it, so the result is the same on every pandas version.
summary_df = (
    df.groupby('model')[metrics]
      .apply(compute_stats)
      .reset_index()
)

# Save the result to a new CSV file
summary_df.to_csv("analyze_result.csv", index=False)

# Show result
summary_df


In [None]:
import pandas as pd
import numpy as np

def process_ordinal_results(input_path, output_path_combined):
    """Build a model x threshold table of "mean ± std" accuracy strings.

    Parameters
    ----------
    input_path
        Excel file passed to ``pandas.read_excel``; it must provide the
        columns ``model``, ``threshold`` and ``accuracy``.
    output_path_combined
        Destination passed to ``DataFrame.to_csv`` for the wide table.

    Returns
    -------
    pandas.DataFrame
        One row per model and one column per threshold. Each cell is
        ``"<mean> ± <std>"`` with two decimals. Columns are ordered by the
        integer that follows "V" in the threshold label; labels that do not
        parse keep their relative order and are placed last.
    """
    def sort_key(col):
        # Pull the integer after "V" out of labels such as "P(>V4)".
        try:
            return int(col.split("V")[1].replace(")", ""))
        except (AttributeError, IndexError, ValueError):
            # Non-string label, no "V" in it, or a non-numeric suffix:
            # push the column to the end instead of failing. Only these
            # parse failures are handled so that KeyboardInterrupt and
            # unrelated errors are no longer swallowed.
            return 999

    # 1. Read Excel
    df = pd.read_excel(input_path)

    # 2. Compute mean & std by (model, threshold)
    summary = (
        df.groupby(["model", "threshold"])["accuracy"]
          .agg(["mean", "std"])
          .reset_index()
    )

    # 3. Create a "mean ± std" string. A plain comprehension replaces the
    #    row-wise DataFrame.apply: same strings, no per-row Series, and it
    #    stays well-defined when the summary has no rows.
    summary["combined"] = [
        f"{mean:.2f} ± {std:.2f}"
        for mean, std in zip(summary["mean"], summary["std"])
    ]

    # 4. Convert to wide format (models as rows, thresholds as columns)
    combined_wide = summary.pivot(
        index="model",
        columns="threshold",
        values="combined"
    )

    # 5. Sort columns in natural V4 → V13 order (numeric, not lexicographic)
    combined_wide = combined_wide.reindex(
        sorted(combined_wide.columns, key=sort_key),
        axis=1
    )

    # 6. Save to CSV
    combined_wide.to_csv(output_path_combined)

    return combined_wide



# Run the mean ± std summary on the ordinal results workbook and print the table
result_df = process_ordinal_results(
    input_path="../result/ordinal_result.xlsx",
    output_path_combined="ordinal_result_summary.csv",
)
print(result_df)


In [1]:
import pandas as pd
import numpy as np

def process_ordinal_results_mean_only(input_path, output_path_combined):
    """Build a model x threshold table of mean accuracy.

    Parameters
    ----------
    input_path
        Excel file passed to ``pandas.read_excel``; it must provide the
        columns ``model``, ``threshold`` and ``accuracy``.
    output_path_combined
        Destination passed to ``DataFrame.to_csv`` for the wide table.

    Returns
    -------
    pandas.DataFrame
        One row per model and one column per threshold, holding the mean of
        ``accuracy``. Columns are ordered by the integer that follows "V" in
        the threshold label; labels that do not parse keep their relative
        order and are placed last.
    """
    def sort_key(col):
        # Pull the integer after "V" out of labels such as "P(>V4)".
        try:
            return int(col.split("V")[1].replace(")", ""))
        except (AttributeError, IndexError, ValueError):
            # Non-string label, no "V" in it, or a non-numeric suffix:
            # push the column to the end instead of failing. Only these
            # parse failures are handled so that KeyboardInterrupt and
            # unrelated errors are no longer swallowed.
            return 999

    # 1. Read Excel
    df = pd.read_excel(input_path)

    # 2. Compute mean by (model, threshold)
    summary = (
        df.groupby(["model", "threshold"])["accuracy"]
          .mean()
          .reset_index()
    )

    # 3. Convert to wide format (models as rows, thresholds as columns)
    combined_wide = summary.pivot(
        index="model",
        columns="threshold",
        values="accuracy"
    )

    # 4. Sort columns in natural V4 → V13 order (numeric, not lexicographic)
    combined_wide = combined_wide.reindex(
        sorted(combined_wide.columns, key=sort_key),
        axis=1
    )

    # 5. Save to CSV
    combined_wide.to_csv(output_path_combined)

    return combined_wide

# Run the mean-only summary on the encoder-only workbook and print the table
result_df = process_ordinal_results_mean_only(
    input_path="../result/ordinal_result_encoder_only.xlsx",
    output_path_combined="ordinal_result_encoder_only.csv",
)
print(result_df)

threshold                                        P(>V4)     P(>V5)     P(>V6)  \
model                                                                           
deepset_ordinal                               81.370400  80.591200  83.755199   
deepset_ordinal_xy                            81.401599  80.642800  83.816000   
deepset_ordinal_xy_additive                   81.209600  80.540800  83.840400   
ordinal_adaboost_ensemble_all                 84.180400  82.433600  85.832400   
ordinal_adaboost_ensemble_deepset             81.502000  80.857600  83.856400   
ordinal_adaboost_ensemble_set_transformer     84.501600  83.104799  86.424800   
ordinal_gbm_ensemble_all                      83.980399  82.490000  85.806400   
ordinal_gbm_ensemble_deepset                  81.538000  80.560000  83.690400   
ordinal_gbm_ensemble_set_transformer          84.186400  82.627200  85.918400   
ordinal_soft_voting_ensemble_all              84.180400  82.433600  85.832400   
ordinal_soft_voting_ensemble