In [None]:
import pandas as pd
from scipy.stats import ttest_rel

# Path to the experiment results CSV -- change this to point at your own
# results file. The later cells read the columns model, pretrained,
# stable_diffusion, data_augmentation, val_accuracy, precision and f1_score.
RESULTS_CSV = "results_data.csv"

df = pd.read_csv(RESULTS_CSV)

Calculate paired t-tests on validation accuracy: ViT vs. ResNet50, and pretrained vs. non-pretrained.

In [None]:
import pandas as pd
from scipy.stats import ttest_rel

# The four (stable_diffusion, data_augmentation) settings to compare, written
# as the strings that are matched against the corresponding df columns.
configs = [
    ("False", "False"),
    ("False", "True"),
    ("True", "False"),
    ("True", "True")
]

# Column of df whose values are compared by the t-tests.
metric = "val_accuracy"


def _setting_mask(frame, sd, aug):
    """Return a boolean mask selecting the rows of `frame` with the given settings.

    Args:
        frame: DataFrame with 'stable_diffusion' and 'data_augmentation' columns.
        sd: expected stable_diffusion value, as a string ("True" / "False").
        aug: expected data_augmentation value, as a string ("True" / "False").

    The columns are cast to str before comparing: read_csv infers bool dtype
    for a column that holds only True/False, and a bool column never equals
    the string "True". For columns that already hold strings the cast is a
    no-op.
    """
    return (
        (frame['stable_diffusion'].astype(str) == sd) &
        (frame['data_augmentation'].astype(str) == aug)
    )


def _paired_ttest(first, second):
    """Run a paired t-test on two Series after truncating them to equal length.

    Args:
        first, second: Series of metric values for the two groups.

    Returns:
        (n_pairs, t_statistic, p_value). The statistic and the p-value are
        None when there are 2 or fewer pairs.

    NOTE(review): values are paired purely by row order within each group.
    ttest_rel assumes row i of both groups comes from the same run/fold/seed
    -- confirm this holds for the results file; otherwise an unpaired test is
    the appropriate one.
    """
    first = first.reset_index(drop=True)
    second = second.reset_index(drop=True)

    # Drop the surplus tail of the longer group so both have the same length.
    n_pairs = min(len(first), len(second))
    if n_pairs > 2:
        t_stat, t_pval = ttest_rel(first.iloc[:n_pairs], second.iloc[:n_pairs])
    else:
        t_stat = t_pval = None
    return n_pairs, t_stat, t_pval


# vit vs resnet: one test per (pretrained, stable_diffusion, data_augmentation)
vit_vs_resnet_split = []

for pretrained_setting in [True, False]:
    for sd, aug in configs:
        subset = df[
            (df['pretrained'] == pretrained_setting) &
            _setting_mask(df, sd, aug)
        ]

        n_pairs, t_stat, t_pval = _paired_ttest(
            subset.loc[subset['model'] == 'ViT', metric],
            subset.loc[subset['model'] == 'ResNet50', metric],
        )

        vit_vs_resnet_split.append({
            "comparison": "ViT vs ResNet50",
            "pretrained": pretrained_setting,
            "model": "ViT vs ResNet50",
            "stable_diffusion": sd,
            "data_augmentation": aug,
            "n_pairs": n_pairs,
            "t_statistic": t_stat,
            "t_p_value": t_pval
        })


# Pretrained vs Non-Pretrained: one test per (model, stable_diffusion, data_augmentation)
pretrained_comparisons = []

for model_type in ['ViT', 'ResNet50']:
    for sd, aug in configs:
        subset = df[
            (df['model'] == model_type) &
            _setting_mask(df, sd, aug)
        ]

        n_pairs, t_stat, t_pval = _paired_ttest(
            subset.loc[subset['pretrained'] == True, metric],
            subset.loc[subset['pretrained'] == False, metric],
        )

        pretrained_comparisons.append({
            "comparison": "Pretrained vs Non-Pretrained",
            "pretrained": "True vs False",
            "model": model_type,
            "stable_diffusion": sd,
            "data_augmentation": aug,
            "n_pairs": n_pairs,
            "t_statistic": t_stat,
            "t_p_value": t_pval
        })

# --- Combine all results ---
# One row per comparison; both record lists share the same keys.
all_results = pd.DataFrame(vit_vs_resnet_split + pretrained_comparisons)
print(all_results)


Calculate the mean and standard deviation of validation accuracy, precision and F1 score for each configuration.

In [None]:
# Grouped view of the results, kept under its original name. A GroupBy object
# cannot be filtered with boolean masks, so the per-configuration rows below
# are selected from `df` directly.
new_df = df.groupby(["model", "pretrained", "stable_diffusion", "data_augmentation"])

# Print mean and std of each metric for the pretrained ViT under every
# stable_diffusion / data_augmentation combination.
for pretrained in [True]:
    for stable_diffusion in ["True", "False"]:
        for data_augmentation in ["True", "False"]:
            print(f"Model: ViT, Pretrained: {pretrained}, Stable Diffusion: {stable_diffusion}, Data Augmentation: {data_augmentation}")
            # The two setting columns are cast to str so the comparison also
            # works when read_csv parsed them as bool rather than as strings.
            temp = df[
                (df["model"] == "ViT")
                & (df["pretrained"] == pretrained)
                & (df["stable_diffusion"].astype(str) == stable_diffusion)
                & (df["data_augmentation"].astype(str) == data_augmentation)
            ]
            # calculate the mean and std of each metric; these are NaN when no
            # row matches, and std is NaN when only a single row matches
            mean_val_accuracy = temp["val_accuracy"].mean()
            mean_precision = temp["precision"].mean()
            mean_f1_score = temp["f1_score"].mean()
            std_val_accuracy = temp["val_accuracy"].std()
            std_precision = temp["precision"].std()
            std_f1_score = temp["f1_score"].std()
            # print the mean and std
            print(f"Mean Val Accuracy: {mean_val_accuracy}, Std Val Accuracy: {std_val_accuracy}")
            print(f"Mean Precision: {mean_precision}, Std Precision: {std_precision}")
            print(f"Mean F1 Score: {mean_f1_score}, Std F1 Score: {std_f1_score}")
            