In [76]:
import sys
sys.path.append("./modules")

from modules.data_utils import load_data, extract_xy_from_loader, sample_batch, PCAEncoder, data_scaler, dataset_dataloaders
from modules.plot_utils import visualize_umap, show_images_batch, data_variance_plot, feature_heatmap, plot_metric_with_std, plot_confusion_matrix, plot_auroc, plot_prec, plot_reconstructed_heatmaps_with_predictions, plot_noise_comparison
from modules.model_utils import cv_qcnn, QuantumLinear, gradcam_model, ClassicalModel, dv_qcnn, DVQuantumLinear, get_cv_qcnn_qnode, get_dv_qcnn_qnode
from modules.training_validating import run_kfold_training, evaluate_v2, noise_robustness_validation

import torch
from torch.utils.data import DataLoader, Subset
from torch.utils.data import TensorDataset
from pennylane import draw_mpl
import torch.optim as optim
import torch.nn as nn

import pickle
import json
import umap
import os
import numpy as np

import matplotlib.pyplot as plt
import pandas as pd
from scipy.stats import friedmanchisquare, wilcoxon, norm

In [77]:
# Analysis configuration.
# metrics_dir: folder holding the "<model>_<dataset>_fold_<k>_metrics.pkl" files.
# It can be overridden with the METRICS_DIR environment variable so the notebook is
# not tied to a single machine; the default is the original hard-coded location.
# Alternative location kept from the original cell:
#   "/home/dalopezm/quantum-studies/quantum-cv/model_checkpoints"
metrics_dir = os.environ.get(
    "METRICS_DIR", "/home/dalopezm/gaussian-qnns/model_checkpoints"
)

# Dataset analysed first; other options used by this notebook: "organ", "breast".
dataset_name = "pneumonia"

# Model identifiers; they are the filename prefixes of the stored metric files.
models = ["classical", "dv", "cv"]



## CHECKING STORED DATA

In [63]:
# Load the stored mean/std summary of the train and validation metrics and echo it.
# NOTE(review): each entry has a 'train_name' key where 'train_mean' would be
# expected (next to 'train_std', 'val_mean', 'val_std') — confirm in the writer.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_linear_means_stds_metrics.json'
with open(summary_path) as fh:
    cv_pneumonia_stds = json.load(fh)
print(cv_pneumonia_stds)

{'acc': {'metric': 'acc', 'train_name': 0.8838137542365546, 'train_std': 0.006951201248749635, 'val_mean': 0.8636506950618337, 'val_std': 0.04771299499291847}, 'loss': {'metric': 'loss', 'train_name': 0.26892156771116743, 'train_std': 0.026210189489583154, 'val_mean': 0.3978221916496348, 'val_std': 0.2177165279357924}, 'rec': {'metric': 'rec', 'train_name': 0.8838137542365546, 'train_std': 0.006951201248749635, 'val_mean': 0.8636506950618337, 'val_std': 0.04771299499291847}, 'prec': {'metric': 'prec', 'train_name': 0.8815458734998733, 'train_std': 0.0068266439263163226, 'val_mean': 0.8697962686536144, 'val_std': 0.03699552384615573}, 'f1': {'metric': 'f1', 'train_name': 0.882154177145229, 'train_std': 0.006691683328063826, 'val_mean': 0.8636506950618337, 'val_std': 0.04771299499291847}}


In [40]:
# Load the stored mean/std metric summary from the classical results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/classical-classical_means_stds_metrics.json'
with open(summary_path) as fh:
    classical_pneumonia_stds = json.load(fh)
print(classical_pneumonia_stds)

{'acc': {'metric': 'acc', 'train_name': 0.902719597803709, 'train_std': 0.005652053697108205, 'val_mean': 0.8999591339095181, 'val_std': 0.008239846306414065}, 'loss': {'metric': 'loss', 'train_name': 0.22370036956296704, 'train_std': 0.01094141916989291, 'val_mean': 0.22882060316575112, 'val_std': 0.01971767442274417}, 'rec': {'metric': 'rec', 'train_name': 0.902719597803709, 'train_std': 0.005652053697108205, 'val_mean': 0.8999591339095181, 'val_std': 0.008239846306414065}, 'prec': {'metric': 'prec', 'train_name': 0.9023302251646155, 'train_std': 0.005444881438184116, 'val_mean': 0.899768126567471, 'val_std': 0.0076034941179643515}, 'f1': {'metric': 'f1', 'train_name': 0.9024679880640525, 'train_std': 0.0055515945896235375, 'val_mean': 0.8999591339095181, 'val_std': 0.008239846306414065}}


In [41]:
# Load the stored mean/std metric summary from the dv-linear results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_dv-linear_means_stds_metrics.json'
with open(summary_path) as fh:
    dv_pneumonia_stds = json.load(fh)
print(dv_pneumonia_stds)

{'acc': {'metric': 'acc', 'train_name': 0.8997452703581179, 'train_std': 0.0011596391287497386, 'val_mean': 0.8993197555071116, 'val_std': 0.0018321649482827482}, 'loss': {'metric': 'loss', 'train_name': 0.24182763775868268, 'train_std': 0.0018906610684890468, 'val_mean': 0.2430099160454957, 'val_std': 0.0047294488020773765}, 'rec': {'metric': 'rec', 'train_name': 0.8997452703581179, 'train_std': 0.0011596391287497386, 'val_mean': 0.8993197555071116, 'val_std': 0.0018321649482827482}, 'prec': {'metric': 'prec', 'train_name': 0.8987839308990746, 'train_std': 0.0012752539557023056, 'val_mean': 0.8982724960341528, 'val_std': 0.001995274857717674}, 'f1': {'metric': 'f1', 'train_name': 0.8991584926460355, 'train_std': 0.001232326905943121, 'val_mean': 0.8993197555071116, 'val_std': 0.0018321649482827482}}


In [42]:
# Load the stored mean/std metric summary from the organ dv-linear results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_organ-dv-linear_means_stds_metrics.json'
with open(summary_path) as fh:
    dv_organ_stds = json.load(fh)
print(dv_organ_stds)

{'acc': {'metric': 'acc', 'train_name': 0.45587384259259256, 'train_std': 0.022306694576634208, 'val_mean': 0.45785108024691357, 'val_std': 0.025187834482834962}, 'loss': {'metric': 'loss', 'train_name': 1.5109415283357654, 'train_std': 0.0572332206361616, 'val_mean': 1.5141224787559038, 'val_std': 0.0595420536104125}, 'rec': {'metric': 'rec', 'train_name': 0.45587384259259256, 'train_std': 0.022306694576634208, 'val_mean': 0.45785108024691357, 'val_std': 0.025187834482834962}, 'prec': {'metric': 'prec', 'train_name': 0.41229991481709066, 'train_std': 0.019591361243498983, 'val_mean': 0.4140043246906157, 'val_std': 0.0284493255238091}, 'f1': {'metric': 'f1', 'train_name': 0.4076269997281112, 'train_std': 0.021255511740697043, 'val_mean': 0.45785108024691357, 'val_std': 0.025187834482834962}}


In [43]:
# Load the stored mean/std metric summary from the organ classical results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/classical-organ-classical_means_stds_metrics.json'
with open(summary_path) as fh:
    classical_organ_stds = json.load(fh)
print(classical_organ_stds)

{'acc': {'metric': 'acc', 'train_name': 0.5323591820987654, 'train_std': 0.020186079619487367, 'val_mean': 0.5341435185185185, 'val_std': 0.02066790674586639}, 'loss': {'metric': 'loss', 'train_name': 1.2996877278258772, 'train_std': 0.03833437395778905, 'val_mean': 1.304159249788449, 'val_std': 0.02678658312730467}, 'rec': {'metric': 'rec', 'train_name': 0.5323591820987654, 'train_std': 0.020186079619487367, 'val_mean': 0.5341435185185185, 'val_std': 0.02066790674586639}, 'prec': {'metric': 'prec', 'train_name': 0.4688729924050487, 'train_std': 0.019280633019589006, 'val_mean': 0.47570043091392505, 'val_std': 0.016133174185887916}, 'f1': {'metric': 'f1', 'train_name': 0.48051248951543574, 'train_std': 0.0194938203197307, 'val_mean': 0.5341435185185185, 'val_std': 0.02066790674586639}}


In [44]:
# Load the stored mean/std metric summary from the organ linear results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_organ-linear_means_stds_metrics.json'
with open(summary_path) as fh:
    cv_organ_stds = json.load(fh)
print(cv_organ_stds)

{'acc': {'metric': 'acc', 'train_name': 0.5483217592592592, 'train_std': 0.0033264977548187677, 'val_mean': 0.5492862654320987, 'val_std': 0.0033187983283343506}, 'loss': {'metric': 'loss', 'train_name': 1.317084689659101, 'train_std': 0.005281459391276092, 'val_mean': 1.3191297981474135, 'val_std': 0.020012125503196206}, 'rec': {'metric': 'rec', 'train_name': 0.5483217592592592, 'train_std': 0.0033264977548187677, 'val_mean': 0.5492862654320987, 'val_std': 0.0033187983283343506}, 'prec': {'metric': 'prec', 'train_name': 0.5039077647167538, 'train_std': 0.004250090842237557, 'val_mean': 0.5048525694600237, 'val_std': 0.001018539281073055}, 'f1': {'metric': 'f1', 'train_name': 0.5096495855188649, 'train_std': 0.003379576785638079, 'val_mean': 0.5492862654320987, 'val_std': 0.0033187983283343506}}


In [45]:
# Load the stored mean/std metric summary from the breast linear results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_breast-linear_means_stds_metrics.json'
with open(summary_path) as fh:
    cv_breast_stds = json.load(fh)
print(cv_breast_stds)

{'acc': {'metric': 'acc', 'train_name': 0.6913919413919415, 'train_std': 0.022130120830759263, 'val_mean': 0.6904761904761906, 'val_std': 0.049416621110740064}, 'loss': {'metric': 'loss', 'train_name': 0.5760508395813324, 'train_std': 0.032441925375175304, 'val_mean': 0.5833050370653033, 'val_std': 0.04093021041908495}, 'rec': {'metric': 'rec', 'train_name': 0.6913919413919415, 'train_std': 0.022130120830759263, 'val_mean': 0.6904761904761906, 'val_std': 0.049416621110740064}, 'prec': {'metric': 'prec', 'train_name': 0.7000781685838137, 'train_std': 0.01802419374346797, 'val_mean': 0.6940143587421592, 'val_std': 0.030107240920626333}, 'f1': {'metric': 'f1', 'train_name': 0.6941491210672807, 'train_std': 0.0187218647520265, 'val_mean': 0.6904761904761906, 'val_std': 0.049416621110740064}}


In [46]:
# Load the stored mean/std metric summary from the breast dv-linear results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/cv_breast-dv-linear_means_stds_metrics.json'
with open(summary_path) as fh:
    dv_breast_stds = json.load(fh)
print(dv_breast_stds)

{'acc': {'metric': 'acc', 'train_name': 0.7326007326007327, 'train_std': 0.011290135536573225, 'val_mean': 0.7252747252747254, 'val_std': 0.01617538620206563}, 'loss': {'metric': 'loss', 'train_name': 0.5475635773096329, 'train_std': 0.021824360954395022, 'val_mean': 0.5556764438912108, 'val_std': 0.024567601144856524}, 'rec': {'metric': 'rec', 'train_name': 0.7326007326007327, 'train_std': 0.011290135536573225, 'val_mean': 0.7252747252747254, 'val_std': 0.01617538620206563}, 'prec': {'metric': 'prec', 'train_name': 0.6180672401419027, 'train_std': 0.0765382958787802, 'val_mean': 0.6134451734363956, 'val_std': 0.07511261913300496}, 'f1': {'metric': 'f1', 'train_name': 0.6423750210081399, 'train_std': 0.03296607979372724, 'val_mean': 0.7252747252747254, 'val_std': 0.01617538620206563}}


In [47]:
# Load the stored mean/std metric summary from the breast classical results file and echo it.
summary_path = '/home/dalopezm/quantum-studies/quantum-cv/results/classical-breast-classical_means_stds_metrics.json'
with open(summary_path) as fh:
    classical_breast_stds = json.load(fh)
print(classical_breast_stds)

{'acc': {'metric': 'acc', 'train_name': 0.73992673992674, 'train_std': 0.012950673648105272, 'val_mean': 0.7344322344322345, 'val_std': 0.00933886357800878}, 'loss': {'metric': 'loss', 'train_name': 0.5198891145405752, 'train_std': 0.003120498409385244, 'val_mean': 0.5248923275496934, 'val_std': 0.01993247792093469}, 'rec': {'metric': 'rec', 'train_name': 0.73992673992674, 'train_std': 0.012950673648105272, 'val_mean': 0.7344322344322345, 'val_std': 0.00933886357800878}, 'prec': {'metric': 'prec', 'train_name': 0.6618669192633689, 'train_std': 0.09163980293091342, 'val_mean': 0.6492649776904088, 'val_std': 0.08192809826036028}, 'f1': {'metric': 'f1', 'train_name': 0.6777281180533636, 'train_std': 0.04311633687271761, 'val_mean': 0.7344322344322345, 'val_std': 0.00933886357800878}}


## STAT ANALYSIS PNEUMONIA

In [78]:
# Gather, for every model, the validation metrics of the last epoch of each fold.
# A missing fold file is reported and skipped, so a model may end up with fewer rows.
# NOTE: pickle.load can execute arbitrary code; only read metric files you produced.
results = {name: [] for name in models}

for model in models:
    for fold in (1, 2, 3):  # assuming 3-fold CV
        file_path = os.path.join(
            metrics_dir, f"{model}_{dataset_name}_fold_{fold}_metrics.pkl"
        )
        if not os.path.exists(file_path):
            print(f"Missing: {file_path}")
            continue

        with open(file_path, "rb") as f:
            train_metrics, val_metrics = pickle.load(f)

        # Get last epoch validation metrics (final entry of each metric history).
        fold_row = {"fold": fold}
        for key in ("acc", "f1", "prec", "rec"):
            fold_row[key] = val_metrics[key][-1]
        results[model].append(fold_row)

In [79]:
# Build one table with a (model, metric) column MultiIndex and one row per fold.
# Models for which no fold file was loaded are left out.
per_model_frames = {}
for model_name, fold_rows in results.items():
    if fold_rows:
        per_model_frames[model_name] = pd.DataFrame(fold_rows).set_index("fold")
df = pd.concat(per_model_frames, axis=1)

print("\nExtracted metrics:")
print(df.round(4))


Extracted metrics:
     classical                              dv                          \
           acc      f1    prec     rec     acc      f1    prec     rec   
fold                                                                     
1       0.8924  0.8924  0.8918  0.8924  0.9019  0.9019  0.9009  0.9019   
2       0.9114  0.9114  0.9100  0.9114  0.8980  0.8980  0.8962  0.8980   
3       0.8961  0.8961  0.8975  0.8961  0.8980  0.8980  0.8977  0.8980   

          cv                          
         acc      f1    prec     rec  
fold                                  
1     0.7962  0.7962  0.8175  0.7962  
2     0.8967  0.8967  0.8946  0.8967  
3     0.8980  0.8980  0.8973  0.8980  


In [80]:
# Display the unrounded per-fold metrics table.
df

Unnamed: 0_level_0,classical,classical,classical,classical,dv,dv,dv,dv,cv,cv,cv,cv
Unnamed: 0_level_1,acc,f1,prec,rec,acc,f1,prec,rec,acc,f1,prec,rec
fold,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,0.892357,0.892357,0.891784,0.892357,0.901911,0.901911,0.90095,0.901911,0.796178,0.796178,0.817499,0.796178
2,0.911409,0.911409,0.909998,0.911409,0.898024,0.898024,0.896162,0.898024,0.89675,0.89675,0.894614,0.89675
3,0.896112,0.896112,0.897522,0.896112,0.898024,0.898024,0.897706,0.898024,0.898024,0.898024,0.897276,0.898024


In [81]:
# NOTE(review): disabled single-metric (F1) draft of the analysis; nothing in this cell runs.
# Do not re-enable it as is: `dv` is read from the "cv" column, and the draft rebinds
# `df` (to 2) and `results` (to a list), names that other cells use for the metrics
# table and the per-model fold records.
# # Extract metric arrays
# classical = df["classical"]["f1"].values
# dv = df["cv"]["f1"].values
# cv = df["cv"]["f1"].values

# # Friedman test
# stat, p = friedmanchisquare(classical, dv, cv)
# df = 2
# print(f"Friedman: χ²({df}) = {stat:.3f}, p = {p:.4f}")

# # Pairwise Wilcoxon (Bonferroni correction)
# pairs = [("Classical", classical, "DV", dv),
#          ("Classical", classical, "CV", cv),
#          ("DV", dv, "CV", cv)]

# alpha = 0.05 / len(pairs)
# results = []
# for m1, v1, m2, v2 in pairs:
#     if np.allclose(v1, v2):
#         results.append((m1, m2, None, None, alpha, "Identical"))
#     else:
#         stat, p = wilcoxon(v1, v2)
#         sig = "Yes" if p < alpha else "No"
#         results.append((m1, m2, stat, p, alpha, sig))

# results_df = pd.DataFrame(results, columns=["Model 1", "Model 2", "W", "p", "α", "Significant"])
# print(results_df)

In [82]:
# Per-metric comparison of the three models across CV folds:
#   1. omnibus Friedman test (+ Kendall's W as effect size),
#   2. pairwise Wilcoxon signed-rank tests with a Bonferroni-corrected threshold.
# NOTE(review): with only 3 folds an exact two-sided Wilcoxon p-value cannot go below
# 2 / 2**3 = 0.25, so no pair can reach the corrected threshold, and SciPy documents
# the Friedman chi-square p-value as reliable only for larger samples. Treat the
# numbers as descriptive rather than confirmatory.
metrics = ["acc", "prec", "rec", "f1"]
models = ["classical", "dv", "cv"]

results_all = []

for metric in metrics:
    # One column per model, one row per fold. A fold that is missing for any model
    # becomes NaN after the column-wise concat, so keep only the folds every model
    # has: both tests are paired and must not be fed NaNs.
    paired = pd.DataFrame({name: df[name][metric] for name in models}).dropna()
    if paired.empty:
        raise ValueError(f"No fold has '{metric}' values for all of {models}")

    classical = paired["classical"].values
    dv = paired["dv"].values
    cv = paired["cv"].values

    k = len(models)  # number of compared models
    n = len(paired)  # number of complete folds

    stat, p = friedmanchisquare(classical, dv, cv)
    df_stat = k - 1
    # Kendall's W from the Friedman statistic: W = chi2 / (n * (k - 1)).
    W = stat / (n * (k - 1))

    pairs = [
        ("Classical", classical, "DV", dv),
        ("Classical", classical, "CV", cv),
        ("DV", dv, "CV", cv),
    ]

    # Bonferroni correction: split the 0.05 level over the pairwise comparisons.
    alpha = 0.05 / len(pairs)
    pair_results = []

    for m1, v1, m2, v2 in pairs:
        if np.allclose(v1, v2):
            # No (numerical) differences to rank, so the test is skipped.
            pair_results.append((m1, m2, np.nan, np.nan, alpha, "Identical", np.nan))
        else:
            stat_w, p_w = wilcoxon(v1, v2)
            # Effect size r = |z| / sqrt(n), with z recovered from the two-sided p-value.
            z = norm.ppf(p_w / 2) if p_w < 1 else 0
            r = abs(z) / np.sqrt(len(v1))
            sig = "Yes" if p_w < alpha else "no"
            pair_results.append((m1, m2, stat_w, p_w, alpha, sig, r))

    results_all.append({
        "Metric": metric,
        "Friedman x": round(stat, 3),
        "df": df_stat,
        "p": round(p, 4),
        "Kendall W": round(W, 3),
        "N (folds)": n,
        "Pairwise": pd.DataFrame(
            pair_results,
            columns=["Model 1", "Model 2", "W", "p", "alpha", "Significant", "Effect size r"],
        ),
    })




In [84]:
# Display summaries
for res in results_all:
    print(f"\n=== {res['Metric'].upper()} ===")
    print(f"Friedman χ²({res['df']}), N={res['N (folds)']}, p={res['p']}, W={res['Kendall W']}")
    print(res["Pairwise"])


=== ACC ===
Friedman χ²(2), N=3, p=0.4412, W=0.273
     Model 1 Model 2    W         p     alpha Significant  Effect size r
0  Classical      DV  3.0  1.000000  0.016667          no       0.000000
1  Classical      CV  1.0  0.500000  0.016667          no       0.389417
2         DV      CV  0.0  0.179712  0.016667          no       0.774597

=== PREC ===
Friedman χ²(2), N=3, p=0.097, W=0.778
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  3.0  1.00  0.016667          no       0.000000
1  Classical      CV  0.0  0.25  0.016667          no       0.664155
2         DV      CV  0.0  0.25  0.016667          no       0.664155

=== REC ===
Friedman χ²(2), N=3, p=0.4412, W=0.273
     Model 1 Model 2    W         p     alpha Significant  Effect size r
0  Classical      DV  3.0  1.000000  0.016667          no       0.000000
1  Classical      CV  1.0  0.500000  0.016667          no       0.389417
2         DV      CV  0.0  0.179712  0.016667          no

## STAT ANALYSIS ORGAN

In [85]:
# Switch to the organ dataset; the following cells rebuild `results` and `df` for it.
dataset_name = "organ"


In [86]:
# Gather, for every model, the validation metrics of the last epoch of each fold.
# A missing fold file is reported and skipped, so a model may end up with fewer rows.
# NOTE: pickle.load can execute arbitrary code; only read metric files you produced.
results = {name: [] for name in models}

for model in models:
    for fold in (1, 2, 3):  # assuming 3-fold CV
        file_path = os.path.join(
            metrics_dir, f"{model}_{dataset_name}_fold_{fold}_metrics.pkl"
        )
        if not os.path.exists(file_path):
            print(f"Missing: {file_path}")
            continue

        with open(file_path, "rb") as f:
            train_metrics, val_metrics = pickle.load(f)

        # Get last epoch validation metrics (final entry of each metric history).
        fold_row = {"fold": fold}
        for key in ("acc", "f1", "prec", "rec"):
            fold_row[key] = val_metrics[key][-1]
        results[model].append(fold_row)

In [87]:
# Build one table with a (model, metric) column MultiIndex and one row per fold.
# Models for which no fold file was loaded are left out.
per_model_frames = {}
for model_name, fold_rows in results.items():
    if fold_rows:
        per_model_frames[model_name] = pd.DataFrame(fold_rows).set_index("fold")
df = pd.concat(per_model_frames, axis=1)

print("\nExtracted metrics:")
print(df.round(4))


Extracted metrics:
     classical                              dv                          \
           acc      f1    prec     rec     acc      f1    prec     rec   
fold                                                                     
1       0.5049  0.5049  0.4644  0.5049  0.4688  0.4688  0.4044  0.4688   
2       0.5492  0.5492  0.4985  0.5492  0.4230  0.4230  0.3850  0.4230   
3       0.5483  0.5483  0.4642  0.5483  0.4818  0.4818  0.4526  0.4818   

          cv                          
         acc      f1    prec     rec  
fold                                  
1     0.5535  0.5535  0.5061  0.5535  
2     0.5489  0.5489  0.5049  0.5489  
3     0.5454  0.5454  0.5036  0.5454  


In [88]:
# Display the unrounded per-fold metrics table.
df

Unnamed: 0_level_0,classical,classical,classical,classical,dv,dv,dv,dv,cv,cv,cv,cv
Unnamed: 0_level_1,acc,f1,prec,rec,acc,f1,prec,rec,acc,f1,prec,rec
fold,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,0.504919,0.504919,0.464388,0.504919,0.46875,0.46875,0.40442,0.46875,0.55353,0.55353,0.506059,0.55353
2,0.54919,0.54919,0.498516,0.54919,0.423032,0.423032,0.384956,0.423032,0.5489,0.5489,0.504931,0.5489
3,0.548322,0.548322,0.464197,0.548322,0.481771,0.481771,0.452636,0.481771,0.545428,0.545428,0.503568,0.545428


In [89]:
# Per-metric comparison of the three models across CV folds:
#   1. omnibus Friedman test (+ Kendall's W as effect size),
#   2. pairwise Wilcoxon signed-rank tests with a Bonferroni-corrected threshold.
# NOTE(review): with only 3 folds an exact two-sided Wilcoxon p-value cannot go below
# 2 / 2**3 = 0.25, so no pair can reach the corrected threshold, and SciPy documents
# the Friedman chi-square p-value as reliable only for larger samples. Treat the
# numbers as descriptive rather than confirmatory.
metrics = ["acc", "prec", "rec", "f1"]
models = ["classical", "dv", "cv"]

results_all = []

for metric in metrics:
    # One column per model, one row per fold. A fold that is missing for any model
    # becomes NaN after the column-wise concat, so keep only the folds every model
    # has: both tests are paired and must not be fed NaNs.
    paired = pd.DataFrame({name: df[name][metric] for name in models}).dropna()
    if paired.empty:
        raise ValueError(f"No fold has '{metric}' values for all of {models}")

    classical = paired["classical"].values
    dv = paired["dv"].values
    cv = paired["cv"].values

    k = len(models)  # number of compared models
    n = len(paired)  # number of complete folds

    stat, p = friedmanchisquare(classical, dv, cv)
    df_stat = k - 1
    # Kendall's W from the Friedman statistic: W = chi2 / (n * (k - 1)).
    W = stat / (n * (k - 1))

    pairs = [
        ("Classical", classical, "DV", dv),
        ("Classical", classical, "CV", cv),
        ("DV", dv, "CV", cv),
    ]

    # Bonferroni correction: split the 0.05 level over the pairwise comparisons.
    alpha = 0.05 / len(pairs)
    pair_results = []

    for m1, v1, m2, v2 in pairs:
        if np.allclose(v1, v2):
            # No (numerical) differences to rank, so the test is skipped.
            pair_results.append((m1, m2, np.nan, np.nan, alpha, "Identical", np.nan))
        else:
            stat_w, p_w = wilcoxon(v1, v2)
            # Effect size r = |z| / sqrt(n), with z recovered from the two-sided p-value.
            z = norm.ppf(p_w / 2) if p_w < 1 else 0
            r = abs(z) / np.sqrt(len(v1))
            sig = "Yes" if p_w < alpha else "no"
            pair_results.append((m1, m2, stat_w, p_w, alpha, sig, r))

    results_all.append({
        "Metric": metric,
        "Friedman x": round(stat, 3),
        "df": df_stat,
        "p": round(p, 4),
        "Kendall W": round(W, 3),
        "N (folds)": n,
        "Pairwise": pd.DataFrame(
            pair_results,
            columns=["Model 1", "Model 2", "W", "p", "alpha", "Significant", "Effect size r"],
        ),
    })


In [90]:
# Display summaries
for res in results_all:
    print(f"\n=== {res['Metric'].upper()} ===")
    print(f"Friedman χ²({res['df']}), N={res['N (folds)']}, p={res['p']}, W={res['Kendall W']}")
    print(res["Pairwise"])


=== ACC ===
Friedman χ²(2), N=3, p=0.097, W=0.778
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  0.0  0.25  0.016667          no       0.664155
1  Classical      CV  3.0  1.00  0.016667          no       0.000000
2         DV      CV  0.0  0.25  0.016667          no       0.664155

=== PREC ===
Friedman χ²(2), N=3, p=0.0498, W=1.0
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  0.0  0.25  0.016667          no       0.664155
1  Classical      CV  0.0  0.25  0.016667          no       0.664155
2         DV      CV  0.0  0.25  0.016667          no       0.664155

=== REC ===
Friedman χ²(2), N=3, p=0.097, W=0.778
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  0.0  0.25  0.016667          no       0.664155
1  Classical      CV  3.0  1.00  0.016667          no       0.000000
2         DV      CV  0.0  0.25  0.016667          no       0.664155

=== F1 ===
Friedma

## STAT ANALYSIS BREAST

In [91]:
# Switch to the breast dataset; the following cells rebuild `results` and `df` for it.
dataset_name = "breast"

In [92]:
# Gather, for every model, the validation metrics of the last epoch of each fold.
# A missing fold file is reported and skipped, so a model may end up with fewer rows.
# NOTE: pickle.load can execute arbitrary code; only read metric files you produced.
results = {name: [] for name in models}

for model in models:
    for fold in (1, 2, 3):  # assuming 3-fold CV
        file_path = os.path.join(
            metrics_dir, f"{model}_{dataset_name}_fold_{fold}_metrics.pkl"
        )
        if not os.path.exists(file_path):
            print(f"Missing: {file_path}")
            continue

        with open(file_path, "rb") as f:
            train_metrics, val_metrics = pickle.load(f)

        # Get last epoch validation metrics (final entry of each metric history).
        fold_row = {"fold": fold}
        for key in ("acc", "f1", "prec", "rec"):
            fold_row[key] = val_metrics[key][-1]
        results[model].append(fold_row)

In [93]:
# Build one table with a (model, metric) column MultiIndex and one row per fold.
# Models for which no fold file was loaded are left out.
per_model_frames = {}
for model_name, fold_rows in results.items():
    if fold_rows:
        per_model_frames[model_name] = pd.DataFrame(fold_rows).set_index("fold")
df = pd.concat(per_model_frames, axis=1)

print("\nExtracted metrics:")
print(df.round(4))


Extracted metrics:
     classical                              dv                          \
           acc      f1    prec     rec     acc      f1    prec     rec   
fold                                                                     
1       0.7308  0.7308  0.5340  0.7308  0.7418  0.7418  0.7142  0.7418   
2       0.7473  0.7473  0.7173  0.7473  0.7308  0.7308  0.5340  0.7308   
3       0.7253  0.7253  0.6965  0.7253  0.7033  0.7033  0.5921  0.7033   

          cv                          
         acc      f1    prec     rec  
fold                                  
1     0.6209  0.6209  0.6514  0.6209  
2     0.7198  0.7198  0.7145  0.7198  
3     0.7308  0.7308  0.7161  0.7308  


In [94]:
# Display the unrounded per-fold metrics table.
df

Unnamed: 0_level_0,classical,classical,classical,classical,dv,dv,dv,dv,cv,cv,cv,cv
Unnamed: 0_level_1,acc,f1,prec,rec,acc,f1,prec,rec,acc,f1,prec,rec
fold,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
1,0.730769,0.730769,0.534024,0.730769,0.741758,0.741758,0.714246,0.741758,0.620879,0.620879,0.651447,0.620879
2,0.747253,0.747253,0.717273,0.747253,0.730769,0.730769,0.534024,0.730769,0.71978,0.71978,0.714453,0.71978
3,0.725275,0.725275,0.696498,0.725275,0.703297,0.703297,0.592066,0.703297,0.730769,0.730769,0.716143,0.730769


In [95]:
# Per-metric comparison of the three models across CV folds:
#   1. omnibus Friedman test (+ Kendall's W as effect size),
#   2. pairwise Wilcoxon signed-rank tests with a Bonferroni-corrected threshold.
# NOTE(review): with only 3 folds an exact two-sided Wilcoxon p-value cannot go below
# 2 / 2**3 = 0.25, so no pair can reach the corrected threshold, and SciPy documents
# the Friedman chi-square p-value as reliable only for larger samples. Treat the
# numbers as descriptive rather than confirmatory.
metrics = ["acc", "prec", "rec", "f1"]
models = ["classical", "dv", "cv"]

results_all = []

for metric in metrics:
    # One column per model, one row per fold. A fold that is missing for any model
    # becomes NaN after the column-wise concat, so keep only the folds every model
    # has: both tests are paired and must not be fed NaNs.
    paired = pd.DataFrame({name: df[name][metric] for name in models}).dropna()
    if paired.empty:
        raise ValueError(f"No fold has '{metric}' values for all of {models}")

    classical = paired["classical"].values
    dv = paired["dv"].values
    cv = paired["cv"].values

    k = len(models)  # number of compared models
    n = len(paired)  # number of complete folds

    stat, p = friedmanchisquare(classical, dv, cv)
    df_stat = k - 1
    # Kendall's W from the Friedman statistic: W = chi2 / (n * (k - 1)).
    W = stat / (n * (k - 1))

    pairs = [
        ("Classical", classical, "DV", dv),
        ("Classical", classical, "CV", cv),
        ("DV", dv, "CV", cv),
    ]

    # Bonferroni correction: split the 0.05 level over the pairwise comparisons.
    alpha = 0.05 / len(pairs)
    pair_results = []

    for m1, v1, m2, v2 in pairs:
        if np.allclose(v1, v2):
            # No (numerical) differences to rank, so the test is skipped.
            pair_results.append((m1, m2, np.nan, np.nan, alpha, "Identical", np.nan))
        else:
            stat_w, p_w = wilcoxon(v1, v2)
            # Effect size r = |z| / sqrt(n), with z recovered from the two-sided p-value.
            z = norm.ppf(p_w / 2) if p_w < 1 else 0
            r = abs(z) / np.sqrt(len(v1))
            sig = "Yes" if p_w < alpha else "no"
            pair_results.append((m1, m2, stat_w, p_w, alpha, sig, r))

    results_all.append({
        "Metric": metric,
        "Friedman x": round(stat, 3),
        "df": df_stat,
        "p": round(p, 4),
        "Kendall W": round(W, 3),
        "N (folds)": n,
        "Pairwise": pd.DataFrame(
            pair_results,
            columns=["Model 1", "Model 2", "W", "p", "alpha", "Significant", "Effect size r"],
        ),
    })

In [96]:
# Display summaries
for res in results_all:
    print(f"\n=== {res['Metric'].upper()} ===")
    print(f"Friedman χ²({res['df']}), N={res['N (folds)']}, p={res['p']}, W={res['Kendall W']}")
    print(res["Pairwise"])


=== ACC ===
Friedman χ²(2), N=3, p=0.7165, W=0.111
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  1.0  0.50  0.016667          no       0.389417
1  Classical      CV  1.0  0.50  0.016667          no       0.389417
2         DV      CV  2.0  0.75  0.016667          no       0.183967

=== PREC ===
Friedman χ²(2), N=3, p=0.7165, W=0.111
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  2.0  0.75  0.016667          no       0.183967
1  Classical      CV  1.0  0.50  0.016667          no       0.389417
2         DV      CV  1.0  0.50  0.016667          no       0.389417

=== REC ===
Friedman χ²(2), N=3, p=0.7165, W=0.111
     Model 1 Model 2    W     p     alpha Significant  Effect size r
0  Classical      DV  1.0  0.50  0.016667          no       0.389417
1  Classical      CV  1.0  0.50  0.016667          no       0.389417
2         DV      CV  2.0  0.75  0.016667          no       0.183967

=== F1 ===
Fri