# Import

In [22]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import matplotlib.colors as mcolors
from experiment_hyperparameters import SPLITS, SIZES

from Arquitecture.InformationExtractor import InformationExtractor
from experiment_result_processing import  rebuild_model, build_dataset, get_results, plot_and_save_confusion_matrix

In [3]:
# Root folder holding one sub-directory of results per machine.
# The location can be overridden with the IVIT_RESULT_PATH environment
# variable so the notebook also runs on machines that do not use the author's
# Desktop layout; without the variable the original default path is used.
RESULT_PATH = os.path.expanduser(
    os.environ.get("IVIT_RESULT_PATH", "~/Desktop/temp/resultados/")
)

In [4]:
# Names of the per-machine result folders found under RESULT_PATH.
# - Only real directories are kept: archives (*.zip) and stray files such as
#   .DS_Store would otherwise make the aggregation loop fail on os.listdir.
# - The list is sorted because os.listdir returns entries in arbitrary order,
#   which would make the row order of the aggregated table non-reproducible.
pcs = sorted(
    entry
    for entry in os.listdir(RESULT_PATH)
    if not entry.endswith(".zip") and os.path.isdir(os.path.join(RESULT_PATH, entry))
)
pcs

['ordenador1', 'ordenador2', 'ordenador3']

In [6]:
# Aggregate the metrics of every run into a dict of equally long lists
# (one list per output column), ready to be turned into a DataFrame.
#
# Directory layout walked here:
#   RESULT_PATH/<pc>/iViT_RESULTS/<experiment>_<seed>/<split>/<size>/result.csv
metric_columns = ['Precision', 'Recall', 'F1_score', 'Accuracy', 'AUC_ROC']
experiment_results = {column: [] for column in ['split', 'seed', 'size'] + metric_columns}


def _subdirectories(path):
    """Return the sorted names of the directories directly under ``path``."""
    return sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )


for pc_name in pcs:
    pc_dir = os.path.join(RESULT_PATH, pc_name, "iViT_RESULTS")
    for experiment in _subdirectories(pc_dir):
        # The bare "experiment_pipeline" entry is not a seeded run; skip it
        # without failing when a machine does not have it.
        if experiment == "experiment_pipeline":
            continue
        experiment_dir = os.path.join(pc_dir, experiment)
        # The seed is the suffix after the last underscore of the folder name.
        seed = experiment.split("_")[-1]
        for split in _subdirectories(experiment_dir):
            for i in SIZES:
                result_df = pd.read_csv(os.path.join(experiment_dir, split, str(i), "result.csv"))

                experiment_results["split"].append(split)
                experiment_results["seed"].append(seed)
                experiment_results["size"].append(i)

                # Read exactly the expected metric columns from the last row of
                # the CSV (presumably the aggregate row - TODO confirm). Using
                # the fixed column list keeps every list the same length; other
                # columns such as "Clase" are ignored.
                for metric in metric_columns:
                    experiment_results[metric].append(float(result_df[metric].iloc[-1]))

In [7]:
# Turn the collected per-run metrics into a table and write it as CSV
# (relative path, so it lands in the current working directory).
df = pd.DataFrame(data=experiment_results)
df.to_csv(path_or_buf="seed_dependecy_check.csv")

In [None]:
from scipy import stats

# For every (split, size) pair, print the mean Precision over all runs together
# with its two-sided 95% confidence interval based on the Student t
# distribution: mean +/- t_crit * std / sqrt(n).
alpha = 0.05

for split in SPLITS:
    print(split)
    for i in SIZES:
        # Select the Precision values of this (split, size) group once.
        precision_values = df.loc[(df["split"] == split) & (df["size"] == i), "Precision"]

        # count() ignores NaNs, so n matches the values that mean()/std() use.
        sample_size = precision_values.count()
        print(f"  size={i} (n={sample_size})")

        # The sample standard deviation and the t quantile need n >= 2.
        if sample_size < 2:
            print("  not enough runs to compute a confidence interval")
            continue

        sample_mean = precision_values.mean()
        sample_std = precision_values.std(ddof=1)

        t_crit = stats.t.ppf(1 - alpha/2, df=sample_size-1)
        sem    = sample_std / np.sqrt(sample_size)
        margin = t_crit * sem
        lower  = sample_mean - margin
        upper  = sample_mean + margin

        print(f"Precision Mean: {sample_mean:.4f} ± {margin:.4f}")
        print(f"IC 95%: [{lower:.4f}, {upper:.4f}]")

digits
Precision Mean: 0.5377 ± 0.0796
IC 95%: [0.4581, 0.6173]
Precision Mean: 0.6989 ± 0.0860
IC 95%: [0.6128, 0.7849]
Precision Mean: 0.7783 ± 0.0896
IC 95%: [0.6887, 0.8679]
Precision Mean: 0.7806 ± 0.0819
IC 95%: [0.6987, 0.8626]
Precision Mean: 0.8543 ± 0.0621
IC 95%: [0.7921, 0.9164]
Precision Mean: 0.8218 ± 0.0567
IC 95%: [0.7651, 0.8785]
Precision Mean: 0.7767 ± 0.0945
IC 95%: [0.6822, 0.8711]
Precision Mean: 0.8459 ± 0.0770
IC 95%: [0.7689, 0.9229]
Precision Mean: 0.8264 ± 0.0706
IC 95%: [0.7559, 0.8970]
Precision Mean: 0.8504 ± 0.0728
IC 95%: [0.7776, 0.9232]
fashion
Precision Mean: 0.4998 ± 0.0945
IC 95%: [0.4053, 0.5943]
Precision Mean: 0.5481 ± 0.0774
IC 95%: [0.4707, 0.6256]
Precision Mean: 0.5945 ± 0.0546
IC 95%: [0.5400, 0.6491]
Precision Mean: 0.6255 ± 0.0805
IC 95%: [0.5450, 0.7060]
Precision Mean: 0.6301 ± 0.0836
IC 95%: [0.5466, 0.7137]
Precision Mean: 0.6496 ± 0.0814
IC 95%: [0.5683, 0.7310]
Precision Mean: 0.6946 ± 0.0855
IC 95%: [0.6091, 0.7800]
Precision Mean: 

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

# Assumes df, SPLITS and SIZES are already defined, e.g.:
#   SPLITS = df['split'].unique()
#   SIZES  = sorted(df['size'].unique())
METRICS = ['Precision', 'Recall', 'F1_score', 'Accuracy', 'AUC_ROC']


# One figure per split: for each metric, draw the mean over runs against the
# size and shade the 95% confidence interval of that mean.
for split in SPLITS:
    fig, ax = plt.subplots()
    for metric in METRICS:
        means, lowers, uppers = [], [], []
        for size in SIZES:
            in_group = (df['split'] == split) & (df['size'] == size)
            sub = df.loc[in_group, metric]
            n = sub.count()
            sample_mean = sub.mean()
            sample_std = sub.std(ddof=1)

            # 95% confidence interval of the mean (Student t, n - 1 degrees of freedom)
            alpha = 0.05
            sem = sample_std / np.sqrt(n)
            t_crit = stats.t.ppf(1 - alpha/2, df=n-1)
            margin = t_crit * sem

            means.append(sample_mean)
            lowers.append(sample_mean - margin)
            uppers.append(sample_mean + margin)

        # Mean line plus shaded confidence band for this metric
        ax.plot(SIZES, means, marker='o', label=metric)
        ax.fill_between(SIZES, lowers, uppers, alpha=0.2)

    ax.set_xlabel('Size')
    ax.set_ylabel('Metric value')
    ax.set_title(f'With a CI of 95%: {split}')
    # Place the legend outside the axes, on the right
    ax.legend(title='Metric', loc='center left', bbox_to_anchor=(1, 0.5))
    fig.tight_layout()  # adjust the layout so nothing gets clipped
    fig.savefig(f"{split}_metrics.png")
    plt.close(fig)

In [16]:
# Run(s) with the highest Precision on the "digits" split.
# The comparison is restricted to the digits rows: comparing the digits maximum
# against the whole table could also return rows of other splits that happen to
# have the same Precision value.
digits_runs = df[df["split"] == "digits"]
digits_runs[digits_runs["Precision"] == digits_runs["Precision"].max()]

Unnamed: 0,split,seed,size,Precision,Recall,F1_score,Accuracy,AUC_ROC
31,digits,5,2,0.994384,0.994375,0.994377,0.998875,0.996875


In [28]:
# Confusion matrix for the digits run selected above (seed 5, size 2, stored
# under ordenador1). rebuild_model / build_dataset / get_results are project
# helpers; presumably they load the saved weights, build the evaluation set and
# return true/predicted labels - TODO confirm against their definitions.
digits_model_path = os.path.join(
    RESULT_PATH, "ordenador1", "iViT_RESULTS", "experiment_pipeline_5", "digits", "2", "model.pth"
)
model = rebuild_model("digits", 2, digits_model_path)
test = build_dataset("digits")
true, pred, _ = get_results(test, model)
# Total number of elements across all parameter tensors of the model.
total_params = sum(param.numel() for param in model.parameters())
print("Total parameters:", total_params)
plot_and_save_confusion_matrix(true, pred, "best_digits.png", total_params)

Total parameters: 360177


In [15]:
# Run(s) with the highest Precision on the "balanced" split.
# The comparison is restricted to the balanced rows: comparing the balanced
# maximum against the whole table could also return rows of other splits that
# happen to have the same Precision value.
balanced_runs = df[df["split"] == "balanced"]
balanced_runs[balanced_runs["Precision"] == balanced_runs["Precision"].max()]

Unnamed: 0,split,seed,size,Precision,Recall,F1_score,Accuracy,AUC_ROC
430,balanced,16,1,0.857837,0.857819,0.857198,0.99395,0.927364


In [30]:
# Confusion matrix for the balanced run selected above (seed 16, size 1, stored
# under ordenador3). rebuild_model / build_dataset / get_results are project
# helpers; presumably they load the saved weights, build the evaluation set and
# return true/predicted labels - TODO confirm against their definitions.
balanced_model_path = os.path.join(
    RESULT_PATH, "ordenador3", "iViT_RESULTS", "experiment_pipeline_16", "balanced", "1", "model.pth"
)
model = rebuild_model("balanced", 1, balanced_model_path)
test = build_dataset("balanced")
true, pred, _ = get_results(test, model)
# Total number of elements across all parameter tensors of the model.
total_params = sum(param.numel() for param in model.parameters())
print("Total parameters:", total_params)
plot_and_save_confusion_matrix(true, pred, "best_balanced.png", total_params)

Total parameters: 343028


In [14]:
# Run(s) with the highest Precision on the "fashion" split.
# The comparison is restricted to the fashion rows: comparing the fashion
# maximum against the whole table could also return rows of other splits that
# happen to have the same Precision value.
fashion_runs = df[df["split"] == "fashion"]
fashion_runs[fashion_runs["Precision"] == fashion_runs["Precision"].max()]

Unnamed: 0,split,seed,size,Precision,Recall,F1_score,Accuracy,AUC_ROC
530,fashion,15,1,0.89444,0.8936,0.893962,0.97872,0.940889


In [None]:
# Confusion matrix for the fashion run selected above (seed 15, size 1, stored
# under ordenador3). rebuild_model / build_dataset / get_results are project
# helpers; presumably they load the saved weights, build the evaluation set and
# return true/predicted labels - TODO confirm against their definitions.
fashion_model_path = os.path.join(
    RESULT_PATH, "ordenador3", "iViT_RESULTS", "experiment_pipeline_15", "fashion", "1", "model.pth"
)
model = rebuild_model("fashion", 1, fashion_model_path)
fashion_test = build_dataset("fashion")
true, pred, _ = get_results(fashion_test, model)
# Total number of elements across all parameter tensors of the model.
total_params = sum(param.numel() for param in model.parameters())
print("Total parameters:", total_params)
plot_and_save_confusion_matrix(true, pred, "best_fashion.png", total_params)