In [22]:
import nibabel as nib
import numpy as np
import os

def dice_coefficient(y_true, y_pred):
    intersection = np.sum(y_true * y_pred)
    return (2. * intersection) / (np.sum(y_true) + np.sum(y_pred) + 1e-6)

def precision(y_true, y_pred):
    tp = np.sum(y_true * y_pred)
    fp = np.sum((1 - y_true) * y_pred)
    return tp / (tp + fp + 1e-6)

def sensitivity(y_true, y_pred):
    tp = np.sum(y_true * y_pred)
    fn = np.sum(y_true * (1 - y_pred))
    return tp / (tp + fn + 1e-6)

def accuracy(y_true, y_pred):
    tp_tn = np.sum(y_true == y_pred)
    return tp_tn / y_true.size

def get_metrics(dataset, subset):
    GT_path = f"{dataset}/{subset}/UPENN-GBM-{subset}_11_combined2_approx_segm.nii.gz"
    Segm_path = f"{dataset}/{subset}/{subset}segmentation.nii.gz"

    # Cargar las imágenes
    GT_img = nib.load(GT_path).get_fdata()
    Segm_img = nib.load(Segm_path).get_fdata()
    GT_img = np.round(GT_img).astype(np.uint8)
    Segm_img = Segm_img.astype(np.uint8) 

    # Definir las etiquetas a evaluar
    labels = [(6, 4), (2, 1)]
    names= ["infiltracion", "edema"]

    metrics_per_label = {label: {"Dice": [], "Precision": [], "Sensitivity": [], "Accuracy": []} for label in labels}
    i=0
    for gt_label, segm_label in labels:
        y_true = (GT_img == gt_label).astype(np.uint8)
        y_pred = (Segm_img == segm_label).astype(np.uint8)

        dice = dice_coefficient(y_true, y_pred)
        prec = precision(y_true, y_pred)
        sens = sensitivity(y_true, y_pred)
        acc = accuracy(y_true, y_pred)

        metrics_per_label[(gt_label, segm_label)]["Dice"].append(dice)
        metrics_per_label[(gt_label, segm_label)]["Precision"].append(prec)
        metrics_per_label[(gt_label, segm_label)]["Sensitivity"].append(sens)
        metrics_per_label[(gt_label, segm_label)]["Accuracy"].append(acc)

        print(f"Subset {subset} - {names[i]}:")
        print(f"  Dice: {dice:.4f}, Precision: {prec:.4f}, Sensitivity: {sens:.4f}, Accuracy: {acc:.4f}")
        i+=1

    return metrics_per_label

# Dataset y subsets
dataset = "./trained_models/segmentations_scale_GT"
# subsets = ['00045', "00055", "00084", "00086", "00128", ]
# Obtener todas las carpetas dentro del dataset
subsets = [folder for folder in os.listdir(dataset) if os.path.isdir(os.path.join(dataset, folder))]
print("Carpetas encontradas:", subsets)

# Estructura para acumular todas las métricas
all_metrics = {label: {"Dice": [], "Precision": [], "Sensitivity": [], "Accuracy": []} for label in [(6, 4), (2, 1)]}

# Obtener métricas para cada subset
for s in subsets:
    subset_metrics = get_metrics(dataset, s)
    for label in all_metrics.keys():
        for metric in all_metrics[label]:
            all_metrics[label][metric].extend(subset_metrics[label][metric])

# Calcular promedio y desviación estándar por etiqueta
print("\n===== MÉTRICAS GLOBALES =====")
for label in all_metrics:
    print(f"\nMétricas para GT({label[0]}) vs Segm({label[1]}):")
    for metric, values in all_metrics[label].items():
        mean_val = np.mean(values)
        std_val = np.std(values)
        print(f"  {metric}: Media = {mean_val:.4f}, Std = {std_val:.4f}")





Carpetas encontradas: ['00134', '00045', '00352', '00307', '00128', '00285', '00084', '00055', '00129', '00353', '00086']
Subset 00134 - infiltracion:
  Dice: 0.4050, Precision: 0.4155, Sensitivity: 0.3951, Accuracy: 0.9996
Subset 00134 - edema:
  Dice: 0.6936, Precision: 0.9145, Sensitivity: 0.5587, Accuracy: 0.9987
Subset 00045 - infiltracion:
  Dice: 0.5034, Precision: 0.3579, Sensitivity: 0.8483, Accuracy: 0.9989
Subset 00045 - edema:
  Dice: 0.7049, Precision: 0.8729, Sensitivity: 0.5912, Accuracy: 0.9983
Subset 00352 - infiltracion:
  Dice: 0.2831, Precision: 0.1833, Sensitivity: 0.6214, Accuracy: 0.9991
Subset 00352 - edema:
  Dice: 0.5255, Precision: 0.7807, Sensitivity: 0.3960, Accuracy: 0.9985
Subset 00307 - infiltracion:
  Dice: 0.6184, Precision: 0.6894, Sensitivity: 0.5607, Accuracy: 0.9998
Subset 00307 - edema:
  Dice: 0.5701, Precision: 0.7719, Sensitivity: 0.4519, Accuracy: 0.9993
Subset 00128 - infiltracion:
  Dice: 0.2130, Precision: 0.1736, Sensitivity: 0.2755, Accur

In [28]:
import nibabel as nib
import numpy as np
import os

# 🔹 Función para calcular el coeficiente Dice
def dice_coefficient(y_true, y_pred):
    intersection = np.sum(y_true * y_pred)
    return (2. * intersection) / (np.sum(y_true) + np.sum(y_pred) + 1e-6)

# 🔹 Función para calcular IoU (Intersection over Union)
def iou(y_true, y_pred):
    intersection = np.sum(y_true * y_pred)
    union = np.sum(y_true) + np.sum(y_pred) - intersection
    return intersection / (union + 1e-6)

# 🔹 Función para calcular precisión
def precision(y_true, y_pred):
    tp = np.sum(y_true * y_pred)
    fp = np.sum((1 - y_true) * y_pred)
    return tp / (tp + fp + 1e-6)

# 🔹 Función para calcular sensibilidad (Recall)
def sensitivity(y_true, y_pred):
    tp = np.sum(y_true * y_pred)
    fn = np.sum(y_true * (1 - y_pred))
    return tp / (tp + fn + 1e-6)

# 🔹 Función para calcular Balanced Accuracy (Accuracy por clase, excluyendo fondo)
def balanced_accuracy(y_true, y_pred):
    recalls = []
    specificities = []
    unique_labels = np.unique(y_true)
    
    for label in unique_labels:
        if label == 0: continue  # Ignorar fondo
        
        y_true_bin = (y_true == label).astype(np.uint8)
        y_pred_bin = (y_pred == label).astype(np.uint8)

        tp = np.sum(y_true_bin * y_pred_bin)
        fn = np.sum(y_true_bin * (1 - y_pred_bin))
        tn = np.sum((1 - y_true_bin) * (1 - y_pred_bin))
        fp = np.sum((1 - y_true_bin) * y_pred_bin)

        recall = tp / (tp + fn + 1e-6)
        specificity = tn / (tn + fp + 1e-6)
        
        recalls.append(recall)
        specificities.append(specificity)

    return np.mean(recalls), np.mean(specificities)  # Retorna ambas métricas


# 🔹 Función para calcular métricas
def get_metrics(dataset, subset):
    GT_path = f"{dataset}/{subset}/UPENN-GBM-{subset}_11_combined2_approx_segm.nii.gz"
    Segm_path = f"{dataset}/{subset}/{subset}segmentation.nii.gz"

    # Cargar las imágenes
    GT_img = nib.load(GT_path).get_fdata()
    Segm_img = nib.load(Segm_path).get_fdata()
    GT_img = np.round(GT_img).astype(np.uint8)
    Segm_img = Segm_img.astype(np.uint8) 

    # Definir las etiquetas a evaluar
    labels = [(6, 4), (2, 1)]
    metrics_results = []

    for gt_label, segm_label in labels:
        y_true = (GT_img == gt_label).astype(np.uint8)
        y_pred = (Segm_img == segm_label).astype(np.uint8)

        dice = dice_coefficient(y_true, y_pred)
        iou_value = iou(y_true, y_pred)
        prec = precision(y_true, y_pred)
        sens = sensitivity(y_true, y_pred)
        # sensi, spec = balanced_accuracy(y_true, y_pred)
        # bal_acc = (sensi + spec) / 2  # Balanced Accuracy real

        # Máscara que excluye el fondo
        foreground_mask = (y_true > 0) | (y_pred > 0)

        # Aplicar la máscara
        fp = np.sum((y_true == 0) & (y_pred == 1) & foreground_mask)
        tn = np.sum((y_true == 0) & (y_pred == 0) & foreground_mask)

        # Evitar división por cero
        spec= tn / (tn + fp) if (tn + fp) > 0 else 0
        bal_acc = (sens + spec) / 2

        metrics_results.append([dice, iou_value, prec, sens, bal_acc])

        print(f"Subset {subset} - GT({gt_label}) vs Segm({segm_label}):")
        print(f"  Dice: {dice:.4f}, IoU: {iou_value:.4f}, Precision: {prec:.4f}, Sensitivity: {sens:.4f}, ,Balanced Accuracy: {bal_acc:.4f}")

    return np.array(metrics_results)


# 🔹 Obtener todas las carpetas dentro del dataset
dataset = "./trained_models/segmentations_scale_GT"
subsets = sorted([d for d in os.listdir(dataset) if os.path.isdir(os.path.join(dataset, d))])

# 🔹 Almacenar métricas para calcular promedios y desviaciones estándar
all_metrics = []

for s in subsets:
    metrics = get_metrics(dataset, s)
    all_metrics.append(metrics)

# 🔹 Convertir a numpy para cálculos estadísticos
all_metrics = np.array(all_metrics)  # Dimensión: (N_subsets, N_labels, N_métricas)

# 🔹 Calcular promedio y desviación estándar por métrica y etiqueta
metric_names = ["Dice", "IoU", "Precision", "Sensitivity", "Balanced Accuracy"]

print("\n🔹 Promedios y Desviaciones Estándar por métrica y etiqueta:\n")
for i, (gt_label, segm_label) in enumerate([(6, 4), (2, 1)]):
    print(f"🔹 GT({gt_label}) vs Segm({segm_label}):")
    for j, metric_name in enumerate(metric_names):
        mean_val = np.mean(all_metrics[:, i, j])
        std_val = np.std(all_metrics[:, i, j])
        print(f"  {metric_name}: {mean_val:.4f} ± {std_val:.4f}")
    print()


Subset 00045 - GT(6) vs Segm(4):
  Dice: 0.5034, IoU: 0.3363, Precision: 0.3579, Sensitivity: 0.8483, ,Balanced Accuracy: 0.4241
Subset 00045 - GT(2) vs Segm(1):
  Dice: 0.7049, IoU: 0.5443, Precision: 0.8729, Sensitivity: 0.5912, ,Balanced Accuracy: 0.2956
Subset 00055 - GT(6) vs Segm(4):
  Dice: 0.6598, IoU: 0.4923, Precision: 0.7484, Sensitivity: 0.5900, ,Balanced Accuracy: 0.2950
Subset 00055 - GT(2) vs Segm(1):
  Dice: 0.7228, IoU: 0.5659, Precision: 0.6734, Sensitivity: 0.7800, ,Balanced Accuracy: 0.3900
Subset 00084 - GT(6) vs Segm(4):
  Dice: 0.1446, IoU: 0.0779, Precision: 0.0892, Sensitivity: 0.3806, ,Balanced Accuracy: 0.1903
Subset 00084 - GT(2) vs Segm(1):
  Dice: 0.8309, IoU: 0.7107, Precision: 0.9512, Sensitivity: 0.7376, ,Balanced Accuracy: 0.3688
Subset 00086 - GT(6) vs Segm(4):
  Dice: 0.4092, IoU: 0.2573, Precision: 0.4887, Sensitivity: 0.3520, ,Balanced Accuracy: 0.1760
Subset 00086 - GT(2) vs Segm(1):
  Dice: 0.8129, IoU: 0.6848, Precision: 0.9116, Sensitivity: 0.7