# primer paso
Lo que hacemos aquí es ver si el problema al que nos enfrentamos es multiclase o multietiqueta; eso nos sirve para elegir el mejor enfoque al momento de entrenar el modelo.


In [4]:
import os                         # Environment-variable overrides for data locations
import random                     # Seeding Python's built-in RNG
from collections import defaultdict, Counter      # Handy dicts (per-lesion bags, counts)
from pathlib import Path          # Path handling (cleaner than strings)

import numpy as np                # Numeric operations, arrays
import pandas as pd               # Read the CSV and manipulate tabular data
from PIL import Image, ImageFile  # Load .jpg/.png images from disk

import torch                      # PyTorch core (tensors, device)
from torch import nn              # Layers and losses (Linear, CrossEntropyLoss, etc.)
from torch.utils.data import Dataset, DataLoader  # Dataset/DataLoader
import torchvision                # PyTorch vision ecosystem
from torchvision import transforms as T, models   # Preprocessing and pretrained models


Es un problema multiclase: cada foto representa una sola enfermedad, no varias a la vez. Con esto ya podemos comenzar a construir el dataset junto con sus etiquetas para entrenar el modelo.

# Creación del dataset

Este dataset (espero haber descargado el correcto desde la página) tiene 2 imágenes por lesión: una de lejos y otra de cerca. Primero pensé en usar cada imagen por separado y ver en cuántas de ellas el modelo predecía bien, pero creo que en un consultorio un médico podría tomar varias fotos de la malformación; por ello creo que es mejor usarlas en conjunto, es decir, buscar la forma de usar las 2 imágenes para una sola predicción. Por ello, en este paso crearemos primero un dataset que nos permita hacer eso.

In [6]:
# Allow PIL to open incomplete JPGs (common after copying to USB drives)
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Dataset locations. Everything hangs off a single root so the notebook can be
# moved to another machine by setting the MILK10K_ROOT environment variable;
# when it is not set, the original local folder is used.
DATA_ROOT = Path(os.environ.get("MILK10K_ROOT", "/Users/kevinjiro/Desktop/data Piel"))
DATA_DIR = DATA_ROOT / "Train" / "MILK10k_Training_Input"            # folder with the images
CSV_PATH = DATA_ROOT / "Train" / "MILK10k_Training_GroundTruth.csv"  # CSV with the labels
TEST_DIR = DATA_ROOT / "test"                                        # reserved for later

# CPU vs GPU (use the GPU when one is available)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Reasonable reproducibility: seed every RNG the notebook may touch
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)
np.random.seed(SEED)


In [7]:
# Placeholder class count; the real value is derived from the CSV in the next step
num_classes = 11

# ResNet-18 initialised with ImageNet weights
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
# Swap the final fully connected layer for one sized to our classes
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(DEVICE)


In [8]:
# Read the ground-truth CSV
df = pd.read_csv(CSV_PATH)

# Class columns: every column except lesion_id
class_cols = [c for c in df.columns if c != "lesion_id"]
num_classes = len(class_cols)
print("Clases:", class_cols)
print("Número de clases:", num_classes)

# Label matrix, one row per CSV row and one column per class
labels_array = df[class_cols].values

# argmax below is only a faithful label when every row has exactly one active
# class. Report (without aborting) any row that breaks that assumption so a
# multi-label or empty row is not silently collapsed into a single class.
rows_not_one_hot = int((labels_array.sum(axis=1) != 1).sum())
if rows_not_one_hot:
    print(f"ADVERTENCIA: {rows_not_one_hot} filas del CSV no son one-hot; "
          "argmax les asignará una sola clase.")

# Mapping lesion_id -> class index (one vectorised argmax instead of one per row)
lesion2label = dict(zip(df["lesion_id"], labels_array.argmax(axis=1).tolist()))

# Quick check
print("Ejemplo:", list(lesion2label.items())[:5])


Clases: ['AKIEC', 'BCC', 'BEN_OTH', 'BKL', 'DF', 'INF', 'MAL_OTH', 'MEL', 'NV', 'SCCKA', 'VASC']
Número de clases: 11
Ejemplo: [('IL_0000652', 1), ('IL_0003176', 1), ('IL_0004688', 1), ('IL_0005081', 9), ('IL_0006177', 1)]


In [10]:
# Image-level sample list: (image_path, class_idx, lesion_id)
samples = []
missing_folders = 0  # lesions listed in the CSV that have no folder on disk

for lid, label_idx in lesion2label.items():
    folder = DATA_DIR / lid
    if not folder.exists():
        missing_folders += 1
        continue
    # glob() yields files in filesystem order; sort for a reproducible sample list
    for img_path in sorted(folder.glob("*.jpg")):
        if img_path.name.startswith("._"):   # skip macOS resource-fork files
            continue
        samples.append((img_path, label_idx, lid))

print("Total imágenes encontradas:", len(samples))
if missing_folders:
    print("Lesiones sin carpeta de imágenes:", missing_folders)
# Guarded so an empty result prints None instead of raising IndexError
print("Ejemplo de sample:", samples[0] if samples else None)


Total imágenes encontradas: 10480
Ejemplo de sample: (PosixPath('/Users/kevinjiro/Desktop/data Piel/Train/MILK10k_Training_Input/IL_0000652/ISIC_4671410.jpg'), 1, 'IL_0000652')


In [11]:
from collections import Counter, defaultdict

# Number of images each lesion contributed to `samples`
imgs_por_lesion = Counter(lid for _, _, lid in samples)
print("Lesiones con >1 imagen:", sum(1 for n in imgs_por_lesion.values() if n > 1))
print(
    "Ejemplo (una lesión con varias imágenes):",
    next(((lid, n) for lid, n in imgs_por_lesion.items() if n > 1), "Ninguna"),
)

import numpy as np
from collections import defaultdict

rng = np.random.default_rng(42)
val_frac = 0.2

# 1) Group lesions by class, keeping only lesions that have images in `samples`
class2lesions = defaultdict(list)
for lid, y in lesion2label.items():
    if imgs_por_lesion[lid] > 0:
        class2lesions[y].append(lid)

# 2) Stratified split per class, done at lesion_id level so that all images of
#    one lesion land on the same side of the split
train_lids, val_lids = [], []
for y, lids in class2lesions.items():
    lids = np.array(lids)
    rng.shuffle(lids)
    n_val = max(1, int(len(lids)*val_frac))  # at least one validation lesion per class
    val_lids.extend(lids[:n_val].tolist())
    train_lids.extend(lids[n_val:].tolist())

# 3) Expand to image level. The membership sets are built once here; building
#    them inside the loop would redo that work for every single image.
train_lid_set, val_lid_set = set(train_lids), set(val_lids)
train_samples, val_samples = [], []
for p, y, lid in samples:
    if lid in train_lid_set:
        train_samples.append((p, y, lid))
    elif lid in val_lid_set:
        val_samples.append((p, y, lid))

print("Lesiones train:", len(train_lid_set), "| imágenes train:", len(train_samples))
print("Lesiones val:",   len(val_lid_set),   "| imágenes val:",   len(val_samples))
print("Intersección de lesion_id:", train_lid_set & val_lid_set)  # must be set()
# Fail loudly if a lesion ever ends up on both sides
assert train_lid_set.isdisjoint(val_lid_set), "lesion_id compartido entre train y val"


Lesiones con >1 imagen: 5240
Ejemplo (una lesión con varias imágenes): ('IL_0000652', 2)
Lesiones train: 4197 | imágenes train: 8394
Lesiones val: 1043 | imágenes val: 2086
Intersección de lesion_id: set()


In [12]:
import torchvision.transforms as T

img_size = 224

# Pieces shared by both pipelines: target size and channel normalisation
target_hw = (img_size, img_size)
normalize = T.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])

# Training pipeline: resize, light geometric/colour augmentation, tensor, normalise
train_tfms = T.Compose(
    [
        T.Resize(target_hw),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomRotation(degrees=10),
        T.ColorJitter(brightness=0.15, contrast=0.15, saturation=0.1, hue=0.02),
        T.ToTensor(),
        normalize,
    ]
)

# Validation pipeline: same preprocessing without any random augmentation
val_tfms = T.Compose([T.Resize(target_hw), T.ToTensor(), normalize])


In [13]:
from PIL import Image, ImageFile
from torch.utils.data import Dataset, DataLoader
import torch

ImageFile.LOAD_TRUNCATED_IMAGES = True  # open truncated JPGs without failing

class LesionDataset(Dataset):
    """
    Image-level dataset that also returns the lesion each image belongs to.

    Args:
        samples: list of tuples (image_path, class_idx, lesion_id).
        transforms: callable applied to the RGB PIL image after loading.

    Each item is ``(x, y, lesion_id)`` where ``x`` is whatever ``transforms``
    returns, ``y`` is a ``torch.long`` scalar tensor and ``lesion_id`` is a str.
    """
    def __init__(self, samples, transforms):
        self.samples = samples
        self.transforms = transforms

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        path, y, lid = self.samples[idx]
        try:
            # The context manager releases the file handle deterministically;
            # convert() returns an independent image that stays valid afterwards.
            with Image.open(path) as img:
                x = img.convert("RGB")
        except Exception as e:
            # Chain the original exception so its traceback is not lost
            raise RuntimeError(f"Error abriendo {path}: {e}") from e
        x = self.transforms(x)
        y = torch.tensor(y, dtype=torch.long)  # CrossEntropyLoss expects integer class ids
        return x, y, str(lid)

# Dataset instances: augmented pipeline for train, deterministic one for val
train_ds = LesionDataset(samples=train_samples, transforms=train_tfms)
val_ds = LesionDataset(samples=val_samples, transforms=val_tfms)

# Single-process loading (0 workers) while debugging
train_dl = DataLoader(
    dataset=train_ds, batch_size=32, shuffle=True, num_workers=0, pin_memory=False
)
val_dl = DataLoader(
    dataset=val_ds, batch_size=64, shuffle=False, num_workers=0, pin_memory=False
)

# Quick check: pull the first validation batch and inspect it
first_batch = next(iter(val_dl))
xb, yb, lidb = first_batch
print("xb:", xb.shape)           # (B, 3, 224, 224)
print("yb:", yb.shape, yb[:5])   # (B,)
print("lid ejemplo:", lidb[:5])  # lesion_id strings


xb: torch.Size([64, 3, 224, 224])
yb: torch.Size([64]) tensor([1, 1, 8, 8, 1])
lid ejemplo: ('IL_0003176', 'IL_0003176', 'IL_0008891', 'IL_0008891', 'IL_0019048')


In [14]:
import torch.nn as nn
import torchvision.models as models

# Class count taken from the CSV columns read earlier
num_classes = len(class_cols)

# ImageNet-pretrained ResNet-18 with a new classification head
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
model = model.to(DEVICE)

# Loss and optimiser
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [15]:
from collections import defaultdict

@torch.no_grad()
def evaluate_late_fusion(model, loader, device=None, loss_fn=None):
    """Evaluate a classifier per image and per lesion (late fusion).

    Per-lesion predictions are obtained by averaging the logits of every image
    that shares a lesion_id and taking the argmax of that average.

    Args:
        model: network mapping an image batch to logits of shape (B, C).
        loader: iterable yielding ``(images, targets, lesion_ids)`` batches.
        device: device the batches are moved to. When None, the device of the
            model's first parameter is used.
        loss_fn: callable ``loss_fn(logits, targets)`` returning a scalar tensor.
            When None, the notebook-level ``criterion`` is used.

    Returns:
        dict with keys ``img_acc``, ``img_loss``, ``lesion_acc``, ``n_images``
        and ``n_lesions``.
    """
    if device is None:
        device = next(model.parameters()).device
    if loss_fn is None:
        loss_fn = criterion  # module-level loss defined in the notebook
    model.eval()

    total_img, correct_img, loss_img = 0, 0, 0.0

    # Per-lesion accumulators; sized from the logits themselves so the function
    # does not depend on a global class count.
    lesion_logits_sum = {}
    lesion_counts     = defaultdict(int)
    lesion_targets    = {}

    for xb, yb, lidb in loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        logits = model(xb)                    # (B, C)
        batch_size = xb.size(0)
        loss_img    += loss_fn(logits, yb).item() * batch_size
        correct_img += (logits.argmax(dim=1) == yb).sum().item()
        total_img   += batch_size

        # One transfer per batch instead of one .item() sync per image
        logits_cpu = logits.float().cpu()
        targets    = yb.tolist()
        for row, lid, target in zip(logits_cpu, lidb, targets):
            if lid in lesion_logits_sum:
                lesion_logits_sum[lid] += row
            else:
                lesion_logits_sum[lid] = row.clone()
            lesion_counts[lid]  += 1
            lesion_targets[lid]  = target

    # Per-lesion metrics
    correct_lesion, total_lesion = 0, 0
    for lid, zsum in lesion_logits_sum.items():
        zmean = zsum / lesion_counts[lid]
        pred  = int(zmean.argmax().item())
        correct_lesion += int(pred == lesion_targets[lid])
        total_lesion   += 1

    # max(1, ...) keeps an empty loader from dividing by zero
    img_acc    = correct_img / max(1, total_img)
    img_loss   = loss_img / max(1, total_img)
    lesion_acc = correct_lesion / max(1, total_lesion)

    return {"img_acc": img_acc, "img_loss": img_loss, "lesion_acc": lesion_acc,
            "n_images": total_img, "n_lesions": total_lesion}

# Smoke test before any training: accuracy is expected to be low, this only
# checks that the evaluation pipeline runs end to end.
metrics = evaluate_late_fusion(model=model, loader=val_dl)
print(metrics)


{'img_acc': 0.1860019175455417, 'img_loss': 2.300363646257644, 'lesion_acc': 0.20038350910834132, 'n_images': 2086, 'n_lesions': 1043}


In [16]:
import torch
from collections import defaultdict
from typing import Dict, List, Tuple

@torch.no_grad()
def evaluate_late_fusion_with_preds(
    model,
    loader,
    num_classes: int,
    criterion,
    device=None,
):
    """
    Evaluate per image and per lesion (late fusion) and return the predictions.

    Args:
        model: network mapping an image batch to logits of shape (B, num_classes).
        loader: iterable yielding ``(images, targets, lesion_ids)`` batches.
        num_classes: number of output classes; sizes the per-lesion accumulators.
        criterion: callable ``criterion(logits, targets)`` returning a scalar tensor.
        device: device the batches are moved to; defaults to the model's device.

    Returns:
      - metrics: dict with image-level and lesion-level acc/loss
      - y_true_lesion: list[int] with the true class of each lesion
      - y_pred_lesion: list[int] with the predicted class of each lesion (late fusion)
    """
    if device is None:
        device = next(model.parameters()).device
    model.eval()

    total_img, correct_img, loss_img = 0, 0, 0.0

    # Per-lesion accumulators, kept on the CPU so each batch needs a single
    # device-to-host transfer instead of one synchronisation per image.
    lesion_logits_sum = defaultdict(lambda: torch.zeros(num_classes))
    lesion_counts     = defaultdict(int)
    lesion_targets    = {}

    for xb, yb, lidb in loader:
        xb = xb.to(device, non_blocking=True)
        yb = yb.to(device, non_blocking=True)

        logits = model(xb)                 # (B, C)
        batch_size = xb.size(0)
        loss_img    += criterion(logits, yb).item() * batch_size
        correct_img += (logits.argmax(1) == yb).sum().item()
        total_img   += batch_size

        # Accumulate per lesion (late fusion)
        logits_cpu = logits.float().cpu()
        targets    = yb.tolist()
        for row, lid, target in zip(logits_cpu, lidb, targets):
            lesion_logits_sum[lid] += row
            lesion_counts[lid]     += 1
            lesion_targets[lid]     = target

    # Build y_true / y_pred per lesion, in first-seen lesion order
    y_true_lesion: List[int] = []
    y_pred_lesion: List[int] = []
    for lid, zsum in lesion_logits_sum.items():
        zmean = zsum / lesion_counts[lid]
        y_pred_lesion.append(int(zmean.argmax().item()))
        y_true_lesion.append(lesion_targets[lid])

    n_hits = sum(int(p == t) for p, t in zip(y_pred_lesion, y_true_lesion))
    metrics = {
        # max(1, ...) keeps an empty loader from dividing by zero
        "img_acc": correct_img / max(1, total_img),
        "img_loss": loss_img / max(1, total_img),
        "lesion_acc": n_hits / max(1, len(y_true_lesion)),
        "n_images": total_img,
        "n_lesions": len(y_true_lesion),
    }
    return metrics, y_true_lesion, y_pred_lesion


In [17]:
metrics, y_true, y_pred = evaluate_late_fusion_with_preds(
    model, val_dl, num_classes=len(class_cols), criterion=criterion, device=DEVICE
)

from sklearn.metrics import classification_report, confusion_matrix, f1_score

# Pin the label set explicitly: without it, a class that is absent from both
# y_true and y_pred makes target_names mismatch the detected labels.
class_ids = list(range(len(class_cols)))

print(metrics)
# zero_division=0 yields the same 0.0 scores as the default, minus the warnings
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_cols,
                            digits=3, zero_division=0))
print(confusion_matrix(y_true, y_pred, labels=class_ids))

# Macro F1 on its own, in case it is needed separately
print("F1-macro (lesión):", f1_score(y_true, y_pred, average="macro", zero_division=0))


{'img_acc': 0.1860019175455417, 'img_loss': 2.300363646257644, 'lesion_acc': 0.20038350910834132, 'n_images': 2086, 'n_lesions': 1043}
              precision    recall  f1-score   support

       AKIEC      0.083     0.017     0.028        60
         BCC      0.453     0.258     0.329       504
     BEN_OTH      0.000     0.000     0.000         8
         BKL      0.140     0.259     0.182       108
          DF      0.000     0.000     0.000        10
         INF      0.000     0.000     0.000        10
     MAL_OTH      0.000     0.000     0.000         1
         MEL      0.078     0.333     0.127        90
          NV      0.000     0.000     0.000       149
       SCCKA      0.182     0.213     0.196        94
        VASC      0.000     0.000     0.000         9

    accuracy                          0.200      1043
   macro avg      0.085     0.098     0.078      1043
weighted avg      0.261     0.200     0.208      1043

[[  1   7   1  18   0   0   1  25   0   7   0]
 [  3

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [3]:
import torch.nn as nn
import torchvision.models as models

def build_model(model_name: str, num_classes: int, device=DEVICE):
    """Create an ImageNet-pretrained backbone with a fresh classification head.

    Args:
        model_name: one of "resnet18", "mobilenetv3_small", "efficientnet_b0"
            (matched case-insensitively).
        num_classes: output size of the new final linear layer.
        device: device the model is moved to before being returned.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    model_name = model_name.lower()

    # ResNet exposes its head as `.fc`
    if model_name == "resnet18":
        net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        net.fc = nn.Linear(net.fc.in_features, num_classes)
        return net.to(device)

    # MobileNetV3 and EfficientNet keep the head as the last `.classifier` entry
    if model_name == "mobilenetv3_small":
        net = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.IMAGENET1K_V1)
    elif model_name == "efficientnet_b0":
        net = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.IMAGENET1K_V1)
    else:
        raise ValueError(f"Modelo no soportado: {model_name}")

    old_head = net.classifier[-1]
    net.classifier[-1] = nn.Linear(old_head.in_features, num_classes)
    return net.to(device)


In [19]:
import copy
import pandas as pd
from torch.cuda.amp import GradScaler

def train_and_validate_model(model_name, epochs=10, base_lr=1e-3, ckpt_dir="."):
    """Train one architecture and keep the checkpoint with the best lesion accuracy.

    Relies on notebook-level names: ``build_model``, ``train_one_epoch``,
    ``evaluate_late_fusion``, ``train_dl``, ``val_dl``, ``criterion``,
    ``num_classes`` and ``DEVICE``.

    Args:
        model_name: architecture name understood by ``build_model``.
        epochs: number of training epochs.
        base_lr: initial Adam learning rate.
        ckpt_dir: directory where ``best_<model_name>.pth`` is written
            (created if missing). Defaults to the current working directory.

    Returns:
        (model, history, best_state): the model with the best weights restored,
        a DataFrame with one row per epoch, and the best checkpoint dict (None
        when no epoch ran).
    """
    from pathlib import Path  # local import keeps this cell self-contained

    # Build the model
    model = build_model(model_name, num_classes=num_classes, device=DEVICE)

    # Optimiser and scheduler; the scheduler maximises per-lesion accuracy
    optimizer = torch.optim.Adam(model.parameters(), lr=base_lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="max", factor=0.5, patience=2
    )
    # No GradScaler is created here: train_one_epoch is not handed one, so a
    # local scaler was never used and only produced a deprecation warning.

    history = []
    best_lesion_acc = -1.0
    best_state = None

    for epoch in range(1, epochs+1):
        train_metrics = train_one_epoch(model, train_dl, optimizer, criterion, device=DEVICE)
        val_metrics   = evaluate_late_fusion(model, val_dl, device=DEVICE)

        # Step the scheduler on the per-lesion accuracy
        scheduler.step(val_metrics["lesion_acc"])

        # Keep the best state (deep copies, so later epochs cannot mutate it)
        if val_metrics["lesion_acc"] > best_lesion_acc:
            best_lesion_acc = val_metrics["lesion_acc"]
            best_state = {
                "model": copy.deepcopy(model.state_dict()),
                "optimizer": copy.deepcopy(optimizer.state_dict()),
                "epoch": epoch,
                "val_metrics": val_metrics,
                "model_name": model_name
            }

        # Logging (the learning rate is read after the scheduler step)
        lr_now = optimizer.param_groups[0]["lr"]
        row = {"epoch": epoch, "model": model_name, "lr": lr_now, **train_metrics, **val_metrics}
        history.append(row)
        print(f"[{model_name}] Epoch {epoch:02d} | "
              f"train_img_acc={train_metrics['train_img_acc']:.3f} "
              f"train_img_loss={train_metrics['train_img_loss']:.3f} || "
              f"val_img_acc={val_metrics['img_acc']:.3f} "
              f"val_lesion_acc={val_metrics['lesion_acc']:.3f} "
              f"val_img_loss={val_metrics['img_loss']:.3f} | lr={lr_now:.2e}")

    # Restore the best weights and persist the checkpoint
    if best_state is not None:
        model.load_state_dict(best_state["model"])
        ckpt_dir = Path(ckpt_dir)
        ckpt_dir.mkdir(parents=True, exist_ok=True)
        torch.save(best_state, ckpt_dir / f"best_{model_name}.pth")
        print(f"[{model_name}] Mejor checkpoint restaurado (lesion_acc={best_lesion_acc:.3f})")

    return model, pd.DataFrame(history), best_state

# === Run the comparison ===
results, best_states = [], {}

for name in ("resnet18", "mobilenetv3_small", "efficientnet_b0"):
    model_trained, hist_df, best_state = train_and_validate_model(name, epochs=EPOCHS, base_lr=1e-3)
    best_states[name] = best_state

    # Start from the last epoch's row and attach the best per-lesion accuracy
    last = hist_df.iloc[-1].to_dict()
    if best_state:
        last["best_lesion_acc"] = best_state["val_metrics"]["lesion_acc"]
    else:
        last["best_lesion_acc"] = None
    results.append(last)

# Comparison table (last epoch + best lesion_acc), best model first
compare_df = pd.DataFrame(results)
compare_df = compare_df[[
    "model", "epoch", "lr", "train_img_acc", "train_img_loss",
    "img_acc", "img_loss", "lesion_acc", "best_lesion_acc"
]]
compare_df = compare_df.rename(columns={
    "img_acc": "val_img_acc",
    "img_loss": "val_img_loss",
    "lesion_acc": "val_lesion_acc"
})
compare_df = compare_df.sort_values(by="best_lesion_acc", ascending=False)

print("\n=== COMPARACIÓN DE MODELOS (ordenado por mejor lesion_acc) ===")
print(compare_df.to_string(index=False))


  scaler = GradScaler(enabled=(DEVICE.type == "cuda"))
  with autocast(enabled=(device.type == "cuda")):


[resnet18] Epoch 01 | train_img_acc=0.576 train_img_loss=1.332 || val_img_acc=0.573 val_lesion_acc=0.593 val_img_loss=1.491 | lr=1.00e-03
[resnet18] Epoch 02 | train_img_acc=0.604 train_img_loss=1.214 || val_img_acc=0.593 val_lesion_acc=0.621 val_img_loss=1.299 | lr=1.00e-03
[resnet18] Epoch 03 | train_img_acc=0.625 train_img_loss=1.148 || val_img_acc=0.581 val_lesion_acc=0.606 val_img_loss=1.215 | lr=1.00e-03
[resnet18] Epoch 04 | train_img_acc=0.636 train_img_loss=1.112 || val_img_acc=0.625 val_lesion_acc=0.654 val_img_loss=1.097 | lr=1.00e-03
[resnet18] Epoch 05 | train_img_acc=0.651 train_img_loss=1.065 || val_img_acc=0.636 val_lesion_acc=0.647 val_img_loss=1.140 | lr=1.00e-03
[resnet18] Epoch 06 | train_img_acc=0.652 train_img_loss=1.041 || val_img_acc=0.641 val_lesion_acc=0.661 val_img_loss=1.081 | lr=1.00e-03
[resnet18] Epoch 07 | train_img_acc=0.656 train_img_loss=1.024 || val_img_acc=0.642 val_lesion_acc=0.667 val_img_loss=1.104 | lr=1.00e-03
[resnet18] Epoch 08 | train_img_ac

100%|██████████| 9.83M/9.83M [00:01<00:00, 5.33MB/s]


[mobilenetv3_small] Epoch 01 | train_img_acc=0.609 train_img_loss=1.201 || val_img_acc=0.651 val_lesion_acc=0.664 val_img_loss=1.080 | lr=1.00e-03
[mobilenetv3_small] Epoch 02 | train_img_acc=0.656 train_img_loss=1.024 || val_img_acc=0.643 val_lesion_acc=0.685 val_img_loss=1.050 | lr=1.00e-03
[mobilenetv3_small] Epoch 03 | train_img_acc=0.682 train_img_loss=0.952 || val_img_acc=0.657 val_lesion_acc=0.689 val_img_loss=1.020 | lr=1.00e-03
[mobilenetv3_small] Epoch 04 | train_img_acc=0.695 train_img_loss=0.892 || val_img_acc=0.644 val_lesion_acc=0.691 val_img_loss=1.060 | lr=1.00e-03
[mobilenetv3_small] Epoch 05 | train_img_acc=0.715 train_img_loss=0.837 || val_img_acc=0.655 val_lesion_acc=0.690 val_img_loss=1.046 | lr=1.00e-03
[mobilenetv3_small] Epoch 06 | train_img_acc=0.734 train_img_loss=0.774 || val_img_acc=0.644 val_lesion_acc=0.683 val_img_loss=1.129 | lr=1.00e-03
[mobilenetv3_small] Epoch 07 | train_img_acc=0.746 train_img_loss=0.722 || val_img_acc=0.634 val_lesion_acc=0.680 val_

100%|██████████| 20.5M/20.5M [00:03<00:00, 6.66MB/s]


[efficientnet_b0] Epoch 01 | train_img_acc=0.615 train_img_loss=1.221 || val_img_acc=0.655 val_lesion_acc=0.681 val_img_loss=1.074 | lr=1.00e-03
[efficientnet_b0] Epoch 02 | train_img_acc=0.664 train_img_loss=1.027 || val_img_acc=0.672 val_lesion_acc=0.701 val_img_loss=0.994 | lr=1.00e-03
[efficientnet_b0] Epoch 03 | train_img_acc=0.691 train_img_loss=0.922 || val_img_acc=0.678 val_lesion_acc=0.717 val_img_loss=0.959 | lr=1.00e-03
[efficientnet_b0] Epoch 04 | train_img_acc=0.709 train_img_loss=0.865 || val_img_acc=0.689 val_lesion_acc=0.737 val_img_loss=0.949 | lr=1.00e-03
[efficientnet_b0] Epoch 05 | train_img_acc=0.726 train_img_loss=0.807 || val_img_acc=0.678 val_lesion_acc=0.728 val_img_loss=0.956 | lr=1.00e-03
[efficientnet_b0] Epoch 06 | train_img_acc=0.741 train_img_loss=0.741 || val_img_acc=0.617 val_lesion_acc=0.680 val_img_loss=1.134 | lr=1.00e-03
[efficientnet_b0] Epoch 07 | train_img_acc=0.766 train_img_loss=0.678 || val_img_acc=0.682 val_lesion_acc=0.724 val_img_loss=0.986

In [19]:
# =========================
# Evaluación por-lesión (late fusion) de 3 modelos usando checkpoints previos
# =========================

from pathlib import Path
from collections import defaultdict
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.models as models
from sklearn.metrics import classification_report, confusion_matrix

# ---- Paths to the previously generated checkpoints, one per architecture ----
CKPT_DIR = Path("/Users/kevinjiro/Desktop/data Piel/Model")
CKPTS = {
    arch: str(CKPT_DIR / f"best_{arch}.pth")
    for arch in ("resnet18", "mobilenetv3_small", "efficientnet_b0")
}

# ---- Fábrica de arquitectura (mismo head a 11 clases) ----
def build_model(model_name: str, num_classes: int, device=DEVICE, pretrained: bool = True):
    """Build one of the supported backbones with a ``num_classes``-way head.

    Args:
        model_name: "resnet18", "mobilenetv3_small" or "efficientnet_b0"
            (matched case-insensitively).
        num_classes: output size of the replacement final linear layer.
        device: device the model is moved to before being returned.
        pretrained: when True (default) the backbone starts from ImageNet
            weights. Pass False to skip loading them, e.g. when a checkpoint
            is loaded into the model right afterwards.

    Raises:
        ValueError: if ``model_name`` is not supported.
    """
    name = model_name.lower()
    if name == "resnet18":
        weights = models.ResNet18_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.resnet18(weights=weights)
        in_feats = model.fc.in_features
        model.fc = nn.Linear(in_feats, num_classes)
    elif name == "mobilenetv3_small":
        weights = models.MobileNet_V3_Small_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.mobilenet_v3_small(weights=weights)
        in_feats = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(in_feats, num_classes)
    elif name == "efficientnet_b0":
        weights = models.EfficientNet_B0_Weights.IMAGENET1K_V1 if pretrained else None
        model = models.efficientnet_b0(weights=weights)
        in_feats = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(in_feats, num_classes)
    else:
        raise ValueError(f"Modelo no soportado: {model_name}")
    return model.to(device)

def load_checkpoint_state(path: str):
    """Load a checkpoint file and return its model ``state_dict``.

    Args:
        path: location of a checkpoint saved as a dict with a "model" entry.

    Returns:
        The value stored under the "model" key.

    Raises:
        FileNotFoundError: if ``path`` does not exist.
        KeyError: if the loaded object has no "model" entry.
    """
    path = Path(path)
    # Explicit exceptions instead of assert: asserts disappear under `python -O`
    if not path.exists():
        raise FileNotFoundError(f"No existe el checkpoint: {path}")
    # torch.load unpickles the file; weights_only=True restricts that to tensors
    # and plain containers so a tampered checkpoint cannot execute code.
    state = torch.load(path, map_location=DEVICE, weights_only=True)
    if not isinstance(state, dict) or "model" not in state:
        raise KeyError("El checkpoint debe tener la clave 'model' con state_dict")
    return state["model"]

# ---- Predicciones por-lesión con late fusion (como en tu val) ----
@torch.no_grad()
def preds_per_lesion(model, loader, device=None):
    """Return per-lesion true and predicted classes using late fusion.

    The logits of all images sharing a lesion_id are averaged and the argmax of
    that average is the lesion's prediction.

    Args:
        model: network mapping an image batch to logits of shape (B, C).
        loader: iterable yielding ``(images, targets, lesion_ids)`` batches.
        device: device the batches are moved to. When None, the device of the
            model's first parameter is used.

    Returns:
        (y_true, y_pred): two numpy arrays with one entry per lesion, in
        first-seen lesion order.
    """
    if device is None:
        device = next(model.parameters()).device
    model.eval()

    # Accumulators are sized from the logits, not from a global class count
    lesion_logits_sum = {}
    lesion_counts     = defaultdict(int)
    lesion_targets    = {}

    for xb, yb, lidb in loader:
        xb = xb.to(device, non_blocking=True)
        logits = model(xb)  # (B, C)

        # One transfer per batch instead of one .item() sync per image
        logits_cpu = logits.float().cpu()
        targets    = yb.tolist()
        for row, lid, target in zip(logits_cpu, lidb, targets):
            if lid in lesion_logits_sum:
                lesion_logits_sum[lid] += row
            else:
                lesion_logits_sum[lid] = row.clone()
            lesion_counts[lid]  += 1
            lesion_targets[lid]  = target

    y_true, y_pred = [], []
    for lid, zsum in lesion_logits_sum.items():
        zmean = zsum / lesion_counts[lid]
        y_true.append(lesion_targets[lid])
        y_pred.append(int(zmean.argmax().item()))

    return np.array(y_true), np.array(y_pred)

# ---- Evaluación y exportación por modelo ----
OUT_DIR = Path("./eval_train_split")  # results on the validation split
OUT_DIR.mkdir(parents=True, exist_ok=True)

all_reports = {}   # kept in memory for the summary table as well

# Pin the label set so the confusion matrix and the report always cover every
# class in class_cols, including classes missing from y_true and y_pred.
class_ids = list(range(len(class_cols)))

for model_name, ckpt_path in CKPTS.items():
    print(f"\n=== {model_name.upper()} ===")
    # 1) build the architecture and load the checkpoint weights
    model = build_model(model_name, num_classes=num_classes, device=DEVICE)
    model.load_state_dict(load_checkpoint_state(ckpt_path))

    # 2) per-lesion predictions with late fusion
    y_true, y_pred = preds_per_lesion(model, val_dl, device=DEVICE)

    # 3) confusion matrix (rows = true class, columns = predicted class)
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    cm_df = pd.DataFrame(cm, index=class_cols, columns=class_cols)
    print("\nMatriz de confusión (filas=real, columnas=predicha):")
    print(cm_df)

    # 4) classification report (precision, recall, f1, support) per class.
    #    zero_division=0 keeps the 0.0 scores of never-predicted classes
    #    without emitting one warning per metric.
    report_dict = classification_report(
        y_true, y_pred, labels=class_ids, target_names=class_cols,
        output_dict=True, digits=3, zero_division=0
    )
    report_df = pd.DataFrame(report_dict).transpose()
    print("\nClassification report:")
    print(report_df)

    # 5) save to disk
    cm_path = OUT_DIR / f"confusion_matrix_{model_name}.csv"
    rep_path = OUT_DIR / f"classification_report_{model_name}.csv"
    cm_df.to_csv(cm_path)
    report_df.to_csv(rep_path)
    print(f"\nGuardado: {cm_path}")
    print(f"Guardado: {rep_path}")

    all_reports[model_name] = report_df

# (Opcional) pequeña tabla resumen comparando macro/weighted
# One summary row per model, taken from the macro / weighted rows of its report
summary_rows = [
    {
        "model": name,
        "macro_precision":  rep.loc["macro avg", "precision"],
        "macro_recall":     rep.loc["macro avg", "recall"],
        "macro_f1":         rep.loc["macro avg", "f1-score"],
        "weighted_precision": rep.loc["weighted avg", "precision"],
        "weighted_recall":    rep.loc["weighted avg", "recall"],
        "weighted_f1":        rep.loc["weighted avg", "f1-score"],
        # the report only has an "accuracy" row in some configurations
        "accuracy":           rep.loc["accuracy", "precision"] if "accuracy" in rep.index else None
    }
    for name, rep in all_reports.items()
]

# Best macro-F1 first
summary_df = pd.DataFrame(summary_rows)
summary_df = summary_df.sort_values("macro_f1", ascending=False)

print("\n=== RESUMEN COMPARATIVO (ordenado por macro_f1) ===")
print(summary_df.to_string(index=False))
summary_df.to_csv(OUT_DIR / "summary_models_validation.csv", index=False)
print(f"\nGuardado resumen: {OUT_DIR / 'summary_models_validation.csv'}")



=== RESNET18 ===

Matriz de confusión (filas=real, columnas=predicha):
         AKIEC  BCC  BEN_OTH  BKL  DF  INF  MAL_OTH  MEL   NV  SCCKA  VASC
AKIEC        0   27        0   18   0    0        0    0    1     14     0
BCC          0  453        0   11   0    0        0    4    2     34     0
BEN_OTH      0    4        0    2   0    0        0    2    0      0     0
BKL          0   40        0   32   0    0        0    7   17     12     0
DF           0    5        0    2   0    0        0    2    1      0     0
INF          0    8        0    2   0    0        0    0    0      0     0
MAL_OTH      0    0        0    1   0    0        0    0    0      0     0
MEL          0    8        0   16   0    0        0   37   28      1     0
NV           0   17        0    9   0    0        0   11  111      1     0
SCCKA        0   21        0    3   0    0        0    1    0     69     0
VASC         0    4        0    1   0    0        0    0    2      0     2

Classification report:
    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Matriz de confusión (filas=real, columnas=predicha):
         AKIEC  BCC  BEN_OTH  BKL  DF  INF  MAL_OTH  MEL   NV  SCCKA  VASC
AKIEC       20   25        0    8   0    0        0    1    0      6     0
BCC         11  465        0    3   0    0        0    3    2     18     2
BEN_OTH      0    1        2    1   0    0        0    1    2      0     1
BKL          9   37        0   34   1    0        0   13   10      4     0
DF           0    5        0    1   3    0        0    1    0      0     0
INF          1    4        0    2   0    0        0    1    1      0     1
MAL_OTH      0    0        0    0   0    0        0    1    0      0     0
MEL          2    6        0    6   0    0        0   54   21      0     1
NV           0   14        0    7   1    0        0   12  112      1     2
SCCKA        3   24        0   10   0    0        0    0    0     57     0
VASC         0    1        0    0   1    0        0    1    1      0     5

Classification report:
              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Matriz de confusión (filas=real, columnas=predicha):
         AKIEC  BCC  BEN_OTH  BKL  DF  INF  MAL_OTH  MEL   NV  SCCKA  VASC
AKIEC       23   19        0    4   0    0        0    3    1      9     1
BCC          7  472        0    4   0    0        0    3    3     13     2
BEN_OTH      1    1        2    0   0    0        0    2    1      0     1
BKL          9   28        0   40   1    0        0   11   12      7     0
DF           0    3        0    1   5    0        0    0    1      0     0
INF          0    3        0    1   1    2        0    0    1      1     1
MAL_OTH      0    0        0    0   0    0        0    1    0      0     0
MEL          2    5        0    2   0    0        0   55   24      0     2
NV           0   11        0    3   0    0        0   15  119      1     0
SCCKA        4   21        0    5   0    0        0    0    0     64     0
VASC         0    1        0    0   0    0        0    0    1      0     7

Classification report:
              precisio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
