In [1]:
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
import os

# Modelo ViT

In [2]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # ViT espera 3 canales
    transforms.Resize((224,224)),                 # ViT espera 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])  # normalización básica
])


In [3]:
ruta_carpeta_actual = os.getcwd()
data_dir = os.path.join(ruta_carpeta_actual, "PuntosMuestra_CR_tinto_synthetic_images")

dataset = datasets.ImageFolder(root=data_dir, transform=transform)

num_classes = len(dataset.classes)
print("Número de clases:", num_classes)

Número de clases: 7


## Limpieza de datos

In [4]:
# --- eliminar clase 02  ---
desc_cat = "02"
if desc_cat in dataset.class_to_idx:
    idx_c2 = dataset.class_to_idx[desc_cat]
    dataset.samples = [s for s in dataset.samples if s[1] != idx_c2]
    dataset.targets = [t for t in dataset.targets if t != idx_c2]
    print(f" Clase {desc_cat} eliminada (índice antiguo {idx_c2}).")
else:
    print(f"ℹ️ No existe carpeta {desc_cat}; no se filtró nada.")

# --- remapear etiquetas a 0..K-1 (crítico) ---
unique_old = sorted(set(t for _, t in dataset.samples))
old2new = {old:i for i, old in enumerate(unique_old)}
dataset.samples = [(p, old2new[t]) for (p, t) in dataset.samples]
dataset.targets = [old2new[t] for t in dataset.targets]

# reconstruir mapping de nombres de carpeta a índices nuevos
idx2class = {v:k for k,v in dataset.class_to_idx.items()}
dataset.class_to_idx = {idx2class[old]: new for old, new in old2new.items()}
dataset.classes = [c for c,_ in sorted(dataset.class_to_idx.items(), key=lambda kv: kv[1])]

num_classes = len(dataset.classes)
print("Clases activas:", dataset.classes)
print("Nuevo class_to_idx:", dataset.class_to_idx)
print("num_classes:", num_classes)

 Clase 02 eliminada (índice antiguo 1).
Clases activas: ['01', '03', '04', '06', '07', '10']
Nuevo class_to_idx: {'01': 0, '03': 1, '04': 2, '06': 3, '07': 4, '10': 5}
num_classes: 6


## Preparar entrenamiento

In [5]:
from torch.utils.data import random_split, DataLoader

n = len(dataset)
n_tr = int(0.7*n)
train_ds, test_ds = random_split(dataset, [n_tr, n-n_tr], generator=torch.Generator().manual_seed(42))

train_dl = DataLoader(train_ds, batch_size=32, shuffle=True,  num_workers=2, pin_memory=torch.cuda.is_available())
test_dl  = DataLoader(test_ds,  batch_size=64, shuffle=False, num_workers=2, pin_memory=torch.cuda.is_available())

## Definir modelo

In [6]:
import torch.nn as nn
from torchvision import models

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = models.vit_b_16(weights=None)  # (o weights="IMAGENET1K_V1" para fine-tuning)
model.heads = nn.Linear(model.heads.head.in_features, num_classes)
model = model.to(device)

## Entrenamiento y validacion

In [8]:
import time
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

# --- entrenar SOLO la cabeza (linear probe) ---
# congela todo
for p in model.parameters():
    p.requires_grad = False

# localiza la cabeza (torchvision usa "heads", timm usa "head")
head = getattr(model, "head", None) or getattr(model, "heads", None)
if head is None:
    raise RuntimeError("No se encontró 'head'/'heads' en el modelo ViT.")
for p in head.parameters():
    p.requires_grad = True

# optimizador SOLO de la cabeza (más rápido)
opt  = torch.optim.Adam(head.parameters(), lr=2e-4)
crit = nn.CrossEntropyLoss()

use_cuda = torch.cuda.is_available()
scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)
if use_cuda:
    torch.backends.cudnn.benchmark = True  # acelera convoluciones si aplica

# --- entrenamiento (pocas épocas y AMP) ---
epochs = 1  # prueba rápida; sube a 2–3 cuando quieras
t0 = time.perf_counter()
for _ in range(epochs):
    model.train()
    for x, y in train_dl:
        x, y = x.to(device, non_blocking=use_cuda), y.to(device, non_blocking=use_cuda)
        opt.zero_grad(set_to_none=True)
        with torch.cuda.amp.autocast(enabled=use_cuda):
            logits = model(x)
            loss   = crit(logits, y)
        scaler.scale(loss).backward()
        scaler.step(opt)
        scaler.update()
t1 = time.perf_counter()
fit_s = t1 - t0
print(f"⏱ ViT – entrenamiento (linear probe, {epochs} ep): {fit_s:.2f}s")

# --- inferencia (AMP y no_grad) ---
t0 = time.perf_counter()
y_true, y_pred = [], []
model.eval()
with torch.no_grad(), torch.cuda.amp.autocast(enabled=use_cuda):
    for x, y in test_dl:
        x = x.to(device, non_blocking=use_cuda)
        logits = model(x)
        preds = logits.argmax(1).cpu().tolist()
        y_pred += preds
        y_true += y.tolist()
t1 = time.perf_counter()
pred_s = t1 - t0
print(f"⏱ ViT – predicción: {pred_s:.3f}s  ({pred_s/len(y_true)*1000:.2f} ms/muestra)")

  scaler = torch.cuda.amp.GradScaler(enabled=use_cuda)
  with torch.cuda.amp.autocast(enabled=use_cuda):


KeyboardInterrupt: 

In [None]:
# DEMORA DEMASIADO | 1era version

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

opt = torch.optim.Adam(model.parameters(), lr=1e-4)
crit = nn.CrossEntropyLoss()

for epoch in range(5):   # empieza con 5 épocas para probar
    # ---- Entrenamiento ----
    model.train()
    for x,y in train_dl:
        x,y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        opt.step()

    # ---- Validación ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x,y in test_dl:
            x,y = x.to(device), y.to(device)
            preds = model(x).argmax(1)
            correct += (preds==y).sum().item()
            total += y.numel()
    print(f"Epoch {epoch+1}: Acc={correct/total:.3f}")


## Metricas

In [None]:
# ----- 6) Métricas extras (opcional) -----
# --- métricas ---
oa  = accuracy_score(y_true, y_pred)
f1m = f1_score(y_true, y_pred, average="macro")
cm  = confusion_matrix(y_true, y_pred)

print(f"OA: {oa:.3f}")
print(f"F1 macro: {f1m:.3f}")
print("Matriz de confusión:\n", cm)
print("\nReporte por clase:\n", classification_report(y_true, y_pred, target_names=dataset.classes, digits=3))

In [None]:

# 1era version
from sklearn.metrics import classification_report, confusion_matrix

y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for x,y in test_dl:
        x,y = x.to(device), y.to(device)
        preds = model(x).argmax(1)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

print(classification_report(y_true, y_pred, digits=3))
print(confusion_matrix(y_true, y_pred))
