In [3]:
import torch
import torch.nn as nn
from torchvision import models, datasets, transforms
import os

# Modelo ViT

In [1]:
from torchvision import transforms

transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # ViT espera 3 canales
    transforms.Resize((224,224)),                 # ViT espera 224x224
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])  # normalización básica
])


In [4]:
ruta_carpeta_actual = os.getcwd()
data_dir = os.path.join(ruta_carpeta_actual, "PuntosMuestra_CR_tinto_synthetic_images")

dataset = datasets.ImageFolder(root=data_dir, transform=transform)

num_classes = len(dataset.classes)
print("Número de clases:", num_classes)

Número de clases: 7


## Preparar entrenamiento

In [5]:
from torch.utils.data import random_split, DataLoader

# Dividir en train (70%) y test (30%)
n = len(dataset)
n_train = int(0.7 * n)
train_ds, test_ds = random_split(dataset, [n_train, n - n_train])

train_dl = DataLoader(train_ds, batch_size=32, shuffle=True)
test_dl = DataLoader(test_ds, batch_size=64)


## Definir modelo

In [6]:
# import torch.nn as nn
# from torchvision import models

# Crear Vision Transformer base (16x16 patches, imagen 224x224)
model = models.vit_b_16(weights=None)   # o weights="IMAGENET1K_V1" si quieres fine-tuning
model.heads = nn.Linear(model.heads.head.in_features, num_classes)


## Entrenamiento y validacion

In [7]:
# pip install timm  (si no lo tienes)
import timm
from torch.utils.data import DataLoader, Subset
from sklearn.metrics import classification_report

# ----- 0) Dispositivo -----
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# ----- 1) Transforms (224, 3 canales) -----
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3),
])

# ----- 2) Dataset y SUBMUESTREO rápido -----
data_dir = os.path.join(os.getcwd(), "PuntosMuestra_CR_tinto_synthetic_images")
full_ds = datasets.ImageFolder(root=data_dir, transform=transform)
num_classes = len(full_ds.classes)

# Toma un subconjunto pequeño y estratificado por carpetas (aprox.)
# Aquí: hasta 80 imágenes por clase (ajusta si quieres más/menos)
indices = []
max_per_class = 80
counts = {c:0 for c in range(num_classes)}
for i, (_, y) in enumerate(full_ds.samples):
    if counts[y] < max_per_class:
        indices.append(i); counts[y]+=1
small_ds = Subset(full_ds, indices)

# Split 70/30
n = len(small_ds); n_tr = int(0.7*n)
train_ds, test_ds = torch.utils.data.random_split(small_ds, [n_tr, n-n_tr], generator=torch.Generator().manual_seed(42))

train_dl = DataLoader(train_ds, batch_size=16, shuffle=True, num_workers=2, pin_memory=(device.type=="cuda"))
test_dl  = DataLoader(test_ds,  batch_size=32, shuffle=False, num_workers=2, pin_memory=(device.type=="cuda"))

print(f"Submuestra usada: {len(small_ds)} (train={len(train_ds)}, test={len(test_ds)})")

# ----- 3) Modelo: ViT tiny preentrenado y solo cabeza entrenable -----
model = timm.create_model("vit_tiny_patch16_224", pretrained=True, num_classes=num_classes)
# Congela todo excepto la cabeza
for p in model.parameters():
    p.requires_grad = False
for p in model.head.parameters():
    p.requires_grad = True

model = model.to(device)

# ----- 4) Optimizador y loss -----
opt = torch.optim.Adam(model.head.parameters(), lr=2e-4)  # solo cabeza
crit = nn.CrossEntropyLoss()

# ----- 5) Entrenamiento corto -----
epochs = 3
for ep in range(epochs):
    model.train()
    for x,y in train_dl:
        x,y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        opt.step()

    # Validación
    model.eval(); correct=total=0
    with torch.no_grad():
        for x,y in test_dl:
            x,y = x.to(device), y.to(device)
            pred = model(x).argmax(1)
            correct += (pred==y).sum().item()
            total += y.numel()
    print(f"Epoch {ep+1}/{epochs}  Acc={correct/total:.3f}")

  from .autonotebook import tqdm as notebook_tqdm


Device: cpu
Submuestra usada: 482 (train=337, test=145)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


Epoch 1/3  Acc=0.214
Epoch 2/3  Acc=0.345
Epoch 3/3  Acc=0.469


In [None]:
# DEMORA DEMASIADO | 1era version

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

opt = torch.optim.Adam(model.parameters(), lr=1e-4)
crit = nn.CrossEntropyLoss()

for epoch in range(5):   # empieza con 5 épocas para probar
    # ---- Entrenamiento ----
    model.train()
    for x,y in train_dl:
        x,y = x.to(device), y.to(device)
        opt.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        opt.step()

    # ---- Validación ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x,y in test_dl:
            x,y = x.to(device), y.to(device)
            preds = model(x).argmax(1)
            correct += (preds==y).sum().item()
            total += y.numel()
    print(f"Epoch {epoch+1}: Acc={correct/total:.3f}")


## Metricas

In [8]:
# ----- 6) Métricas extras (opcional) -----
y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for x,y in test_dl:
        x,y = x.to(device), y.to(device)
        pred = model(x).argmax(1)
        y_true += y.cpu().tolist()
        y_pred += pred.cpu().tolist()

print(classification_report(y_true, y_pred, digits=3, target_names=full_ds.classes))

              precision    recall  f1-score   support

          01      0.000     0.000     0.000        26
          02      0.000     0.000     0.000         1
          03      0.737     0.519     0.609        27
          04      0.806     1.000     0.893        25
          06      0.371     0.565     0.448        23
          07      0.000     0.000     0.000        23
          10      0.271     0.800     0.405        20

    accuracy                          0.469       145
   macro avg      0.312     0.412     0.336       145
weighted avg      0.373     0.469     0.394       145



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:

# 1era version
from sklearn.metrics import classification_report, confusion_matrix

y_true, y_pred = [], []
model.eval()
with torch.no_grad():
    for x,y in test_dl:
        x,y = x.to(device), y.to(device)
        preds = model(x).argmax(1)
        y_true.extend(y.cpu().numpy())
        y_pred.extend(preds.cpu().numpy())

print(classification_report(y_true, y_pred, digits=3))
print(confusion_matrix(y_true, y_pred))
