# 03 · Modelado — AVSI
**Artificial Vision Stacking Inspection** · *2025-10-22*

Entrenamiento de un clasificador **ResNet-18 (transfer learning)** para inspección de apilamiento.
Carga el dataset desde `data/processed/` (train/val/test), entrena, evalúa en test y guarda el **mejor modelo** (según la exactitud de validación) en `models/best_model.pt`.


## 1. Configuración y dependencias

In [None]:

import os, json, time
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Project paths, all anchored at the current working directory.
ROOT = Path('.').resolve()
DATA_PROC = ROOT.joinpath('data', 'processed')
MODELS_DIR = ROOT.joinpath('models')
RESULTS_DIR = ROOT.joinpath('results', 'metrics')
FIG_DIR = ROOT.joinpath('results', 'figures')

# Create the output directories up front so later cells can write into them.
for out_dir in (MODELS_DIR, RESULTS_DIR, FIG_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations for a quick sanity check.
for label, location in (
    ('DATA_PROC', DATA_PROC),
    ('MODELS_DIR', MODELS_DIR),
    ('RESULTS_DIR', RESULTS_DIR),
    ('FIG_DIR', FIG_DIR),
):
    print(f'{label}:', location)

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
DEVICE


## 2. Hiperparámetros

In [None]:

# Hyperparameters shared by the data, model and training cells.
CFG = dict(
    img_size=224,           # images are resized to img_size x img_size
    batch_size=32,
    num_workers=2,          # DataLoader worker processes
    epochs=10,              # upper bound; early stopping may end sooner
    lr=1e-3,
    weight_decay=1e-4,
    patience=3,             # early stopping: epochs allowed without val_acc gain
    freeze_backbone=True,   # disable gradients on the pretrained layers
)
print(json.dumps(CFG, indent=2))


## 3. Carga de datos (train/val/test)

In [None]:

# Expected layout: data/processed/{train,val,test}/<class_name>/<image files>.
train_dir = DATA_PROC / 'train'
val_dir   = DATA_PROC / 'val'
test_dir  = DATA_PROC / 'test'

if not train_dir.exists() or not val_dir.exists() or not test_dir.exists():
    raise FileNotFoundError("No se encontró la estructura data/processed/{train,val,test}. Ejecuta 02_preprocesamiento primero.")

# Transforms (ImageNet normalization, matching the pretrained backbone).
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

train_tfms = transforms.Compose([
    transforms.Resize((CFG['img_size'], CFG['img_size'])),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])
eval_tfms = transforms.Compose([
    transforms.Resize((CFG['img_size'], CFG['img_size'])),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

train_ds = datasets.ImageFolder(train_dir, transform=train_tfms)
val_ds   = datasets.ImageFolder(val_dir, transform=eval_tfms)
test_ds  = datasets.ImageFolder(test_dir, transform=eval_tfms)

# ImageFolder derives label indices from each split's own folder names, so a
# missing or extra class folder in val/test would silently shift the labels
# relative to train. Fail loudly instead of producing meaningless metrics.
for split_name, split_ds in (('val', val_ds), ('test', test_ds)):
    if split_ds.class_to_idx != train_ds.class_to_idx:
        raise ValueError(
            f"Las clases de '{split_name}' no coinciden con las de 'train': "
            f"{split_ds.classes} vs {train_ds.classes}"
        )

classes = train_ds.classes
num_classes = len(classes)
print('Clases:', classes, '| num_classes =', num_classes)

# Only the training loader is shuffled; val/test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=CFG['batch_size'], shuffle=True,  num_workers=CFG['num_workers'])
val_loader   = DataLoader(val_ds,   batch_size=CFG['batch_size'], shuffle=False, num_workers=CFG['num_workers'])
test_loader  = DataLoader(test_ds,  batch_size=CFG['batch_size'], shuffle=False, num_workers=CFG['num_workers'])
len(train_ds), len(val_ds), len(test_ds)


## 4. Modelo ResNet-18 (transfer learning)

In [None]:

# ResNet-18 initialised with torchvision's default pretrained weights.
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
if CFG['freeze_backbone']:
    # Turn off gradients for every pretrained parameter; the classification
    # head created below is a new layer and therefore stays trainable.
    model.requires_grad_(False)

# Replace the final layer with one sized to this dataset's classes.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, num_classes)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=CFG['lr'],
    weight_decay=CFG['weight_decay'],
)


## 5. Entrenamiento y validación

In [None]:

def train_one_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and report epoch metrics.

    Args:
        model: Network to train; switched to training mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted average loss and
        the fraction of samples whose arg-max prediction equals the label.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for batch, targets in loader:
        batch = batch.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        logits = model(batch)
        batch_loss = criterion(logits, targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller last batch does not skew the
        # epoch average (assumes the criterion returns a per-batch mean).
        loss_sum += batch_loss.item() * batch.size(0)
        n_correct += (logits.argmax(dim=1) == targets).sum().item()
        n_seen += targets.size(0)
    return loss_sum / n_seen, n_correct / n_seen

def evaluate(model, loader, criterion, device):
    """Compute loss and accuracy over ``loader`` without updating the model.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted average loss and
        the fraction of samples whose arg-max prediction equals the label.
    """
    # Gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        model.eval()
        loss_sum = 0.0
        n_correct = 0
        n_seen = 0
        for batch, targets in loader:
            batch = batch.to(device)
            targets = targets.to(device)
            logits = model(batch)
            batch_loss = criterion(logits, targets)
            loss_sum += batch_loss.item() * batch.size(0)
            n_correct += (logits.argmax(dim=1) == targets).sum().item()
            n_seen += targets.size(0)
        return loss_sum / n_seen, n_correct / n_seen

# Per-epoch metrics; one entry is appended to every list after each epoch.
history = {'train_loss':[], 'train_acc':[], 'val_loss':[], 'val_acc':[]}
# Start below any attainable accuracy so the first epoch always writes a
# checkpoint from THIS run. Starting at 0.0 with a strict ">" would leave no
# checkpoint (or a stale one from an earlier run) whenever val_acc stays 0.0.
best_val_acc = float('-inf')
best_path = MODELS_DIR / 'best_model.pt'
epochs_no_improve = 0

for epoch in range(1, CFG['epochs']+1):
    tr_loss, tr_acc = train_one_epoch(model, train_loader, optimizer, criterion, DEVICE)
    val_loss, val_acc = evaluate(model, val_loader, criterion, DEVICE)

    history['train_loss'].append(tr_loss)
    history['train_acc'].append(tr_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f"[{epoch:02d}/{CFG['epochs']}] train_loss={tr_loss:.4f} | train_acc={tr_acc:.4f} | val_loss={val_loss:.4f} | val_acc={val_acc:.4f}")

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Store the score and epoch next to the weights: the test cell reads
        # 'best_val_acc' from the checkpoint when it writes summary.json.
        torch.save({'model_state': model.state_dict(),
                    'classes': classes,
                    'cfg': CFG,
                    'best_val_acc': best_val_acc,
                    'epoch': epoch}, best_path)
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        # Early stopping: give up after `patience` epochs in a row without a
        # strictly better validation accuracy.
        if epochs_no_improve >= CFG['patience']:
            print("Early stopping activado.")
            break

# Persist the training history for the plotting cell.
pd.DataFrame(history).to_csv(RESULTS_DIR / 'history.csv', index=False)
print('Mejor val_acc:', best_val_acc, '| modelo guardado en:', best_path)


## 6. Curvas de entrenamiento

In [None]:

# Reload the per-epoch metrics written by the training cell.
hist = pd.read_csv(RESULTS_DIR / 'history.csv')

# (train column, validation column, title, y-axis label, output file name)
curve_specs = [
    ('train_loss', 'val_loss', 'Pérdida', 'Loss', 'loss.png'),
    ('train_acc', 'val_acc', 'Exactitud', 'Accuracy', 'accuracy.png'),
]

# One figure per metric, each comparing the train and validation curves.
for train_col, val_col, title, y_label, out_name in curve_specs:
    plt.figure()
    for column in (train_col, val_col):
        plt.plot(hist[column], label=column)
    plt.title(title)
    plt.xlabel('Época')
    plt.ylabel(y_label)
    plt.legend()
    plt.savefig(FIG_DIR / out_name, bbox_inches='tight')
    plt.show()


## 7. Evaluación en test

In [None]:

# Restore the best checkpoint written by the training cell.
ckpt = torch.load(MODELS_DIR / 'best_model.pt', map_location=DEVICE)
model.load_state_dict(ckpt['model_state'])
model.eval()

# Collect ground-truth and predicted class indices over the whole test split.
y_true, y_pred = [], []
with torch.no_grad():
    for imgs, labels in test_loader:
        outputs = model(imgs.to(DEVICE))
        y_pred.extend(outputs.argmax(dim=1).cpu().tolist())
        y_true.extend(labels.tolist())

# Pin the label set explicitly. Without it scikit-learn only considers labels
# that occur in y_true/y_pred, so a class missing from both would shrink the
# matrix and break the `classes`-indexed DataFrame and `target_names` below.
label_ids = list(range(len(classes)))

acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)
report = classification_report(y_true, y_pred, labels=label_ids,
                               target_names=classes, output_dict=True)

print('Accuracy (test):', acc)
pd.DataFrame(cm, index=classes, columns=classes).to_csv(RESULTS_DIR / 'confusion_matrix.csv')
pd.DataFrame(report).to_csv(RESULTS_DIR / 'classification_report.csv')

# A checkpoint may not carry 'best_val_acc'; in that case recover it from the
# history.csv written by the training cell rather than reporting 0.0.
best_val = ckpt.get('best_val_acc')
if best_val is None:
    history_csv = RESULTS_DIR / 'history.csv'
    best_val = pd.read_csv(history_csv)['val_acc'].max() if history_csv.exists() else 0.0

with open(RESULTS_DIR / 'summary.json', 'w') as f:
    json.dump({'test_accuracy': float(acc), 'best_val_acc': float(best_val),
               'classes': classes, 'cfg': ckpt.get('cfg', {})}, f, indent=2)

# Show the confusion matrix (rows: true class, columns: predicted class).
plt.figure()
plt.imshow(cm, interpolation='nearest')
plt.title('Matriz de confusión')
plt.xlabel('Predicción')
plt.ylabel('Real')
# Label the axes with class names so the cells can be read without a lookup.
plt.xticks(label_ids, classes, rotation=45, ha='right')
plt.yticks(label_ids, classes)
plt.colorbar()
plt.savefig(FIG_DIR / 'confusion_matrix.png', bbox_inches='tight')
plt.show()


## 8. Inferencia: función de predicción para una imagen

In [None]:

from PIL import Image

def load_best_model(model_path=MODELS_DIR / 'best_model.pt'):
    """Rebuild the classifier from a saved checkpoint.

    Args:
        model_path: Checkpoint file holding the ``'model_state'`` and
            ``'classes'`` entries. Defaults to ``MODELS_DIR / 'best_model.pt'``.

    Returns:
        Tuple ``(model, classes)``: the network in evaluation mode and the
        class-name list stored in the checkpoint. The model is not moved to
        ``DEVICE`` here.
    """
    checkpoint = torch.load(model_path, map_location=DEVICE)
    class_names = checkpoint['classes']

    # Architecture only; the trained weights come from the checkpoint.
    net = models.resnet18(weights=None)
    # The head must match the number of classes the checkpoint was trained on.
    net.fc = nn.Linear(net.fc.in_features, len(class_names))
    net.load_state_dict(checkpoint['model_state'])
    net.eval()
    return net, class_names

# Inference preprocessing: resize to the configured square size, convert to a
# tensor, then normalize with the same mean/std used by the evaluation cells.
infer_tfms = transforms.Compose([
    transforms.Resize(size=(CFG['img_size'],) * 2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

def predict_image(img_path, model=None, classes=None):
    """Classify a single image file.

    Args:
        img_path: Path of the image to classify; it is converted to RGB.
        model: Optional network to use. If either ``model`` or ``classes`` is
            ``None``, both are obtained from ``load_best_model()``.
        classes: Optional class names, ordered by output index.

    Returns:
        Tuple ``(label, confidence, distribution)``: the predicted class name,
        its softmax probability, and a dict mapping every class name to its
        probability.
    """
    if model is None or classes is None:
        model, classes = load_best_model()

    # Make sure a caller-supplied model is not left in training mode.
    model.eval()
    # Run on whatever device the model already lives on, so both a CPU model
    # and a model that was trained on the GPU work here.
    device = next(model.parameters()).device

    # The context manager closes the file handle once the pixels are copied.
    with Image.open(img_path) as raw:
        img = raw.convert('RGB')
    x = infer_tfms(img).unsqueeze(0).to(device)

    with torch.no_grad():
        outputs = model(x)
        # squeeze(0) drops only the batch axis, so a single-class head still
        # yields a 1-D vector; .cpu() is needed before .numpy() on GPU tensors.
        probs = torch.softmax(outputs, dim=1).squeeze(0).cpu().numpy()
    idx = int(np.argmax(probs))
    return classes[idx], float(probs[idx]), {c: float(p) for c, p in zip(classes, probs)}

# Ejemplo de uso (comentar/editar la ruta según tu imagen)
# pred, conf, dist = predict_image('data/processed/test/good_stack/ejemplo.jpg')
# print(pred, conf)


## 9. Próximos pasos
- Descongelar las capas superiores del backbone y hacer **fine-tuning** para mejorar accuracy/F1 (o mAP/IoU si se usa un detector).
- Ajustar `batch_size`, `lr`, `epochs` y regularización.
- Registrar métricas adicionales y guardar curvas en `results/figures/`.
- Exportar a `best_model.onnx` si planeas despliegue en otras plataformas.
