# 04 · Optimización de Hiperparámetros — AVSI
**Artificial Vision Stacking Inspection** · *2025-10-22*

Este notebook ejecuta una **búsqueda en malla (Grid Search)** con PyTorch/torchvision para **ResNet-18** y analiza la **sensibilidad** de hiperparámetros clave:
- `lr` (learning rate)
- `weight_decay`
- `batch_size`
- `freeze_backbone` (congelar o no el backbone)
- `epochs` (número de épocas; se fija en un único valor bajo para acelerar la búsqueda, por lo que no se analiza su sensibilidad)

Genera:
- Tabla de resultados por configuración
- Curvas de entrenamiento y validación (pérdida y exactitud) del **mejor experimento**
- Gráficas de sensibilidad por hiperparámetro


## 1. Configuración y utilidades

In [None]:

import os, json, time, itertools
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

from sklearn.model_selection import ParameterGrid

# Project layout: every path is resolved from the current working directory.
ROOT = Path('.').resolve()
DATA_PROC = ROOT.joinpath('data', 'processed')
MODELS_DIR = ROOT.joinpath('models')
RESULTS_DIR = ROOT.joinpath('results', 'metrics')
FIG_DIR = ROOT.joinpath('results', 'figures')

# Create the output folders up front (no error if they already exist).
for out_dir in (MODELS_DIR, RESULTS_DIR, FIG_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print('DEVICE:', DEVICE)


## 2. Datos (train/val/test)

In [None]:

# Pre-split image folders; per the error message below they are expected to be
# produced by the 02_preprocesamiento notebook.
train_dir = DATA_PROC / 'train'
val_dir   = DATA_PROC / 'val'
test_dir  = DATA_PROC / 'test'

# Explicit check instead of `assert`: asserts are stripped under `python -O`,
# and listing the missing folders makes the failure actionable.
missing_dirs = [str(split_dir) for split_dir in (train_dir, val_dir, test_dir)
                if not split_dir.exists()]
if missing_dirs:
    raise FileNotFoundError(
        f"Ejecuta 02_preprocesamiento antes. Faltan: {missing_dirs}"
    )

# Normalisation statistics named after ImageNet; the model built later in this
# notebook starts from pretrained ResNet-18 weights.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

IMG_SIZE = 224
# The same deterministic pipeline (no augmentation) is used for train and val.
common_tfms = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
])

train_ds = datasets.ImageFolder(train_dir, transform=common_tfms)
val_ds   = datasets.ImageFolder(val_dir, transform=common_tfms)

# ImageFolder assigns label indices from each split's own sub-folder names, so
# a split with a different folder set would map the same index to a different
# class and silently corrupt the validation metrics.
if val_ds.classes != train_ds.classes:
    raise ValueError(
        f"Las clases de train y val no coinciden: {train_ds.classes} vs {val_ds.classes}"
    )

classes = train_ds.classes
num_classes = len(classes)
print('Clases:', classes)


## 3. Definición del modelo y funciones de entrenamiento

In [None]:

def build_model(num_classes, freeze_backbone=True):
    """Build a ResNet-18 with pretrained weights and a new classification head.

    Args:
        num_classes: Number of output units of the replacement `fc` layer.
        freeze_backbone: When true, every pretrained parameter has
            `requires_grad` disabled, leaving only the new head trainable.

    Returns:
        The `torchvision` ResNet-18 module with its `fc` layer replaced.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
    if freeze_backbone:
        # Freeze first: the head created below is a fresh layer, so it keeps
        # the default `requires_grad=True`.
        net.requires_grad_(False)
    head_inputs = net.fc.in_features
    net.fc = nn.Linear(head_inputs, num_classes)
    return net

def run_epoch(model, loader, optimizer, criterion, train=True):
    """Run one full pass over `loader`, optionally updating the model.

    Args:
        model: Module to train or evaluate; batches are moved to `DEVICE`.
        loader: Iterable yielding `(inputs, targets)` batches.
        optimizer: Optimizer stepped after every batch; only used when
            `train` is true.
        criterion: Loss callable invoked as `criterion(outputs, targets)`.
        train: True for a training pass (train mode, gradients, optimizer
            steps); False for an evaluation pass (eval mode, no gradients).

    Returns:
        A `(loss, accuracy)` tuple averaged over all samples seen.
    """
    # `train(False)` is what `eval()` does, so one call covers both modes.
    model.train(train)

    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(DEVICE)
        targets = targets.to(DEVICE)

        with torch.set_grad_enabled(train):
            if train:
                optimizer.zero_grad()
            logits = model(inputs)
            batch_loss = criterion(logits, targets)
            if train:
                batch_loss.backward()
                optimizer.step()

        # Weight the batch loss by its size so the final average is per sample
        # (assumes `criterion` returns a batch mean -- TODO confirm if the
        # reduction is ever changed).
        loss_sum += batch_loss.item() * inputs.size(0)
        predictions = logits.argmax(1)
        n_correct += (predictions == targets).sum().item()
        n_seen += targets.size(0)

    return loss_sum / n_seen, n_correct / n_seen


## 4. Espacio de búsqueda

In [None]:

# Search space: each combination of the values below is one configuration.
param_grid = dict(
    lr=[1e-4, 5e-4, 1e-3],
    weight_decay=[0.0, 1e-4],
    batch_size=[16, 32],
    freeze_backbone=[True, False],
    # Kept short for the search; train longer in notebook 03 with the best config.
    epochs=[5],
)
# Preview the first three configurations as the cell output.
list(itertools.islice(ParameterGrid(param_grid), 3))


## 5. Búsqueda en malla

In [None]:

# Grid search: train every configuration for a few epochs, score it by its best
# validation accuracy, and persist the history and weights of the overall winner.
SEED = 42  # same seed for every configuration to reduce init/shuffle noise

results = []
best = {'val_acc': 0.0}
grid = list(ParameterGrid(param_grid))

for i, cfg in enumerate(grid, 1):
    print(f"\n>>> Config {i}/{len(grid)}: {cfg}")
    # Re-seed before building the model so each configuration starts from the
    # same random state (new head initialisation, shuffling).
    torch.manual_seed(SEED)
    model = build_model(num_classes, freeze_backbone=cfg['freeze_backbone']).to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    criterion = nn.CrossEntropyLoss()

    # Loaders are rebuilt per configuration because the batch size varies.
    train_loader = DataLoader(train_ds, batch_size=cfg['batch_size'], shuffle=True,  num_workers=2)
    val_loader   = DataLoader(val_ds,   batch_size=cfg['batch_size'], shuffle=False, num_workers=2)

    history = {'train_loss':[], 'train_acc':[], 'val_loss':[], 'val_acc':[]}
    best_val, best_epoch, best_state = float('-inf'), 0, None
    for epoch in range(1, cfg['epochs']+1):
        tr_loss, tr_acc = run_epoch(model, train_loader, optimizer, criterion, train=True)
        val_loss, val_acc = run_epoch(model, val_loader, optimizer, criterion, train=False)
        history['train_loss'].append(tr_loss)
        history['train_acc'].append(tr_acc)
        history['val_loss'].append(val_loss)
        history['val_acc'].append(val_acc)
        print(f"  epoch {epoch}: tr_acc={tr_acc:.3f} val_acc={val_acc:.3f}")

        # Snapshot the weights of the best epoch so the saved checkpoint
        # matches the reported score (the score is the max over epochs, not
        # the last epoch). Cloning to CPU keeps the snapshot independent of
        # later optimizer steps and makes the file loadable without a GPU.
        if val_acc > best_val:
            best_val, best_epoch = val_acc, epoch
            best_state = {name: tensor.detach().cpu().clone()
                          for name, tensor in model.state_dict().items()}

    if best_state is None:
        # No epoch ran (epochs == 0): keep the original 0.0 score and fall
        # back to the current, untrained weights.
        best_val = 0.0
        best_state = model.state_dict()

    row = dict(cfg)
    row.update({'val_acc': best_val})
    results.append(row)

    # Persist history and weights when this configuration is the best so far.
    # The first configuration is always recorded, so the files on disk belong
    # to this run even if every configuration scores 0.0.
    if 'cfg' not in best or best_val > best['val_acc']:
        best = {'val_acc': best_val, 'cfg': cfg, 'history': history}
        pd.DataFrame(history).to_csv(RESULTS_DIR / 'best_grid_history.csv', index=False)
        torch.save({'model_state': best_state, 'classes': classes, 'cfg': cfg,
                    'epoch': best_epoch, 'val_acc': best_val},
                   MODELS_DIR / 'best_grid_model.pt')

df_results = pd.DataFrame(results).sort_values('val_acc', ascending=False).reset_index(drop=True)
df_results.to_csv(RESULTS_DIR / 'grid_results.csv', index=False)
df_results.head()


## 6. Sensibilidad por hiperparámetro

In [None]:

if df_results.empty:
    print('[Aviso] No hay resultados para graficar.')
else:
    def plot_param_sensitivity(df, param):
        """Bar-plot the mean `val_acc` for each value of `param`.

        The figure is saved in `FIG_DIR` as `sensitivity_<param>.png` and then
        shown. Returns the aggregated table (one row per value of `param`).
        """
        mean_by_value = df.groupby(param)['val_acc'].mean()
        agg = mean_by_value.reset_index()

        # String labels make each hyperparameter value its own category.
        bar_labels = agg[param].astype(str)
        plt.figure()
        plt.bar(bar_labels, agg['val_acc'])
        plt.title(f'Sensibilidad: {param} vs val_acc')
        plt.xlabel(param)
        plt.ylabel('val_acc promedio')
        figure_path = FIG_DIR / f'sensitivity_{param}.png'
        plt.savefig(figure_path, bbox_inches='tight')
        plt.show()
        return agg

    # One plot per searched hyperparameter, evaluated in this order.
    sens_lr, sens_wd, sens_bs, sens_fr = [
        plot_param_sensitivity(df_results, hp_name)
        for hp_name in ('lr', 'weight_decay', 'batch_size', 'freeze_backbone')
    ]


## 7. Mejor configuración y curvas

In [None]:

print('Top-5 configuraciones:')
display(df_results.head(5))

# Plot the curves of the best configuration from the history file, if present.
history_csv = RESULTS_DIR / 'best_grid_history.csv'
if history_csv.exists():
    hist = pd.read_csv(history_csv)

    # (columns to draw, title, y-axis label, output file name)
    curve_specs = (
        (('train_loss', 'val_loss'), 'Pérdida (mejor grid)', 'Loss', 'best_grid_loss.png'),
        (('train_acc', 'val_acc'), 'Exactitud (mejor grid)', 'Accuracy', 'best_grid_acc.png'),
    )
    for columns, title, y_label, file_name in curve_specs:
        plt.figure()
        for column in columns:
            plt.plot(hist[column], label=column)
        plt.title(title)
        plt.xlabel('Época')
        plt.ylabel(y_label)
        plt.legend()
        plt.savefig(FIG_DIR / file_name, bbox_inches='tight')
        plt.show()
