#### Lab4

Esta vez sí pondré una semilla (seed) para garantizar la reproducibilidad; no hacerlo fue un error del lab anterior.

In [6]:
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import numpy as np
import random

# Seeds: fix every RNG this notebook touches so a fresh "Restart & Run All"
# reproduces the same numbers.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # silently ignored when CUDA is unavailable

# cuDNN autotuning (benchmark=True) may pick different convolution algorithms
# from run to run, which defeats the seeds above. Trade a little speed for
# repeatable results.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Query CUDA once and derive both the flag and the device from the same answer.
is_cuda = torch.cuda.is_available()
device = torch.device('cuda' if is_cuda else 'cpu')

# Preprocessing: convert each image to a tensor, then standardize it.
# NOTE(review): 0.1307 / 0.3081 are presumably the MNIST mean / std - confirm.
NORM_MEAN, NORM_STD = (0.1307,), (0.3081,)
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(NORM_MEAN, NORM_STD)]
)

# Both splits share the same storage root, download flag and preprocessing.
mnist_common = dict(root='./data', download=True, transform=transform)
train_ds = datasets.MNIST(train=True, **mnist_common)
test_ds = datasets.MNIST(train=False, **mnist_common)

# pin_memory is only enabled when a GPU is present (avoids the warning on CPU).
loader_common = dict(num_workers=2, pin_memory=is_cuda)
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True, **loader_common)
test_loader = DataLoader(test_ds, batch_size=256, shuffle=False, **loader_common)


El modelo CNN

In [7]:
# 2.2 Modelo base: 3 conv + 2 maxpool + FC
class MNIST_CNN(nn.Module):
    def __init__(self, fc_hidden=256, c1=32, c2=64, c3=128, dropout=0.25):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1,  c1, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(c1, c2, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 28->14
            nn.Dropout(dropout),

            nn.Conv2d(c2, c3, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 14->7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(c3*7*7, fc_hidden), nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(fc_hidden, 10)
        )
    def forward(self, x):
        return self.classifier(self.features(x))


Entrenamiento + evaluación + checkpoint del mejor

In [8]:
# Loss object referenced by the training and evaluation helpers below.
criterion = nn.CrossEntropyLoss()


def accuracy_from_logits(logits, y):
    """Return, as a Python float, the fraction of rows whose arg-max equals `y`.

    Args:
        logits: 2-D tensor of scores; the arg-max is taken over dim 1.
        y: tensor of target class indices, one per row of `logits`.
    """
    predicted = logits.argmax(dim=1)
    hits = (predicted == y).float()
    return hits.mean().item()

def evaluate(model, loader):
    """Compute the average loss and the accuracy of `model` over `loader`.

    The model is switched to eval mode and left there; gradients are disabled
    for the whole pass. Uses the module-level `criterion` and `device`.

    Args:
        model: network returning one row of class scores per input sample.
        loader: iterable of `(inputs, targets)` batches.

    Returns:
        `(mean_loss, accuracy)`, both averaged over the number of samples seen.
    """
    model.eval()
    running_loss = 0.0
    hits = 0
    seen = 0
    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            batch_size = targets.size(0)
            # criterion returns the batch mean; weight it by the batch size so
            # that a smaller final batch does not skew the overall average.
            running_loss += criterion(logits, targets).item() * batch_size
            hits += (logits.argmax(1) == targets).sum().item()
            seen += batch_size
    return running_loss / seen, hits / seen

def train(model, train_loader, test_loader, optimizer, epochs=5, save_prefix=None):
    """Train `model` for `epochs` epochs, tracking the best validation accuracy.

    After every epoch the model is scored on `test_loader` with `evaluate`;
    that score is printed and used to pick the best epoch. Uses the
    module-level `criterion` and `device`.

    Args:
        model: network to optimise in place.
        train_loader: iterable of `(inputs, targets)` training batches.
        test_loader: loader passed to `evaluate` after each epoch.
        optimizer: optimizer already bound to `model`'s parameters.
        epochs: number of passes over `train_loader`.
        save_prefix: when not None, each new best epoch is written to
            `"<save_prefix>.pth"` (model + optimizer state, epoch, accuracy),
            overwriting the previous file.

    Returns:
        Dict with the best `"epoch"` (-1 if none improved on 0.0) and `"val_acc"`.
    """
    best = {"epoch": -1, "val_acc": 0.0}

    for ep in range(1, epochs + 1):
        model.train()
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

        val_loss, val_acc = evaluate(model, test_loader)
        print(f"Epoch {ep}: val_loss={val_loss:.4f} val_acc={val_acc:.4f}")

        # Only a strict improvement counts as a new best.
        if not val_acc > best["val_acc"]:
            continue

        best = {"epoch": ep, "val_acc": float(val_acc)}
        if save_prefix is None:
            continue
        snapshot = {
            "epoch": ep,
            "val_acc": float(val_acc),
            "model_state_dict": model.state_dict(),
            "optimizer_state_dict": optimizer.state_dict(),
        }
        torch.save(snapshot, f"{save_prefix}.pth")

    print("Best:", best)
    return best


Se guardará el mejor modelo en un archivo .pth para usarlo después.

In [None]:
# Baseline run: default architecture trained with Adam for 5 epochs; the best
# epoch is checkpointed to "best_cnn_base_state.pth".
model = MNIST_CNN()
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
best_base = train(
    model,
    train_loader,
    test_loader,
    optimizer,
    epochs=5,
    save_prefix="best_cnn_base_state",
)


Epoch 1: val_loss=0.0377 val_acc=0.9874
Epoch 2: val_loss=0.0287 val_acc=0.9904


Bucle sobre las configuraciones que guarda el mejor modelo

In [None]:
def count_params(m):
    """Return the total number of elements over all tensors in `m.parameters()`."""
    total = 0
    for tensor in m.parameters():
        total += tensor.numel()
    return total

# Hyper-parameter grid: each entry fixes the CNN widths plus the optimizer
# family and its learning rate.
configs = [
    {"fc_hidden": 128, "c1": 16, "c2": 32, "c3": 64,  "opt": "Adam", "lr": 1e-3},
    {"fc_hidden": 256, "c1": 32, "c2": 64, "c3": 128, "opt": "Adam", "lr": 1e-3},
    {"fc_hidden": 256, "c1": 32, "c2": 64, "c3": 128, "opt": "SGD",  "lr": 0.01},
    {"fc_hidden": 512, "c1": 32, "c2": 64, "c3": 128, "opt": "SGD",  "lr": 0.01},
]

# Config keys that are forwarded to the MNIST_CNN constructor.
MODEL_KEYS = ("fc_hidden", "c1", "c2", "c3")
# Final location of the checkpoint of the best configuration of the whole search.
SEARCH_CKPT_PATH = "best_cnn_search_state.pth"

results = []
best_overall = {"acc": 0.0, "cfg": None}

for idx, cfg in enumerate(configs):
    m = MNIST_CNN(**{key: cfg[key] for key in MODEL_KEYS}).to(device)
    if cfg["opt"] == "Adam":
        opt = optim.Adam(m.parameters(), lr=cfg["lr"], weight_decay=5e-4)
    else:
        opt = optim.SGD(m.parameters(), lr=cfg["lr"], momentum=0.9, weight_decay=5e-4)
    n_weights = count_params(m)
    print("== Config:", cfg, "params:", n_weights)

    # train() only tracks the best epoch of the current run, so every run gets
    # its own checkpoint file. Sharing one path would let a later, worse
    # configuration overwrite the weights of an earlier, better one.
    run_prefix = f"cnn_search_cfg{idx}"
    best_run = train(m, train_loader, test_loader, opt, epochs=5, save_prefix=run_prefix)

    # Record the accuracy of the checkpointed (best) epoch rather than
    # re-scoring the last epoch, so the summary matches what is on disk.
    results.append({
        **cfg,
        "params": n_weights,
        "val_acc": best_run["val_acc"],
        "best_epoch": best_run["epoch"],
    })

    if best_run["val_acc"] > best_overall["acc"]:
        best_overall = {"acc": best_run["val_acc"], "cfg": cfg}
        # Promote this run's checkpoint to the search-wide best and store the
        # config next to the weights so the architecture can be rebuilt later.
        winner = torch.load(f"{run_prefix}.pth", map_location="cpu")
        winner["cfg"] = dict(cfg)
        torch.save(winner, SEARCH_CKPT_PATH)

print("\nResumen:")
for r in results:
    print(r)
print("\nMejor config:", best_overall)


== Config: {'fc_hidden': 128, 'c1': 16, 'c2': 32, 'c3': 64, 'opt': 'Adam', 'lr': 0.001} params: 426122
Epoch 1: val_loss=0.0487 val_acc=0.9839
Epoch 2: val_loss=0.0389 val_acc=0.9883
Epoch 3: val_loss=0.0313 val_acc=0.9898
Epoch 4: val_loss=0.0256 val_acc=0.9911
Epoch 5: val_loss=0.0241 val_acc=0.9926
Best: {'epoch': 5, 'val_acc': 0.9926}
== Config: {'fc_hidden': 256, 'c1': 32, 'c2': 64, 'c3': 128, 'opt': 'Adam', 'lr': 0.001} params: 1701130
Epoch 1: val_loss=0.0457 val_acc=0.9846
Epoch 2: val_loss=0.0295 val_acc=0.9900
Epoch 3: val_loss=0.0355 val_acc=0.9875
Epoch 4: val_loss=0.0269 val_acc=0.9909
Epoch 5: val_loss=0.0298 val_acc=0.9902
Best: {'epoch': 4, 'val_acc': 0.9909}
== Config: {'fc_hidden': 256, 'c1': 32, 'c2': 64, 'c3': 128, 'opt': 'SGD', 'lr': 0.01} params: 1701130
Epoch 1: val_loss=0.0648 val_acc=0.9796
Epoch 2: val_loss=0.0373 val_acc=0.9886
Epoch 3: val_loss=0.0295 val_acc=0.9890
Epoch 4: val_loss=0.0290 val_acc=0.9900
Epoch 5: val_loss=0.0305 val_acc=0.9902
Best: {'epoch

Cargar baseline MLP del lab anterior

In [None]:
# MLP class compatible with the training done in the previous lab
class MLP(nn.Module):
    """Fully connected classifier for 28x28 inputs with 10 output logits.

    Layers are: Flatten, then (Linear, activation) per hidden width, then a
    final Linear to 10 outputs. The position of each layer inside `self.net`
    is part of the state_dict key names, so the order must stay as is for
    previously saved weights to load.

    Args:
        hidden_layers: sequence of hidden widths. An empty sequence yields a
            single Linear(784, 10) layer.
        activation: zero-argument callable (e.g. an nn.Module class) that
            builds the activation placed after every hidden Linear.
    """

    def __init__(self, hidden_layers=(256, 128), activation=nn.ReLU):
        super().__init__()
        # Immutable default plus a local copy: no list is shared between calls.
        widths = [28 * 28, *hidden_layers]
        layers = [nn.Flatten()]
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers += [nn.Linear(fan_in, fan_out), activation()]
        layers.append(nn.Linear(widths[-1], 10))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the layer stack."""
        return self.net(x)

# Best MLP configuration (per the notebook notes, carried over from the previous lab):
best_params_mlp = {"hidden_layers": [512, 256, 128], "activation": nn.ReLU}

# Rebuild the architecture, then restore the weights stored in best_random_model.pth.
baseline = MLP(
    hidden_layers=best_params_mlp["hidden_layers"],
    activation=best_params_mlp["activation"],
)
baseline = baseline.to(device)
mlp_weights = torch.load("best_random_model.pth", map_location=device)
baseline.load_state_dict(mlp_weights)
baseline.eval()

# (optional) quick evaluation
baseline_metrics = evaluate(baseline, test_loader)
acc_base = baseline_metrics[1]
print(f"Baseline MLP acc: {acc_base:.4f}")


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

@torch.no_grad()
def preds_and_labels(model, loader, device='cpu'):
    """Collect arg-max predictions and the matching labels over a whole loader.

    The model is put in eval mode. Inputs are moved to `device`; the model
    itself is not moved here.

    Args:
        model: network returning one row of class scores per input sample.
        loader: iterable of `(inputs, labels)` batches of tensors.
        device: device the inputs are sent to before the forward pass.

    Returns:
        `(predictions, labels)` as two concatenated NumPy arrays.
    """
    model.eval()
    batches = [
        (model(inputs.to(device)).argmax(1).cpu().numpy(), labels.numpy())
        for inputs, labels in loader
    ]
    predictions = np.concatenate([pred for pred, _ in batches])
    targets = np.concatenate([true for _, true in batches])
    return predictions, targets

def plot_cm(cm, title, normalize=False):
    """Draw a confusion matrix as an annotated heat-map and show it.

    Args:
        cm: 2-D array of counts; rows are labelled "Real" and columns
            "Predicho". The same tick positions are used on both axes, so a
            square matrix is assumed.
        title: figure title.
        normalize: when True, every row is divided by its sum and cells are
            printed with two decimals; otherwise cells are printed as integers.
    """
    cm = np.asarray(cm)
    if normalize:
        cm = cm.astype(np.float64)
        row_totals = cm.sum(axis=1, keepdims=True)
        # A row with no samples would divide by zero and fill the row with NaN
        # (which also breaks the colour threshold below); keep it at 0 instead.
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)

    fig, ax = plt.subplots(figsize=(5, 5))
    image = ax.imshow(cm, interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel("Predicho")
    ax.set_ylabel("Real")
    fig.colorbar(image, ax=ax)

    ticks = np.arange(cm.shape[0])
    ax.set_xticks(ticks)
    ax.set_xticklabels(ticks)
    ax.set_yticks(ticks)
    ax.set_yticklabels(ticks)

    # Cells above half of the maximum get white text, the rest black, so the
    # numbers stay readable against the colour map.
    thresh = cm.max() / 2.0
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            val = f"{cm[i,j]:.2f}" if normalize else str(int(cm[i, j]))
            ax.text(j, i, val, ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")

    fig.tight_layout()
    plt.show()

def n_params(m):
    """Return how many scalar values `m.parameters()` holds in total."""
    sizes = map(lambda tensor: tensor.numel(), m.parameters())
    return sum(sizes)

# Load the CNN checkpoint written by the configuration search and rebuild a
# network whose layer sizes match the stored weights.
ckpt = torch.load("best_cnn_search_state.pth", map_location=device)
cnn_state = ckpt["model_state_dict"]

saved_cfg = ckpt.get("cfg")
if saved_cfg is not None:
    # The checkpoint carries its own configuration: use it (model keys only).
    best_cfg = {key: saved_cfg[key] for key in ("fc_hidden", "c1", "c2", "c3")}
else:
    # No metadata stored: read the widths off the tensor shapes instead of
    # guessing, since a wrong guess makes load_state_dict fail on a size
    # mismatch. Dim 0 of a Conv2d / Linear weight is its number of outputs;
    # the key names follow the layer positions inside MNIST_CNN.
    best_cfg = {
        "fc_hidden": cnn_state["classifier.1.weight"].shape[0],
        "c1": cnn_state["features.0.weight"].shape[0],
        "c2": cnn_state["features.2.weight"].shape[0],
        "c3": cnn_state["features.5.weight"].shape[0],
    }

cnn_best = MNIST_CNN(**best_cfg).to(device)
cnn_best.load_state_dict(cnn_state)
cnn_best.eval()

# CNN: predictions, confusion matrix, raw and row-normalized plots
p_cnn, y_cnn = preds_and_labels(cnn_best, test_loader, device=device)
cm_cnn = confusion_matrix(y_cnn, p_cnn)
for cm_title, use_norm in (
    ("Matriz de Confusión – CNN", False),
    ("Matriz de Confusión – CNN (normalizada)", True),
):
    plot_cm(cm_cnn, cm_title, normalize=use_norm)

# Baseline: same treatment for the MLP
p_mlp, y_mlp = preds_and_labels(baseline, test_loader, device=device)
cm_mlp = confusion_matrix(y_mlp, p_mlp)
for cm_title, use_norm in (
    ("Matriz de Confusión – Baseline", False),
    ("Matriz de Confusión – Baseline (normalizada)", True),
):
    plot_cm(cm_mlp, cm_title, normalize=use_norm)

# Metrics and efficiency: accuracy is the diagonal (correct predictions) over
# the total count; "Acc/MP" is accuracy per million parameters.
acc_cnn = np.trace(cm_cnn) / cm_cnn.sum()
acc_mlp = np.trace(cm_mlp) / cm_mlp.sum()
params_cnn = n_params(cnn_best)
params_mlp = n_params(baseline)
print(f"Acc CNN:  {acc_cnn:.4f}  | params: {params_cnn:,}")
print(f"Acc MLP:  {acc_mlp:.4f}  | params: {params_mlp:,}")
print(f"Acc/MP (CNN): {acc_cnn/(params_cnn/1e6):.3f}")
print(f"Acc/MP (MLP): {acc_mlp/(params_mlp/1e6):.3f}")
