#### Lab4

In [1]:
import torch, torch.nn as nn, torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Seed the torch RNG so weight initialisation and DataLoader shuffling are
# repeatable across Restart & Run All. Some GPU kernels may still be
# non-deterministic, so small run-to-run differences remain possible on CUDA.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pinned host memory only helps host-to-GPU copies; requesting it on a
# CPU-only machine just makes the DataLoader emit a warning.
PIN_MEMORY = device.type == 'cuda'

# Convert images to tensors, then normalise the single channel with the
# given mean / std (these match the commonly cited MNIST statistics).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Both splits are downloaded into ./data on first use.
train_ds = datasets.MNIST(root='./data', train=True,  download=True, transform=transform)
test_ds  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader = DataLoader(train_ds, batch_size=128, shuffle=True, num_workers=2, pin_memory=PIN_MEMORY)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False, num_workers=2, pin_memory=PIN_MEMORY)

100%|██████████| 9.91M/9.91M [00:00<00:00, 10.4MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 531kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.35MB/s]
100%|██████████| 4.54k/4.54k [00:00<?, ?B/s]


In [2]:
# 2.2 Base model: 3 conv + 2 max-pool + FC
class MNIST_CNN(nn.Module):
    """Small convolutional classifier for 28x28 images.

    The feature extractor stacks three 3x3 convolutions (padding 1, so the
    spatial size is preserved) with two 2x2 max-pools that halve it twice
    (28 -> 14 -> 7). The classifier flattens the result and applies a
    two-layer fully connected head.

    Args:
        fc_hidden: Width of the hidden fully connected layer.
        c1: Output channels of the first convolution.
        c2: Output channels of the second convolution.
        c3: Output channels of the third convolution.
        dropout: Dropout probability, used after the first pool and inside
            the classifier.
        in_channels: Number of channels in the input images.
        num_classes: Number of output logits.

    Note:
        The first linear layer is sized for a 7x7 feature map, so inputs
        must be 28x28; other sizes fail with a shape mismatch in the
        classifier.
    """

    def __init__(self, fc_hidden=256, c1=32, c2=64, c3=128, dropout=0.25,
                 in_channels=1, num_classes=10):
        super().__init__()
        # Layer order is kept stable so state-dict keys (features.0, ...)
        # stay compatible with previously saved checkpoints.
        self.features = nn.Sequential(
            nn.Conv2d(in_channels, c1, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.Conv2d(c1, c2, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 28->14
            nn.Dropout(dropout),

            nn.Conv2d(c2, c3, kernel_size=3, padding=1), nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # 14->7
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(c3*7*7, fc_hidden), nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(fc_hidden, num_classes)
        )

    def forward(self, x):
        """Map a batch of images (N, in_channels, 28, 28) to logits (N, num_classes)."""
        x = self.features(x)
        x = self.classifier(x)
        return x

# Baseline run: default-sized network, cross-entropy loss, Adam optimizer.
model = MNIST_CNN()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)  # SGD can be swapped in here


In [3]:
# 2.3 Training and evaluation
def accuracy(logits, y):
    """Return the fraction of rows in ``logits`` whose arg-max equals ``y``.

    Args:
        logits: 2-D tensor of per-class scores, one row per sample.
        y: 1-D tensor of target class indices.

    Returns:
        The mean hit rate over the batch as a Python float.
    """
    predicted = logits.argmax(dim=1)
    matches = (predicted == y).float()
    return matches.mean().item()

@torch.no_grad()
def evaluate(model, loader):
    """Compute the average loss and accuracy of ``model`` over ``loader``.

    Switches the model to eval mode (it is left in eval mode afterwards) and
    runs without gradient tracking. Uses the module-level ``device`` and
    ``criterion``.

    Args:
        model: Network to score.
        loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        ``(mean_loss, accuracy)`` averaged per sample over the whole loader.
    """
    model.eval()
    total_loss = 0.0
    total_correct = 0.0
    seen = 0
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)
        outputs = model(inputs)
        batch_size = targets.size(0)
        # Weight each batch's loss by its size so a smaller final batch does
        # not skew the average (assumes criterion returns a batch mean, as
        # nn.CrossEntropyLoss does by default).
        total_loss += criterion(outputs, targets).item() * batch_size
        total_correct += (outputs.argmax(1) == targets).float().sum().item()
        seen += batch_size
    return total_loss / seen, total_correct / seen

def train(model, train_loader, test_loader, optimizer, epochs=5):
    """Train ``model`` for ``epochs`` passes and report the best evaluation accuracy.

    After every epoch the model is scored on ``test_loader`` with
    ``evaluate`` and the result is printed. Uses the module-level ``device``
    and ``criterion``.

    Args:
        model: Network to optimise in place.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of batches used for the per-epoch evaluation.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        ``{"epoch": ..., "val_acc": ...}`` for the epoch with the highest
        accuracy; ``{"epoch": -1, "val_acc": 0.0}`` if no epoch improved on 0.
    """
    best_epoch, best_acc = -1, 0.0
    for ep in range(1, epochs + 1):
        model.train()  # evaluate() leaves the model in eval mode
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

        val_loss, val_acc = evaluate(model, test_loader)
        print(f"Epoch {ep}: val_loss={val_loss:.4f} val_acc={val_acc:.4f}")
        # Strict comparison: on ties the earliest epoch is kept.
        if val_acc > best_acc:
            best_epoch, best_acc = ep, val_acc

    best = {"epoch": best_epoch, "val_acc": best_acc}
    print("Best:", best)
    return best

# Train the baseline model for five epochs, scoring on the test loader after each one.
best_base = train(
    model=model,
    train_loader=train_loader,
    test_loader=test_loader,
    optimizer=optimizer,
    epochs=5,
)




Epoch 1: val_loss=0.0393 val_acc=0.9859
Epoch 2: val_loss=0.0327 val_acc=0.9896
Epoch 3: val_loss=0.0265 val_acc=0.9911
Epoch 4: val_loss=0.0244 val_acc=0.9908
Epoch 5: val_loss=0.0218 val_acc=0.9925
Best: {'epoch': 5, 'val_acc': 0.9925}


In [4]:
from copy import deepcopy

def count_params(m):
    """Return the total number of scalar elements across all parameters of ``m``."""
    total = 0
    for param in m.parameters():
        total += param.numel()
    return total

# Hyper-parameter sweep: each entry sets the network widths plus the
# optimizer name ("Adam" or "SGD") and its learning rate.
configs = [
    {"fc_hidden": 128, "c1": 16, "c2": 32, "c3": 64,  "opt": "Adam", "lr": 1e-3},
    {"fc_hidden": 256, "c1": 32, "c2": 64, "c3": 128, "opt": "Adam", "lr": 1e-3},
    {"fc_hidden": 256, "c1": 32, "c2": 64, "c3": 128, "opt": "SGD",  "lr": 0.01},
    {"fc_hidden": 512, "c1": 32, "c2": 64, "c3": 128, "opt": "SGD",  "lr": 0.01},
]

# NOTE(review): the test loader doubles as the validation set here, so the
# reported accuracies are optimistic if they are used to pick a configuration.
results = []
for cfg in configs:
    # A fresh model per configuration so runs do not share weights.
    m = MNIST_CNN(fc_hidden=cfg["fc_hidden"], c1=cfg["c1"], c2=cfg["c2"], c3=cfg["c3"]).to(device)
    if cfg["opt"] == "Adam":
        opt = optim.Adam(m.parameters(), lr=cfg["lr"], weight_decay=5e-4)
    elif cfg["opt"] == "SGD":
        opt = optim.SGD(m.parameters(), lr=cfg["lr"], momentum=0.9, weight_decay=5e-4)
    else:
        # Fail loudly instead of silently training a misspelled name with SGD.
        raise ValueError(f"Unknown optimizer {cfg['opt']!r}; expected 'Adam' or 'SGD'")
    n_params = count_params(m)
    print("== Config:", cfg, "params:", n_params)
    best = train(m, train_loader, test_loader, opt, epochs=5)
    # "val_acc" is the accuracy of the final weights; the best epoch seen
    # during training is recorded alongside it.
    val_loss, val_acc = evaluate(m, test_loader)
    results.append({
        **cfg,
        "params": n_params,
        "val_acc": val_acc,
        "best_val_acc": best["val_acc"],
        "best_epoch": best["epoch"],
    })

print("\nResumen:")
for r in results: print(r)


== Config: {'fc_hidden': 128, 'c1': 16, 'c2': 32, 'c3': 64, 'opt': 'Adam', 'lr': 0.001} params: 426122
Epoch 1: val_loss=0.0487 val_acc=0.9839
Epoch 2: val_loss=0.0389 val_acc=0.9883
Epoch 3: val_loss=0.0313 val_acc=0.9898
Epoch 4: val_loss=0.0256 val_acc=0.9911
Epoch 5: val_loss=0.0241 val_acc=0.9926
Best: {'epoch': 5, 'val_acc': 0.9926}
== Config: {'fc_hidden': 256, 'c1': 32, 'c2': 64, 'c3': 128, 'opt': 'Adam', 'lr': 0.001} params: 1701130
Epoch 1: val_loss=0.0457 val_acc=0.9846
Epoch 2: val_loss=0.0295 val_acc=0.9900
Epoch 3: val_loss=0.0355 val_acc=0.9875
Epoch 4: val_loss=0.0269 val_acc=0.9909
Epoch 5: val_loss=0.0298 val_acc=0.9902
Best: {'epoch': 4, 'val_acc': 0.9909}
== Config: {'fc_hidden': 256, 'c1': 32, 'c2': 64, 'c3': 128, 'opt': 'SGD', 'lr': 0.01} params: 1701130
Epoch 1: val_loss=0.0648 val_acc=0.9796
Epoch 2: val_loss=0.0373 val_acc=0.9886
Epoch 3: val_loss=0.0295 val_acc=0.9890
Epoch 4: val_loss=0.0290 val_acc=0.9900
Epoch 5: val_loss=0.0305 val_acc=0.9902
Best: {'epoch