In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Transformaciones: convertir a tensor y normalizar
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Cargar dataset
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# DataLoaders
batch_size = 64  
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [6]:
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self, hidden_layers=[128, 64], activation=nn.ReLU):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(28*28, hidden_layers[0])
        self.fc2 = nn.Linear(hidden_layers[0], hidden_layers[1])
        self.fc3 = nn.Linear(hidden_layers[1], 10)
        self.activation = activation()

    def forward(self, x):
        x = x.view(-1, 28*28)
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        return self.fc3(x)


In [7]:
import torch.optim as optim

def train(model, train_loader, criterion, optimizer, device):
    model.train()
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

def evaluate(model, test_loader, device):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            output = model(images)
            _, predicted = torch.max(output, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return correct / total


In [8]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def run_experiment(hidden_layers, activation, lr, batch_size, epochs):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    model = MLP(hidden_layers=hidden_layers, activation=activation).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    
    for epoch in range(epochs):
        train(model, train_loader, criterion, optimizer, device)
    
    acc = evaluate(model, test_loader, device)
    return acc


In [9]:
import random

param_grid = {
    'hidden_layers': [[128,64], [256,128], [512,256,128]],
    'activation': [nn.ReLU, nn.Tanh, nn.LeakyReLU],
    'lr': [0.01, 0.001, 0.0005],
    'batch_size': [32,64,128],
    'epochs': [5,10,15]
}

results = []
for _ in range(5):  # 5 combinaciones aleatorias
    params = {k: random.choice(v) for k, v in param_grid.items()}
    acc = run_experiment(**params)
    results.append((params, acc))


In [11]:
for params, acc in sorted(results, key=lambda x: x[1], reverse=True):
    print(f"Accuracy: {acc:.4f} con {params}")


Accuracy: 0.9820 con {'hidden_layers': [512, 256, 128], 'activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'lr': 0.0005, 'batch_size': 32, 'epochs': 15}
Accuracy: 0.9815 con {'hidden_layers': [256, 128], 'activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'lr': 0.0005, 'batch_size': 64, 'epochs': 15}
Accuracy: 0.9807 con {'hidden_layers': [512, 256, 128], 'activation': <class 'torch.nn.modules.activation.LeakyReLU'>, 'lr': 0.001, 'batch_size': 64, 'epochs': 5}
Accuracy: 0.9781 con {'hidden_layers': [256, 128], 'activation': <class 'torch.nn.modules.activation.Tanh'>, 'lr': 0.0005, 'batch_size': 64, 'epochs': 10}
Accuracy: 0.9698 con {'hidden_layers': [128, 64], 'activation': <class 'torch.nn.modules.activation.Tanh'>, 'lr': 0.001, 'batch_size': 32, 'epochs': 5}
