In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import itertools
import pandas as pd


In [3]:
#Carga del Dataset MNIST
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

def get_dataloader(batch_size=64):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

100%|██████████| 9.91M/9.91M [00:01<00:00, 7.10MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 446kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 4.50MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 2.80MB/s]


In [4]:
# Definición de la clase MLP
class MLP(nn.Module):
    def __init__(self, hidden_layers=[128, 64], activation='relu'):
        super(MLP, self).__init__()
        
        self.activation = activation
        layers = []
        
        input_size = 28*28
        for hidden_size in hidden_layers:
            layers.append(nn.Linear(input_size, hidden_size))
            input_size = hidden_size
        layers.append(nn.Linear(input_size, 10))
        
        self.layers = nn.ModuleList(layers)

    def forward(self, x):
        x = x.view(-1, 28*28)
        for i, layer in enumerate(self.layers):
            x = layer(x)
            if i < len(self.layers) - 1:
                if self.activation == 'relu':
                    x = F.relu(x)
                elif self.activation == 'tanh':
                    x = torch.tanh(x)
                elif self.activation == 'sigmoid':
                    x = torch.sigmoid(x)
        return x

In [6]:
# Entrenamiento y Evaluación
def train_model(model, train_loader, optimizer, criterion, epochs=5):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for images, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(train_loader):.4f}")

def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    return accuracy


In [7]:
# Grid Search de Hiperparámetros
hidden_layers_options = [[128, 64], [256, 128, 64], [64]]
activation_options = ['relu', 'tanh', 'sigmoid']
learning_rate_options = [0.001, 0.01]
batch_size_options = [32, 64]
epochs_options = [5, 10]

# Generar todas las combinaciones
grid = list(itertools.product(hidden_layers_options, activation_options, learning_rate_options, batch_size_options, epochs_options))

print(f"Total de combinaciones: {len(grid)}")

grid_results = []

for idx, (hidden_layers, activation, lr, batch_size, epochs) in enumerate(grid):
    print(f"\n=== Probando Configuración {idx+1}/{len(grid)} ===")
    print(f"Capas: {hidden_layers} | Activación: {activation} | LR: {lr} | Batch: {batch_size} | Epochs: {epochs}")
    
    train_loader, test_loader = get_dataloader(batch_size=batch_size)
    model = MLP(hidden_layers=hidden_layers, activation=activation)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()
    
    train_model(model, train_loader, optimizer, criterion, epochs=epochs)
    acc = evaluate_model(model, test_loader)
    print(f"Accuracy: {acc:.2f}%")
    
    grid_results.append({
        "Capas Ocultas": hidden_layers,
        "Activación": activation,
        "Learning Rate": lr,
        "Batch Size": batch_size,
        "Epochs": epochs,
        "Accuracy": acc
    })


Total de combinaciones: 72

=== Probando Configuración 1/72 ===
Capas: [128, 64] | Activación: relu | LR: 0.001 | Batch: 32 | Epochs: 5
Epoch [1/5], Loss: 0.3566
Epoch [2/5], Loss: 0.1703
Epoch [3/5], Loss: 0.1295
Epoch [4/5], Loss: 0.1106
Epoch [5/5], Loss: 0.0963
Accuracy: 96.13%

=== Probando Configuración 2/72 ===
Capas: [128, 64] | Activación: relu | LR: 0.001 | Batch: 32 | Epochs: 10
Epoch [1/10], Loss: 0.3429
Epoch [2/10], Loss: 0.1629
Epoch [3/10], Loss: 0.1231
Epoch [4/10], Loss: 0.1027
Epoch [5/10], Loss: 0.0883
Epoch [6/10], Loss: 0.0788
Epoch [7/10], Loss: 0.0700
Epoch [8/10], Loss: 0.0632
Epoch [9/10], Loss: 0.0579
Epoch [10/10], Loss: 0.0534
Accuracy: 97.10%

=== Probando Configuración 3/72 ===
Capas: [128, 64] | Activación: relu | LR: 0.001 | Batch: 64 | Epochs: 5
Epoch [1/5], Loss: 0.3927
Epoch [2/5], Loss: 0.1841
Epoch [3/5], Loss: 0.1329
Epoch [4/5], Loss: 0.1092
Epoch [5/5], Loss: 0.0954
Accuracy: 97.10%

=== Probando Configuración 4/72 ===
Capas: [128, 64] | Activac

In [8]:
#Tabla final y ranking
df_grid = pd.DataFrame(grid_results)
df_grid = df_grid.sort_values(by="Accuracy", ascending=False).reset_index(drop=True)
print("\nRanking de todas las combinaciones probadas:")
print(df_grid.head(10))  # Muestra las 10 mejores


Ranking de todas las combinaciones probadas:
    Capas Ocultas Activación  Learning Rate  Batch Size  Epochs  Accuracy
0  [256, 128, 64]    sigmoid          0.001          64      10     97.34
1       [128, 64]    sigmoid          0.001          32      10     97.23
2  [256, 128, 64]       relu          0.001          64      10     97.11
3       [128, 64]       relu          0.001          32      10     97.10
4       [128, 64]       relu          0.001          64       5     97.10
5       [128, 64]       relu          0.001          64      10     97.07
6       [128, 64]    sigmoid          0.001          64      10     97.06
7  [256, 128, 64]       relu          0.001          32      10     97.00
8  [256, 128, 64]    sigmoid          0.001          64       5     96.96
9  [256, 128, 64]       tanh          0.001          64      10     96.96
