# Modelos de Deep Learning

Laboratorio 03 – Deep Learning

Edwin Ortega 22305 - Esteban Zambrano 22119 - Diego García 22404

Link del repositorio:<br>
https://github.com/EstebanZG999/Lab3_DS

### Imports

In [None]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as transforms
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

### Verifica entorno

In [6]:
# Ask PyTorch once whether a CUDA device is usable and report the answer
has_cuda = torch.cuda.is_available()
print("¿CUDA disponible?:", has_cuda)

# When a GPU is present, also report the name of device 0
if has_cuda:
    print("GPU detectada:", torch.cuda.get_device_name(0))

¿CUDA disponible?: True
GPU detectada: NVIDIA GeForce RTX 4060 Laptop GPU


### Preprocesamiento y Dataset

In [8]:
class PolyMNISTDataset(Dataset):
    """Folder-based PolyMNIST dataset whose label is the modality index.

    The dataset scans ``root_dir/m0``, ``root_dir/m1``, ... and records every
    file whose name ends in ``.png``. Each sample is labelled with the index
    of the sub-folder it was found in (``m0`` -> 0, ``m1`` -> 1, ...).

    Args:
        root_dir: Directory that contains the ``m<i>`` sub-folders.
        transform: Optional callable applied to each grayscale PIL image
            before it is returned.
        num_modalities: How many ``m<i>`` sub-folders to read, starting at
            ``m0``. Defaults to 5 (``m0`` .. ``m4``).

    Raises:
        FileNotFoundError: Propagated from ``os.listdir`` if one of the
            expected sub-folders does not exist.
    """

    def __init__(self, root_dir, transform=None, num_modalities=5):
        self.image_paths = []
        self.labels = []
        self.transform = transform

        for label in range(num_modalities):
            class_dir = os.path.join(root_dir, f"m{label}")
            # os.listdir returns entries in arbitrary order; sorting makes the
            # index -> file mapping reproducible across runs and machines.
            png_names = sorted(
                name for name in os.listdir(class_dir) if name.endswith(".png")
            )
            self.image_paths.extend(
                os.path.join(class_dir, name) for name in png_names
            )
            self.labels.extend([label] * len(png_names))

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``.

        The image is opened from disk, converted to single-channel grayscale
        (mode ``"L"``) and, if a transform was given, passed through it.
        """
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        # The context manager guarantees the underlying file is closed;
        # convert() returns a new in-memory image that outlives the handle.
        with Image.open(img_path) as img:
            image = img.convert("L")

        # Compare against None so a falsy-but-valid transform is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label


Cargar el dataset y crear el DataLoader

In [13]:
# Preprocessing pipeline applied to every image
transform = transforms.Compose([
    transforms.Resize(size=(28, 28)),               # force a uniform 28x28 size
    transforms.ToTensor(),                          # PIL image -> tensor (C, H, W)
    transforms.Normalize(mean=(0.5,), std=(0.5,)),  # rescale values to [-1, 1]
])

# Training split and its shuffled, batched loader
train_dir = "../data/PolyMNIST/MMNIST/train"
train_dataset = PolyMNISTDataset(train_dir, transform=transform)

train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=64)

# Smoke test: pull a single batch and inspect it
images, labels = next(iter(train_loader))
print(f"Shape batch imágenes: {images.shape}")  # expected: (64, 1, 28, 28)
print(f"Labels: {labels[:10]}")


Shape batch imágenes: torch.Size([64, 1, 28, 28])
Labels: tensor([3, 4, 3, 4, 3, 0, 1, 0, 1, 1])


GPU Check en el loader

In [14]:
# Select the compute device: CUDA when PyTorch can see one, CPU otherwise
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Entrenando en:", device)

# Fetch one batch and move both tensors to the selected device as a sanity check
images, labels = (tensor.to(device) for tensor in next(iter(train_loader)))
print("Datos en GPU:", images.device)

Entrenando en: cuda
Datos en GPU: cuda:0


### Primer Modelo - CNN 

In [16]:
class CNNModel(nn.Module):
    """Small CNN: two conv + max-pool stages followed by two linear layers.

    The first linear layer is sized for a 32 x 7 x 7 feature map, which is
    what 28 x 28 inputs produce after the two 2x2 poolings.

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of output logits. Defaults to 5 (m0 .. m4).
    """

    def __init__(self, in_channels=1, num_classes=5):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)  # halves height and width
        self.fc1 = nn.Linear(32 * 7 * 7, 128)  # consumes the flattened feature map
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return class logits for a batch of images.

        Raises:
            ValueError: If the feature map after the second pooling is not
                32 x 7 x 7, i.e. the input was not 28 x 28.
        """
        x = self.pool(F.relu(self.conv1(x)))  # [C, 28, 28] -> [16, 14, 14]
        x = self.pool(F.relu(self.conv2(x)))  # [16, 14, 14] -> [32, 7, 7]

        # view(-1, ...) would silently fold a spatial-size mismatch into the
        # batch dimension (e.g. 56x56 inputs yield 4x as many rows), so the
        # feature-map shape is validated explicitly before flattening.
        feature_shape = tuple(x.shape[-3:])
        if feature_shape != (32, 7, 7):
            raise ValueError(
                f"CNNModel expects 28x28 inputs; got a feature map of shape "
                f"{feature_shape} instead of (32, 7, 7)"
            )

        x = x.view(-1, 32 * 7 * 7)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

##### Entrenamiento primer modelo

In [None]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and its
    weights are updated after every batch. When it is ``None`` the model is
    put in eval mode and gradients are disabled.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            if training:
                loss.backward()
                optimizer.step()

            # criterion returns the batch mean; weight by batch size so the
            # final average is per-sample even if the last batch is smaller.
            loss_sum += loss.item() * images.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        raise ValueError("loader produced no samples")
    return loss_sum / total, correct / total


# Build the network here so the cell works on a fresh kernel; previously
# `model` was never defined in the notebook.
model = CNNModel().to(device)

# Test split loader; previously `test_loader` was never defined in the notebook.
# NOTE(review): assumes the test split sits next to the train split — confirm.
test_dir = "../data/PolyMNIST/MMNIST/test"
test_dataset = PolyMNISTDataset(root_dir=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 5
for epoch in range(1, epochs+1):
    epoch_loss, epoch_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
    print(f"Época {epoch}/{epochs} — Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.4f}")

# Evaluation on the test set (no optimizer -> eval mode, no gradients)
test_loss, test_acc = _run_epoch(model, test_loader, criterion, device)
print(f"\n— Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.4f}")

# Persist the trained weights
torch.save(model.state_dict(), "cnn_polymnist_m0-4.pth")
print("\nModelo guardado como cnn_polymnist_m0-4.pth")