## 1. Arquitectura model

In [32]:
# unzip dataset_numeros.zip
!unzip dataset_numeros.zip

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
 extracting: dataset_numeros/0/0_0.png  
 extracting: dataset_numeros/0/0_1.png  
 extracting: dataset_numeros/0/0_10.png  
 extracting: dataset_numeros/0/0_100.png  
 extracting: dataset_numeros/0/0_101.png  
 extracting: dataset_numeros/0/0_102.png  
 extracting: dataset_numeros/0/0_103.png  
 extracting: dataset_numeros/0/0_104.png  
 extracting: dataset_numeros/0/0_105.png  
 extracting: dataset_numeros/0/0_106.png  
 extracting: dataset_numeros/0/0_107.png  
 extracting: dataset_numeros/0/0_108.png  
 extracting: dataset_numeros/0/0_109.png  
 extracting: dataset_numeros/0/0_11.png  
 extracting: dataset_numeros/0/0_110.png  
 extracting: dataset_numeros/0/0_111.png  
 extracting: dataset_numeros/0/0_112.png  
 extracting: dataset_numeros/0/0_113.png  
 extracting: dataset_numeros/0/0_114.png  
 extracting: dataset_numeros/0/0_115.png  
 extracting: dataset_numeros/0/0_116.png  
 extracting: dataset_numeros

In [43]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        # Definir las capas convolucionales y de agrupamiento
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)  # Capa convolucional 1
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # MaxPooling 1

        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)  # Capa convolucional 2
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)  # MaxPooling 2

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)  # Capa convolucional 3
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)  # MaxPooling 3

        self.fc1 = nn.Linear(128 * 8 * 5, 128)  # Capa densa
        self.dropout = nn.Dropout(0.5)  # Dropout para regularización
        self.fc2 = nn.Linear(128, 10)  # Capa de salida para 10 clases

    def forward(self, x):
        # Definir el paso hacia adelante
        x = self.pool1(F.relu(self.conv1(x)))  # Capa 1
        x = self.pool2(F.relu(self.conv2(x)))  # Capa 2
        x = self.pool3(F.relu(self.conv3(x)))  # Capa 3

        x = x.view(-1, 128 * 8 * 5)  # Aplanar la salida para la capa densa
        x = F.relu(self.fc1(x))  # Capa densa
        x = self.dropout(x)  # Aplicar dropout
        x = self.fc2(x)  # Capa de salida

        return x

# Crear una instancia del modelo
model = CNNModel()


## 2. Dataset


In [38]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Definición de parámetros
batch_size = 32
img_height, img_width = 64, 40  # Tamaño de las imágenes

from torchvision import transforms

# Transformaciones para las imágenes
transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),  # Redimensionar las imágenes
    transforms.Grayscale(num_output_channels=1),  # Asegurarse de que la imagen sea en escala de grises
    transforms.ToTensor(),  # Convertir imágenes a tensores
    transforms.Normalize((0.5,), (0.5,)),  # Normalizar las imágenes (grayscale)
])

# Cargar el dataset
base_dir = 'dataset_numeros'
dataset = datasets.ImageFolder(root=base_dir, transform=transform)

# Dividir el dataset en entrenamiento (70%) y prueba (30%)
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Crear DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Verificación
print(f"Total dataset size: {len(dataset)}")
print(f"Training size: {len(train_dataset)}")
print(f"Test size: {len(test_dataset)}")

Total dataset size: 5000
Training size: 3500
Test size: 1500


## 3. Train Model

In [44]:
import torch.nn as nn
import torch.optim as optim

# Definición del modelo (usar el que has creado previamente)
model = CNNModel()
# Definir el optimizador y la función de pérdida
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()


In [13]:
!pip install wandb


Collecting wandb
  Downloading wandb-0.18.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.15.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading wandb-0.18.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_

In [49]:
import wandb

# Login
wandb.login(key='a14c6a2ec25620e6e2047f787c8dbe5d7710eaef')

# Inicializa WandB
wandb.init(project="repte1_psiv", entity="andreu-mir")

[34m[1mwandb[0m: Currently logged in as: [33mandreu-mir[0m ([33mxisca[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [39]:
# see shape of dataset_numeros\0\0_0.png

print(dataset[0][0].shape)

torch.Size([1, 64, 40])


In [46]:
# Entrenamiento del modelo

# Move the model to the GPU before training
model.to('cuda') # This line is added to move the model's weights to the GPU

train_losses = []
train_accuracies = []

num_epochs = 20
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images = images.to('cuda')  # Usa 'cuda' si tienes GPU
        labels = labels.to('cuda')  # Usa 'cuda' si tienes GPU

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # Calcular precisión
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Calcula y registra la pérdida y la precisión
    epoch_loss = running_loss / len(train_loader)
    epoch_accuracy = correct / total

    train_losses.append(epoch_loss)
    train_accuracies.append(epoch_accuracy)

    # Log metrics to WandB
    wandb.log({
        "epoch": epoch + 1,
        "loss": epoch_loss,
        "accuracy": epoch_accuracy,
    })

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')


Epoch [1/20], Loss: 0.1856, Accuracy: 0.9403
Epoch [2/20], Loss: 0.0897, Accuracy: 0.9717
Epoch [3/20], Loss: 0.0585, Accuracy: 0.9794
Epoch [4/20], Loss: 0.0582, Accuracy: 0.9829
Epoch [5/20], Loss: 0.0411, Accuracy: 0.9889
Epoch [6/20], Loss: 0.0380, Accuracy: 0.9863
Epoch [7/20], Loss: 0.0242, Accuracy: 0.9926
Epoch [8/20], Loss: 0.0263, Accuracy: 0.9903
Epoch [9/20], Loss: 0.0283, Accuracy: 0.9900
Epoch [10/20], Loss: 0.0144, Accuracy: 0.9960
Epoch [11/20], Loss: 0.0148, Accuracy: 0.9963
Epoch [12/20], Loss: 0.0249, Accuracy: 0.9914
Epoch [13/20], Loss: 0.0187, Accuracy: 0.9931
Epoch [14/20], Loss: 0.0072, Accuracy: 0.9974
Epoch [15/20], Loss: 0.0173, Accuracy: 0.9934
Epoch [16/20], Loss: 0.0176, Accuracy: 0.9943
Epoch [17/20], Loss: 0.0222, Accuracy: 0.9911
Epoch [18/20], Loss: 0.0293, Accuracy: 0.9903
Epoch [19/20], Loss: 0.0195, Accuracy: 0.9920
Epoch [20/20], Loss: 0.0107, Accuracy: 0.9960


VBox(children=(Label(value='0.402 MB of 0.402 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
accuracy,▁▅▆▆▇▇▇▇▇██▇▇███▇▇▇█
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▄▃▃▂▂▂▂▂▁▁▂▁▁▁▁▂▂▁▁

0,1
accuracy,0.996
epoch,20.0
loss,0.01072


## 4. Test model

In [50]:
# Model to CPU
model.to('cpu')

# Evaluación del modelo
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.view(-1, 1, img_height, img_width)  # (batch_size, channels, height, width)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Calcular la precisión
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

# Log de precisión en WandB
wandb.log({"Accuracy": accuracy})


Accuracy: 98.93%


In [51]:
# Finaliza la sesión de WandB
wandb.finish()

VBox(children=(Label(value='0.011 MB of 0.011 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Accuracy,▁

0,1
Accuracy,98.93333
