# 1. Model Architecture

In [1]:
# unzip dataset_letras.zip
!unzip dataset_letras.zip

[1;30;43mSe han truncado las últimas 5000 líneas del flujo de salida.[0m
 extracting: dataset_letras/Q/Q_106.png  
 extracting: dataset_letras/Q/Q_107.png  
 extracting: dataset_letras/Q/Q_108.png  
 extracting: dataset_letras/Q/Q_109.png  
 extracting: dataset_letras/Q/Q_11.png  
 extracting: dataset_letras/Q/Q_110.png  
 extracting: dataset_letras/Q/Q_111.png  
 extracting: dataset_letras/Q/Q_112.png  
 extracting: dataset_letras/Q/Q_113.png  
 extracting: dataset_letras/Q/Q_114.png  
 extracting: dataset_letras/Q/Q_115.png  
 extracting: dataset_letras/Q/Q_116.png  
 extracting: dataset_letras/Q/Q_117.png  
 extracting: dataset_letras/Q/Q_118.png  
 extracting: dataset_letras/Q/Q_119.png  
 extracting: dataset_letras/Q/Q_12.png  
 extracting: dataset_letras/Q/Q_120.png  
 extracting: dataset_letras/Q/Q_121.png  
 extracting: dataset_letras/Q/Q_122.png  
 extracting: dataset_letras/Q/Q_123.png  
 extracting: dataset_letras/Q/Q_124.png  
 extracting: dataset_letras/Q/Q_125.png  
 ex

In [2]:
import torch
import torch.nn as nn
from torchvision import models

# Pretrained ResNet-18 with its classification head resized for this task
class ResNetModel(nn.Module):
    """ResNet-18 classifier whose final fully connected layer is replaced.

    The backbone is loaded with ``models.ResNet18_Weights.DEFAULT`` and its
    ``fc`` layer is swapped for a new ``nn.Linear`` producing ``num_classes``
    outputs.

    Args:
        num_classes: Number of output classes. Defaults to 26, the value the
            notebook originally hard-coded.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes: int = 26):
        super().__init__()

        # Validate before loading weights so a bad argument fails fast.
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")

        # Load the pretrained ResNet-18 backbone.
        self.resnet = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

        # Replace the final fully connected layer (1000 outputs in the stock
        # model, per the original comment) with one sized for our classes.
        in_features = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Run ``x`` through the wrapped ResNet and return its output.

        ``x`` is presumably a batch of image tensors (the notebook's samples
        are 3x64x40) -- TODO confirm for other callers.
        """
        return self.resnet(x)

# Create a model instance
# NOTE(review): the training-setup cell later re-assigns `model = ResNetModel()`,
# so this instance appears to be discarded -- confirm whether it is still needed.
model = ResNetModel()



Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 172MB/s]


# 2. Dataset

In [3]:
import os
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Loading parameters
batch_size = 32
img_height, img_width = 64, 40  # target image size (height, width)
split_seed = 42  # fixed seed so the train/test split is the same on every run

# Preprocessing applied to every image
transform = transforms.Compose([
    transforms.Resize((img_height, img_width)),   # resize to a common size
    transforms.ToTensor(),                        # convert the image to a tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # per-channel normalisation (3 channels)
])


# Load the dataset; ImageFolder takes one sub-directory per class
base_dir = 'dataset_letras'
dataset = datasets.ImageFolder(root=base_dir, transform=transform)

# Split into training (70%) and test (30%) subsets.
# A seeded generator makes the split reproducible; without it the test set
# changes on every run and reported accuracies are not comparable.
train_size = int(0.7 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Build the DataLoaders (only the training data is shuffled)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Sanity check of the split sizes
print(f"Total dataset size: {len(dataset)}")
print(f"Training size: {len(train_dataset)}")
print(f"Test size: {len(test_dataset)}")

Total dataset size: 13000
Training size: 9100
Test size: 3900


# 3. Train Model

In [5]:
import torch.optim as optim
import torch.nn as nn

# Fresh ResNet-based classifier to be trained
model = ResNetModel()

# Cross-entropy loss, plus an Adam optimiser over all model parameters
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [6]:
!pip install wandb

Collecting wandb
  Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.15.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading wandb-0.18.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_

In [7]:
import wandb

import os
from getpass import getpass

## Login
# SECURITY: never hard-code the API key in the notebook. A key was previously
# committed in this cell and must be revoked/rotated in the W&B settings.
# The key is taken from the WANDB_API_KEY environment variable when set,
# otherwise it is requested interactively without echoing it to the output.
wandb_api_key = os.environ.get("WANDB_API_KEY") or getpass("W&B API key: ")
wandb.login(key=wandb_api_key)

## Initialise the W&B run
wandb.init(project="repte1_psiv", entity="andreu-mir")

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mandreu-mir[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
# Check the tensor shape of the first sample in the dataset
print(dataset[0][0].size())

torch.Size([3, 64, 40])


In [9]:
# Move the model to the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Train the model for a fixed number of epochs
num_epochs = 20
for epoch in range(num_epochs):
    model.train()  # put the model in training mode
    running_loss = 0.0  # sum of per-sample losses seen this epoch
    correct = 0         # correctly classified training samples this epoch
    total = 0           # training samples seen this epoch

    for images, labels in train_loader:
        # Move the batch to the same device as the model
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch MEAN, so weight it by the batch size.
        # Averaging batch means directly would over-weight the final, smaller
        # batch when the dataset size is not a multiple of the batch size.
        batch_count = labels.size(0)
        running_loss += loss.item() * batch_count

        # Accuracy bookkeeping: predicted class = index of the largest logit.
        # detach() keeps this out of the autograd graph (replaces legacy `.data`).
        predicted = outputs.detach().argmax(dim=1)
        total += batch_count
        correct += (predicted == labels).sum().item()

    # Per-sample average loss and accuracy (fraction in [0, 1]) for the epoch
    epoch_loss = running_loss / total
    epoch_accuracy = correct / total

    # Log the epoch metrics to W&B
    wandb.log({
        "epoch": epoch + 1,
        "loss": epoch_loss,
        "accuracy": epoch_accuracy,
    })

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_accuracy:.4f}')


Epoch [1/20], Loss: 0.2976, Accuracy: 0.9190
Epoch [2/20], Loss: 0.1074, Accuracy: 0.9713
Epoch [3/20], Loss: 0.0781, Accuracy: 0.9793
Epoch [4/20], Loss: 0.0692, Accuracy: 0.9808
Epoch [5/20], Loss: 0.0466, Accuracy: 0.9866
Epoch [6/20], Loss: 0.0362, Accuracy: 0.9896
Epoch [7/20], Loss: 0.0428, Accuracy: 0.9899
Epoch [8/20], Loss: 0.0389, Accuracy: 0.9907
Epoch [9/20], Loss: 0.0615, Accuracy: 0.9847
Epoch [10/20], Loss: 0.0316, Accuracy: 0.9908
Epoch [11/20], Loss: 0.0364, Accuracy: 0.9903
Epoch [12/20], Loss: 0.0430, Accuracy: 0.9903
Epoch [13/20], Loss: 0.0105, Accuracy: 0.9963
Epoch [14/20], Loss: 0.0232, Accuracy: 0.9930
Epoch [15/20], Loss: 0.0174, Accuracy: 0.9951
Epoch [16/20], Loss: 0.0151, Accuracy: 0.9956
Epoch [17/20], Loss: 0.0573, Accuracy: 0.9871
Epoch [18/20], Loss: 0.0234, Accuracy: 0.9936
Epoch [19/20], Loss: 0.0174, Accuracy: 0.9947
Epoch [20/20], Loss: 0.0134, Accuracy: 0.9957


# 4. Test Model

In [10]:
# Evaluate the trained model on the test split
model.eval()  # put the model in evaluation mode
correct = 0
total = 0

# Gradients are not needed while evaluating
with torch.no_grad():
    for images, labels in test_loader:
        # Move the batch to the same device as the model
        images = images.to(device)
        labels = labels.to(device)

        # Predicted class = index of the largest output per sample
        outputs = model(images)
        predicted = outputs.argmax(dim=1)
        total += len(labels)
        correct += int((predicted == labels).sum())

# Test accuracy as a percentage
accuracy = 100 * correct / total
print(f"Accuracy: {accuracy:.2f}%")

# Log the test accuracy to W&B.
# Note: this key is a percentage and differs only by case from the per-epoch
# "accuracy" key, which the training loop logs as a fraction.
wandb.log({"Accuracy": accuracy})

# Close the W&B run
wandb.finish()


Accuracy: 98.38%


VBox(children=(Label(value='0.012 MB of 0.012 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Accuracy,▁
accuracy,▁▆▆▇▇▇▇▇▇█▇▇████▇███
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,█▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁

0,1
Accuracy,98.38462
accuracy,0.99571
epoch,20.0
loss,0.01339


In [11]:
# End the W&B session
# NOTE(review): the evaluation cell above already calls wandb.finish(), so this
# repeated call looks redundant -- confirm it can be removed.
wandb.finish()