### El clasificador (Una clase por imagen)

Utilizando la siguiente arquitectura:

```python
class NN(nn.Module):
    """ResNet-18 backbone whose final layer is replaced by a small MLP head."""

    def __init__(self, num_classes):
        """Build the network.

        Args:
            num_classes: Number of logits produced by the last linear layer.
        """
        super().__init__()
        # ResNet-18 initialised with the IMAGENET1K_V1 weights.
        self.model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1, progress=True)
        # Freeze every parameter that exists at this point. The head assigned
        # below is created afterwards, so it is not touched by this loop.
        for param in self.model.parameters():
            param.requires_grad = False
         
        # Replacement classifier: backbone features -> 512 -> num_classes.
        self.model.fc = nn.Sequential(
            nn.Linear(self.model.fc.in_features, 512),  
            nn.ReLU(inplace=True),   
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
        
    def forward(self, x):
        """Return the output of the wrapped ResNet (including the new head) for ``x``."""
        return self.model(x)


```

Entrenar una red neuronal que pueda clasificar imágenes en las siguientes clases:

- Mujer joven
- Mujer mayor
- Hombre joven
- Hombre mayor
- Niño
- Niña
- Perro
- Gato
- Ilustración

Las etiquetas deben respetar ese orden, Mujer joven=0, Mujer mayor=1, Hombre joven=2, Hombre mayor=3, Niño=4, Niña=5, Perro=6, Gato=7, Ilustración=8.  

Lo que usted subirá a la plataforma es únicamente el modelo en formato safetensors, con su nombre y matrícula como nombre del modelo.

No pueden repetir modelos, los modelos iguales tendrán 0 de nota.

Si usan un formato diferente o cambian la arquitectura valdrá 0.

Si no incumple estos puntos, su nota dependerá del accuracy alcanzado por su modelo, por lo que esfuércese en la recolección de datos y el entrenamiento de su modelo y verá las recompensas.

1. Configuración del Entorno


In [86]:
!pip install torch torchvision



2. Importación de Librerías Necesarias


In [87]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset
from PIL import Image
import requests
from io import BytesIO
import os
import copy

3. Definición de la Clase del Modelo


In [88]:
class NN(nn.Module):
    """Image classifier: frozen pretrained ResNet-18 plus a trainable MLP head."""

    def __init__(self, num_classes=9):
        """Assemble the backbone and its replacement head.

        Args:
            num_classes: Size of the final linear layer's output (default 9).
        """
        super().__init__()

        backbone = models.resnet18(
            weights=models.ResNet18_Weights.IMAGENET1K_V1,
            progress=True,
        )

        # Freeze everything the backbone owns right now; the head is built
        # afterwards, so this loop never reaches it.
        for frozen in backbone.parameters():
            frozen.requires_grad = False

        # Read the feature width before the original `fc` layer is replaced.
        feature_dim = backbone.fc.in_features
        backbone.fc = nn.Sequential(
            nn.Linear(feature_dim, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes),
        )

        # Registered under the attribute name `model`, so state-dict keys keep
        # the `model.` prefix.
        self.model = backbone

    def forward(self, x):
        """Run ``x`` through the wrapped network and return its output."""
        return self.model(x)


4. Preprocesamiento de Datos


In [89]:
import requests
from PIL import Image
from io import BytesIO

def download_image(url, timeout=10):
    """Fetch ``url`` and decode it as an RGB PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled host.

    Returns:
        The decoded image converted to RGB, or ``None`` when the download or
        the decoding fails (the error is printed in that case).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into an explicit error instead of handing an
        # HTML error page to the image decoder.
        response.raise_for_status()
        img = Image.open(BytesIO(response.content)).convert("RGB")
        return img
    except Exception as e:
        # Deliberately broad: this helper is best-effort and reports any
        # failure through its None return value.
        print(f"Error al descargar {url}: {e}")
        return None

class CustomDataset(Dataset):
    """Dataset that downloads each image from its URL when it is indexed."""

    def __init__(self, image_urls, labels, transform=None):
        """Store the URL/label pairs and the optional transform.

        Args:
            image_urls: Sequence of image URLs.
            labels: Sequence of labels, one per URL and in the same order.
            transform: Optional callable applied to each downloaded image.

        Raises:
            ValueError: If ``image_urls`` and ``labels`` differ in length.
        """
        if len(image_urls) != len(labels):
            raise ValueError(
                f"image_urls and labels must have the same length "
                f"(got {len(image_urls)} and {len(labels)})"
            )
        self.image_urls = image_urls
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of URLs in the dataset."""
        return len(self.image_urls)

    def __getitem__(self, index):
        """Download, transform and return the ``(image, label)`` pair at ``index``.

        Raises:
            RuntimeError: If the image could not be downloaded or decoded.
        """
        url = self.image_urls[index]
        image = download_image(url)
        label = self.labels[index]

        # Fail here, naming the offending URL, instead of returning a None
        # image that only breaks later when samples are collated into a batch.
        if image is None:
            raise RuntimeError(f"Could not load image at index {index}: {url}")

        if self.transform:
            image = self.transform(image)
        return image, label

# Preprocessing applied to every downloaded image: resize to 224x224, convert
# to a tensor, then normalise each channel.
# NOTE(review): the mean/std values are presumably the ImageNet statistics that
# go with the pretrained backbone - confirm.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# One sample URL per class; the position in this list is the class label.
image_urls = [
    # 0
    "https://st4.depositphotos.com/12985790/25532/i/450/depositphotos_255323078-stock-photo-attractive-young-woman-holding-cup.jpg",
    # 1
    "https://cdn.pixabay.com/photo/2020/10/05/21/32/woman-5630804_1280.jpg",
    # 2
    "https://st4.depositphotos.com/1017228/20766/i/450/depositphotos_207663178-stock-photo-image-of-happy-young-man.jpg",
    # 3
    "https://previews.123rf.com/images/denisfilm/denisfilm1610/denisfilm161000019/63731762-cara-seria-del-hombre-mayor-hombre-mayor-pensativo-gran-experiencia-de-vida-compostura-y-sabidur%C3%ADa.jpg",
    # 4
    "https://static.guiainfantil.com/media/31278/c/cuando-el-nino-de-3-anos-no-habla-lg.jpg",
    # 5
    "https://www.diainternacionalde.com/imagenes/dias/10-11_dia-mundial-nina-2022.jpg",
    # 6
    "https://static.fundacion-affinity.org/cdn/farfuture/PVbbIC-0M9y4fPbbCsdvAD8bcjjtbFc0NSP3lRwlWcE/mtime:1643275542/sites/default/files/los-10-sonidos-principales-del-perro.jpg",
    # 7
    "https://s1.eestatic.com/2023/03/10/curiosidades/mascotas/747436034_231551832_1706x1280.jpg",
    # 8
    "https://historia.nationalgeographic.com.es/medio/2021/12/17/bano-podcast-web_512e9cd3_550x807.jpg",
]
labels = list(range(9))

dataset = CustomDataset(image_urls, labels, transform=transform)
# Fixed order, up to 32 samples per batch.
test_loader = DataLoader(dataset=dataset, batch_size=32, shuffle=False)



GPU

In [90]:
# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Usando {device} para el entrenamiento.")

# Build the 9-class network and move it to the selected device.
model = NN(num_classes=9)
model = model.to(device)

Usando cuda:0 para el entrenamiento.


5. Entrenamiento del Modelo


In [91]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    weights from the epoch with the highest validation accuracy are restored
    into ``model`` before it is returned.

    Args:
        model: Module to optimise; batches are moved to the device of its
            first parameter (CPU when it has no parameters).
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries, each an
            iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The running
            loss weights each batch by its size, which assumes the criterion
            returns a per-batch mean.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of train/val passes to run.

    Returns:
        ``model`` itself, carrying the best validation weights.

    Raises:
        ValueError: If a phase yields no samples.
    """
    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device("cpu")

    best_acc = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_training = phase == 'train'
            model.train(is_training)

            running_loss = 0.0
            running_corrects = 0
            seen = 0

            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(target_device), labels.to(target_device)
                batch_size = inputs.size(0)

                optimizer.zero_grad()

                # Gradients are only tracked while training.
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_training:
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * batch_size
                # Plain Python ints keep the metrics usable even when the
                # loop body never runs.
                running_corrects += int((preds == labels).sum().item())
                seen += batch_size

            if seen == 0:
                raise ValueError(f"dataloaders[{phase!r}] yielded no samples")

            # Average over the samples actually processed, which stays correct
            # when a sampler or drop_last yields fewer than len(dataset).
            epoch_loss = running_loss / seen
            epoch_acc = running_corrects / seen

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

    # '.4f' (precision) instead of '4f' (minimum width), matching the
    # per-phase lines above.
    print('Mejor accuracy de validación: {:.4f}'.format(best_acc))

    model.load_state_dict(best_model_wts)
    return model


6. Evaluación del Modelo


In [92]:
def evaluate_model(model, test_loader, criterion):
    """Print the mean loss and accuracy of ``model`` over ``test_loader``.

    NOTE: a later cell in this notebook redefines ``evaluate_model`` with a
    two-argument signature, which shadows this version once it has run.

    Args:
        model: Module to evaluate; it is switched to eval mode. Batches are
            moved to the device of its first parameter (CPU when it has none).
        test_loader: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``. The running
            loss weights each batch by its size, which assumes the criterion
            returns a per-batch mean.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device("cpu")

    model.eval()
    running_loss = 0.0
    running_corrects = 0
    seen = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(target_device), labels.to(target_device)
            batch_size = inputs.size(0)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * batch_size
            # Plain Python ints avoid calling tensor methods on the initial 0.
            running_corrects += int((preds == labels).sum().item())
            seen += batch_size

    if seen == 0:
        raise ValueError("test_loader yielded no samples")

    # Average over the samples actually processed.
    total_loss = running_loss / seen
    total_acc = running_corrects / seen

    print(f'Test Loss: {total_loss:.4f} Acc: {total_acc:.4f}')


7. Guardado del Modelo


In [93]:
# Serialise the current parameters of `model` with torch.save.
# NOTE(review): the assignment text asks for a safetensors file, while this
# writes a torch checkpoint with a .pt extension - confirm the required format.
# NOTE(review): no call to train_model is visible before this cell, so the
# head saved here looks untrained - verify.
torch.save(obj=model.state_dict(), f='Christian_Zorrilla_21-SISN-2-070.pt')

1. Cargar el Modelo Guardado


In [94]:
# Rebuild the architecture, then restore the saved parameters into it.
model = NN(num_classes=9)
# Deserialise onto the CPU: the freshly built model lives there, and a
# hard-coded 'cuda' target fails on machines without a GPU. The model is moved
# to the selected device later with `.to(device)`.
model.load_state_dict(torch.load('Christian_Zorrilla_21-SISN-2-070.pt', map_location=torch.device('cpu')))
# Inference mode: disables dropout and uses the stored batch-norm statistics.
model.eval()


NN(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_st

2. Evaluar el Modelo para Obtener el Accuracy


In [95]:
def evaluate_model(model, test_loader):
    """Return the accuracy of ``model`` over ``test_loader`` as a percentage.

    The model is evaluated in eval mode (so dropout is inactive) and its
    previous train/eval mode is restored afterwards.

    Args:
        model: Module to evaluate. Batches are moved to the device of its
            first parameter (CPU when it has no parameters).
        test_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy in the range 0-100.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    # Follow the model's own device rather than a notebook-level global.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device("cpu")

    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in test_loader:
                images, labels = images.to(target_device), labels.to(target_device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the caller's train/eval mode as it was found.
        model.train(was_training)

    if total == 0:
        raise ValueError("test_loader yielded no samples")

    accuracy = 100 * correct / total
    return accuracy

# Re-select the device (first CUDA device if available, else CPU) and move the
# reloaded model onto it before scoring.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)

# Score the model on the loader built earlier and report the percentage.
accuracy = evaluate_model(model, test_loader)
print(f'Accuracy del modelo en los datos de prueba: {accuracy:.2f}%')


Accuracy del modelo en los datos de prueba: 11.11%
