# Modelo de red neuronal convolucional base (18 capas)

Explicar lo que vamos a hacer en este notebook

### Preparación de los datos

In [1]:
# Importamos las paqueterías necesarias para el notebook
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

# De ser posible utilizaremos GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """
    Función para cargar los datos de CIFAR-10
    """
    
    # Definimos el transform para normalizar los datos con pytorch
    # Los valores fueron obtenidos en el notebook de datos "data_extraction.ipynb"
    normalize = transforms.Normalize(  
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # Definimos el transform para preporcesar los datos
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize
    ])
    
    # Obtener los datos del conjunto de prueba
    if test:
        dataset = datasets.CIFAR10(
          root=data_dir, train=False,
          download=True, transform=transform_test,
        )

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

        return data_loader

    # Cargamos una copia de los datos de entrenamiento
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform_train,
    )
    
    # Cargamos una copia extra de los datos de entrenamiento para dividirlo después en el conjunto de validación
    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform_train,
    )
    
    # Separamos los datos de entrenamiento y validación mediante índices
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(42)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    
    # Finalmente, definimos los conjuntos de entrenamiento y validación
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


# Aplicamos la función para cargar los datos de CIFAR-10, los guardamos en el directorio actual
train_loader, valid_loader = data_loader(data_dir='./data',
                                         batch_size=128)

test_loader = data_loader(data_dir='./data',
                              batch_size=128,
                              test=True)    
cifar10_classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


### Definición del modelo

Explicar en qué consiste el modelo y por qué lo elegí de 18 capas y como lo vamos a testear 

In [3]:
class ConvNet_50_capas(nn.Module):
    """
    Modelo de red neuronal convolucional de 18 capas para clasificar imágenes en 10 clases posibles
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
                                  nn.BatchNorm2d(16),
                                  nn.ReLU())
        
        self.bloc1 = nn.Sequential(nn.Conv2d(16, 16, kernel_size=3, stride = 1, padding = 1),
                                   nn.BatchNorm2d(16),
                                   nn.ReLU()
                                   )
        self.conv_down2 = nn.Sequential(nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1),  #  feature map size = (16,16)
                                        nn.BatchNorm2d(32),
                                        nn.ReLU()
                                       )
        
        self.bloc2 = nn.Sequential(nn.Conv2d(32, 32, kernel_size=3, stride = 1, padding = 1),
                                   nn.BatchNorm2d(32),
                                   nn.ReLU()
                                  )
        
        self.conv_down3 = nn.Sequential(nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),  #  feature map size = (8,8)
                                        nn.BatchNorm2d(64),
                                        nn.ReLU()
                                       )
        
        self.bloc3 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=3, stride = 1, padding = 1),
                                   nn.BatchNorm2d(64),
                                   nn.ReLU()
                                  )
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64,10)


    def forward(self,x):
        out = self.conv1(x)
        for i in range(16):
            out = self.bloc1(out)
        out = self.conv_down2(out)
        for i in range(15):
            out = self.bloc2(out)
        out = self.conv_down3(out)
        for i in range(15):
            out = self.bloc3(out)
        
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        out = F.softmax(out, dim = 1)
        return out

### Entrenamiento

In [4]:
# variables para guardar los resultados
accuracy_training_epochs = []
accuracy_validation_epochs = []
loss_epoch = []
test_accuracy = []

In [5]:
# hiperparámetros reportados en el artículo, número de épocas reducido.
num_classes = 10
num_epochs = 90

model = ConvNet_50_capas().to(device)

optimizer = optim.SGD(
        params=model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=90)

In [6]:
import gc
total_step = len(train_loader)

for epoch in range(num_epochs):
    start_time = time.time()
    for i, (images, labels) in enumerate(train_loader):
        # Mover a los tensores a GPU de ser posible
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass y descenso de gradiente estocástico
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Ahorro de memoria
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    loss_epoch.append(loss.item())
    lr_scheduler.step() # Implementación de learning rate decay

    # Exactitud en el conjunto de validación
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        val_accuracy = correct/total
        accuracy_validation_epochs.append(val_accuracy)

    # Exactitud en el total del conjunto de entrenamiento
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        train_accuracy = correct/total
        accuracy_training_epochs.append(train_accuracy)

    # Exactitud en el conjunto de prueba
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        t_acc = correct/total
        test_accuracy.append(t_acc)



    print(f"Epoch [{epoch+1}/{num_epochs}], Training accuracy: {round(train_accuracy,3)}, Validation accuracy: {round(val_accuracy,3)}, loss = {round(loss_epoch[-1],3)}")
    print(f"Time spent on epoch {epoch+1}: {round((time.time()-start_time)/60,2)}min")

Epoch [1/90], Training accuracy: 0.195, Validation accuracy: 0.192, loss = 2.251
Time spent on epoch 1: 1.68min
Epoch [2/90], Training accuracy: 0.164, Validation accuracy: 0.161, loss = 2.238
Time spent on epoch 2: 1.72min
Epoch [3/90], Training accuracy: 0.24, Validation accuracy: 0.241, loss = 2.222
Time spent on epoch 3: 1.87min
Epoch [4/90], Training accuracy: 0.244, Validation accuracy: 0.245, loss = 2.163
Time spent on epoch 4: 1.79min
Epoch [5/90], Training accuracy: 0.249, Validation accuracy: 0.24, loss = 2.14
Time spent on epoch 5: 1.83min
Epoch [6/90], Training accuracy: 0.255, Validation accuracy: 0.244, loss = 2.25
Time spent on epoch 6: 1.87min
Epoch [7/90], Training accuracy: 0.266, Validation accuracy: 0.261, loss = 2.175
Time spent on epoch 7: 1.91min
Epoch [8/90], Training accuracy: 0.259, Validation accuracy: 0.258, loss = 2.214
Time spent on epoch 8: 1.93min
Epoch [9/90], Training accuracy: 0.266, Validation accuracy: 0.253, loss = 2.203
Time spent on epoch 9: 1.96

Epoch [74/90], Training accuracy: 0.617, Validation accuracy: 0.604, loss = 1.801
Time spent on epoch 74: 2.02min
Epoch [75/90], Training accuracy: 0.621, Validation accuracy: 0.604, loss = 1.887
Time spent on epoch 75: 2.0min
Epoch [76/90], Training accuracy: 0.62, Validation accuracy: 0.605, loss = 1.85
Time spent on epoch 76: 2.02min
Epoch [77/90], Training accuracy: 0.624, Validation accuracy: 0.61, loss = 1.831
Time spent on epoch 77: 2.01min
Epoch [78/90], Training accuracy: 0.625, Validation accuracy: 0.603, loss = 1.905
Time spent on epoch 78: 2.01min
Epoch [79/90], Training accuracy: 0.626, Validation accuracy: 0.619, loss = 1.814
Time spent on epoch 79: 2.01min
Epoch [80/90], Training accuracy: 0.631, Validation accuracy: 0.611, loss = 1.819
Time spent on epoch 80: 2.01min
Epoch [81/90], Training accuracy: 0.632, Validation accuracy: 0.617, loss = 1.794
Time spent on epoch 81: 2.01min
Epoch [82/90], Training accuracy: 0.632, Validation accuracy: 0.616, loss = 1.957
Time spent

In [7]:
# guardar el modelo
torch.save(model,"./convnet50.pt")

### Resultados

In [8]:
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [9]:
results_dict = {"loss": loss_epoch,
    'Train':accuracy_training_epochs,
     'Validation': accuracy_validation_epochs,
     "Test":test_accuracy}
results = pd.DataFrame(results_dict)
results.to_csv("./results/results_convnet50.csv",index=False)

In [10]:
results

Unnamed: 0,loss,Train,Validation,Test
0,2.251232,0.194800,0.1924,0.1975
1,2.238303,0.164311,0.1610,0.1725
2,2.221881,0.240044,0.2410,0.2483
3,2.163149,0.244156,0.2448,0.2596
4,2.139840,0.248556,0.2402,0.2600
...,...,...,...,...
85,1.786909,0.635600,0.6186,0.6275
86,1.741324,0.636289,0.6168,0.6313
87,1.795027,0.636733,0.6196,0.6299
88,1.795526,0.636822,0.6174,0.6272


In [11]:
accuracy = max(results["Test"])

In [12]:
print(accuracy)

0.6313
