# Modelo de red neuronal convolucional base (18 capas)

Explicar lo que vamos a hacer en este notebook

### Preparación de los datos

In [1]:
# Importamos las paqueterías necesarias para el notebook
import time
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler

# De ser posible utilizaremos GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [2]:
def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """
    Función para cargar los datos de CIFAR-10
    """
    
    # Definimos el transform para normalizar los datos con pytorch
    # Los valores fueron obtenidos en el notebook de datos "data_extraction.ipynb"
    normalize = transforms.Normalize(  
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # Definimos el transform para preporcesar los datos
    transform = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            normalize,
    ])
    
    # Obtener los datos del conjunto de prueba
    if test:
        dataset = datasets.CIFAR10(
          root=data_dir, train=False,
          download=True, transform=transform,
        )

        data_loader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )

        return data_loader

    # Cargamos una copia de los datos de entrenamiento
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )
    
    # Cargamos una copia extra de los datos de entrenamiento para dividirlo después en el conjunto de validación
    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )
    
    # Separamos los datos de entrenamiento y validación mediante índices
    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        np.random.seed(42)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)
    
    # Finalmente, definimos los conjuntos de entrenamiento y validación
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


# Aplicamos la función para cargar los datos de CIFAR-10, los guardamos en el directorio actual
train_loader, valid_loader = data_loader(data_dir='./data',
                                         batch_size=128)

test_loader = data_loader(data_dir='./data',
                              batch_size=128,
                              test=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 99138189.75it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Files already downloaded and verified


### Definición del modelo

Explicar en qué consiste el modelo y por qué lo elegí de 18 capas y como lo vamos a testear 

In [3]:
class ConvNet_18_capas(nn.Module):
    """
    Modelo de red neuronal convolucional de 18 capas para clasificar imágenes en 10 clases posibles
    """
    def __init__(self):
        super().__init__()
        self.network = nn.Sequential(
            #  Primera capa convolucional con downsampling
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3), #  feature map size = (112, 112)
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1), #  feature map size = (56, 56)

            # Primer bloque de capas convolucionales sin downsampling
            nn.Conv2d(64, 64, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(64),
            nn.ReLU(),

            # Segunda capa convolucional con downsampling, número de canales duplicado
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),  #  feature map size = (28, 28)
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # Segundo bloque de capas convolucionales sin downsampling 
            nn.Conv2d(128, 128, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(128),
            nn.ReLU(),

            # Tercera capa convolucional con downsampling, número de canales duplicado
            nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1),  #  feature map size = (14, 14)
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # Tercer bloque de capas convolucionales sin downsampling
            nn.Conv2d(256, 256, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(256),
            nn.ReLU(),

            # Cuarta capa convolucional con downsampling, número de canales duplicado
            nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),  #  feature map size = (7, 7)
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # Cuarto bloque de capas convolucionales sin downsampling
            nn.Conv2d(512, 512, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride = 1, padding = 1),
            nn.BatchNorm2d(512),
            nn.ReLU(),

            # Capa final de pooling, cada imagen se ve reducida a un tensor 1-dimensional de longitud 512
            nn.AdaptiveAvgPool2d((1, 1)),
        )
        # Capa final de clasificación
        self.fc = nn.Linear(512,10)


    def forward(self,x):
        out = self.network(x)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

### Entrenamiento

In [4]:
# variables para guardar los resultados
accuracy_training_epochs = []
accuracy_validation_epochs = []
loss_epoch = []
test_accuracy = []

In [5]:
# hiperparámetros
num_classes = 10
num_epochs = 30

model = ConvNet_18_capas().to(device)

optimizer = optim.SGD(
        params=model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=0.0001)
criterion = nn.CrossEntropyLoss()
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1)

In [6]:
import gc
total_step = len(train_loader)

for epoch in range(num_epochs):
    start_time = time.time()
    for i, (images, labels) in enumerate(train_loader):
        # Mover a los tensores a GPU de ser posible
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass y descenso de gradiente estocástico
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Ahorro de memoria
        del images, labels, outputs
        torch.cuda.empty_cache()
        gc.collect()

    loss_epoch.append(loss.item())
    lr_scheduler.step() # Implementación de learning rate decay

    # Exactitud en el conjunto de validación
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        val_accuracy = correct/total
        accuracy_validation_epochs.append(val_accuracy)

    # Exactitud en el total del conjunto de entrenamiento
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        train_accuracy = correct/total
        accuracy_training_epochs.append(train_accuracy)

    # Exactitud en el conjunto de prueba
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            del images, labels, outputs
        t_acc = correct/total
        test_accuracy.append(t_acc)



    print(f"Epoch [{epoch+1}/{num_epochs}], Training accuracy: {round(train_accuracy,3)}, Validation accuracy: {round(val_accuracy,3)}, loss = {round(loss_epoch[-1],3)}")
    print(f"Time spent on epoch {epoch+1}: {round((time.time()-start_time)/60,2)}min")

Epoch [1/30], Training accuracy: 0.386, Validation accuracy: 0.38, loss = 1.808
Time spent on epoch 1: 6.93min
Epoch [2/30], Training accuracy: 0.494, Validation accuracy: 0.478, loss = 1.478
Time spent on epoch 2: 6.89min
Epoch [3/30], Training accuracy: 0.599, Validation accuracy: 0.583, loss = 1.172
Time spent on epoch 3: 6.97min
Epoch [4/30], Training accuracy: 0.714, Validation accuracy: 0.694, loss = 0.958
Time spent on epoch 4: 7.13min
Epoch [5/30], Training accuracy: 0.753, Validation accuracy: 0.724, loss = 0.783
Time spent on epoch 5: 6.95min
Epoch [6/30], Training accuracy: 0.794, Validation accuracy: 0.764, loss = 0.951
Time spent on epoch 6: 6.99min
Epoch [7/30], Training accuracy: 0.821, Validation accuracy: 0.78, loss = 0.61
Time spent on epoch 7: 6.97min
Epoch [8/30], Training accuracy: 0.845, Validation accuracy: 0.796, loss = 0.373
Time spent on epoch 8: 6.97min
Epoch [9/30], Training accuracy: 0.869, Validation accuracy: 0.813, loss = 0.462
Time spent on epoch 9: 6.9

In [7]:
# guardar el modelo
torch.save(model,"./convnet18.pth")

### Resultados

In [8]:
import pandas as pd

In [9]:
results_dict = {"loss": loss_epoch,
    'Train':accuracy_training_epochs,
     'Validation': accuracy_validation_epochs,
     "Test":test_accuracy}
results = pd.DataFrame(results_dict)
results.to_csv("./results.csv",index=False)

In [10]:
results

Unnamed: 0,loss,Train,Validation,Test
0,1.808371,0.386489,0.3804,0.3858
1,1.477534,0.494489,0.4782,0.4902
2,1.171981,0.5988,0.5826,0.5902
3,0.958366,0.7138,0.6938,0.6931
4,0.78297,0.753044,0.724,0.7272
5,0.95108,0.793978,0.7636,0.7559
6,0.609669,0.820667,0.7796,0.7751
7,0.372714,0.845267,0.7964,0.7982
8,0.462168,0.869222,0.8134,0.8055
9,0.531625,0.880356,0.8152,0.8032
