In [52]:
%matplotlib inline

Neural Networks
===============

Neural networks can be constructed using the `torch.nn` package.

Now that you had a glimpse of `autograd`, `nn` depends on `autograd` to
define models and differentiate them. An `nn.Module` contains layers,
and a method `forward(input)` that returns the `output`.

For example, look at this network that classifies digit images:

![convnet](https://pytorch.org/tutorials/_static/img/mnist.png)

It is a simple feed-forward network. It takes the input, feeds it
through several layers one after the other, and then finally gives the
output.

A typical training procedure for a neural network is as follows:

-   Define the neural network that has some learnable parameters (or
    weights)
-   Iterate over a dataset of inputs
-   Process input through the network
-   Compute the loss (how far is the output from being correct)
-   Propagate gradients back into the network's parameters
-   Update the weights of the network, typically using a simple update
    rule: `weight = weight - learning_rate * gradient`

Define the network
------------------

Let's define this network:


In [53]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 4 * 4, 120)  # 4*4 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # operaciones
    def forward(self, input):
        # Convolution layer C1: 1 input image channel, 6 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a Tensor with size (N, 6, 28, 28), where N is the size of the batch
        c1 = F.relu(self.conv1(input))
        # Subsampling layer S2: 2x2 grid, purely functional,
        # this layer does not have any parameter, and outputs a (N, 6, 14, 14) Tensor
        s2 = F.max_pool2d(c1, (2, 2))
        # Convolution layer C3: 6 input channels, 16 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a (N, 16, 10, 10) Tensor
        c3 = F.relu(self.conv2(s2))
        # Subsampling layer S4: 2x2 grid, purely functional,
        # this layer does not have any parameter, and outputs a (N, 16, 5, 5) Tensor
        s4 = F.max_pool2d(c3, 2)
        # Flatten operation: purely functional, outputs a (N, 400) Tensor
        s4 = torch.flatten(s4, 1)
        # Fully connected layer F5: (N, 400) Tensor input,
        # and outputs a (N, 120) Tensor, it uses RELU activation function
        f5 = F.relu(self.fc1(s4))
        # Fully connected layer F6: (N, 120) Tensor input,
        # and outputs a (N, 84) Tensor, it uses RELU activation function
        f6 = F.relu(self.fc2(f5))
        # Gaussian layer OUTPUT: (N, 84) Tensor input, and
        # outputs a (N, 10) Tensor
        output = self.fc3(f6)
        return output


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


You just have to define the `forward` function, and the `backward`
function (where gradients are computed) is automatically defined for you
using `autograd`. You can use any of the Tensor operations in the
`forward` function.

The learnable parameters of a model are returned by `net.parameters()`


In [54]:
params = list(net.parameters())
print(params[0].size())  # conv1's .weight

torch.Size([6, 1, 5, 5])


In [55]:
#list(net.parameters()) # filtros con pesos aleatorios

### Cargar dataset

In [56]:
# cargar dataset
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5))])

batch_size = 32
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
)
# crear variables para entrenamiento para pruebas
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
print(training_data)
print(test_data)
clases=training_data.classes
print(clases)

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )
Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )
['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']


### Obtener caractertisticas y etiquetas de las imágenes

In [57]:
#obtener caracteristicas de imagenes y etiquetas
train_features, train_labels = next(iter(train_dataloader))
print(f"Tamaño de batch, dimensiones y canales: {train_features.size()}")
print(f"Número de etiquetas por cada batch: {train_labels.size()}")

Tamaño de batch, dimensiones y canales: torch.Size([32, 1, 28, 28])
Número de etiquetas por cada batch: torch.Size([32])


### Definir función de perdida y optimizador

In [58]:
import torch.optim as optim

criterio = nn.CrossEntropyLoss()
# momentum sirve para acelerar el proceso de encontrar el valor más cercano a cero
optimizador = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

### Configurar uso de GPU

In [67]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Usando:",device)

# Mover el modelo a GPU/CPU
net.to(device)

Usando: cuda


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

### Entrenamiento del modelo de CNN

In [None]:
# Entrenamiento de modelo
mini_batch = 5000
for epoca in range(30):  # número de épocas

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # get the entradas; data is a list of [entradas, etiquetas]
        entradas, etiquetas = data
        # mover datos a GPU/CPU
        entradas, etiquetas = entradas.to(device), etiquetas.to(device)
        # gradiente de ceros
        optimizador.zero_grad()

        # forward + backward + optimize
        salidas = net(entradas) # envío
        perdida = criterio(salidas, etiquetas) # calculo diferencia o pérdida
        perdida.backward() # Actualiza pesos
        optimizador.step() # optimizo

        # print statistics
        running_loss += perdida.item() # perdida acumulada
        if ((i+1) * batch_size) % mini_batch == 0:    # print every 5000 mini-batches
            print(f'Época [{epoca + 1}, lote {i + 1:5d}] pérdida: {running_loss / (i+1):.3f}')
            running_loss = 0.0

print('Entrenamiento finalizado')

Época [1,   625] pérdida: 0.262
Época [1,  1250] pérdida: 0.133
Época [1,  1875] pérdida: 0.087
Época [2,   625] pérdida: 0.261
Época [2,  1250] pérdida: 0.130
Época [2,  1875] pérdida: 0.088
Época [3,   625] pérdida: 0.263
Época [3,  1250] pérdida: 0.132
Época [3,  1875] pérdida: 0.087
Época [4,   625] pérdida: 0.259
Época [4,  1250] pérdida: 0.131
Época [4,  1875] pérdida: 0.089
Época [5,   625] pérdida: 0.266
Época [5,  1250] pérdida: 0.131
Época [5,  1875] pérdida: 0.087
Época [6,   625] pérdida: 0.256
Época [6,  1250] pérdida: 0.135
Época [6,  1875] pérdida: 0.087
Época [7,   625] pérdida: 0.261
Época [7,  1250] pérdida: 0.131
Época [7,  1875] pérdida: 0.088
Época [8,   625] pérdida: 0.260
Época [8,  1250] pérdida: 0.132
Época [8,  1875] pérdida: 0.088
Época [9,   625] pérdida: 0.260
Época [9,  1250] pérdida: 0.134
Época [9,  1875] pérdida: 0.087
Época [10,   625] pérdida: 0.257
Época [10,  1250] pérdida: 0.133
Época [10,  1875] pérdida: 0.088
Época [11,   625] pérdida: 0.261
Époc

### Guardar modelo

In [61]:
ruta = './mnist_cnn_net.pth'
torch.save(net.state_dict(), ruta)

### Cargar modelo

In [62]:
net = Net()
net.load_state_dict(torch.load(ruta, weights_only=True))

<All keys matched successfully>

### Prueba de modelo

In [63]:
# Prueba de modelo por cada clase
correct_pred = {classname: 0 for classname in clases}
total_pred = {classname: 0 for classname in clases}

# no se necesita la gradiente ya que se entrena una sola vez
with torch.no_grad():
    for data in test_dataloader:
        imagenes, etiquetas = data
        salidas = net(imagenes)
        _, predicciones = torch.max(salidas, 1)
        # collección de predicciones correctas por cada clase
        for etiqueta, prediccion in zip(etiquetas, predicciones):
            if etiqueta == prediccion:
                correct_pred[clases[etiqueta]] += 1
            total_pred[clases[etiqueta]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} es {accuracy:.2f} %')

Accuracy for class: T-shirt/top is 79.9 %
Accuracy for class: Trouser is 97.0 %
Accuracy for class: Pullover is 82.9 %
Accuracy for class: Dress is 91.0 %
Accuracy for class: Coat  is 80.0 %
Accuracy for class: Sandal is 96.8 %
Accuracy for class: Shirt is 71.3 %
Accuracy for class: Sneaker is 96.6 %
Accuracy for class: Bag   is 96.2 %
Accuracy for class: Ankle boot is 94.6 %


### Precisión total

In [64]:
correcto = 0
total = 0
# no se necesita la gradiente
with torch.no_grad():
    for data in test_dataloader:
        imagenes, etiquetas = data
        # calculate outputs by running images through the network
        salidas = net(imagenes)
        # the class with the highest energy is what we choose as prediction
        _, prediccion = torch.max(salidas, 1)
        total += etiquetas.size(0)
        correcto += (prediccion == etiquetas).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correcto // total} %')

Accuracy of the network on the 10000 test images: 88 %


In [65]:
#input = torch.randn(1, 1, 28, 28) # N lotes o batch, canales de entrada, dimensiones (w, h)
input = train_features[0].unsqueeze(0) # enviar de uno a uno
print(input.size())
out = net(input)
print(out)

torch.Size([1, 1, 28, 28])
tensor([[ 1.9865, -2.3575,  2.0949,  2.6959,  1.4992, -3.1907,  2.4035, -2.6306,
         -1.9733, -2.2122]], grad_fn=<AddmmBackward0>)
