In [1]:
%matplotlib inline

Neural Networks
===============

Neural networks can be constructed using the `torch.nn` package.

Now that you had a glimpse of `autograd`, `nn` depends on `autograd` to
define models and differentiate them. An `nn.Module` contains layers,
and a method `forward(input)` that returns the `output`.

For example, look at this network that classifies digit images:

![convnet](https://pytorch.org/tutorials/_static/img/mnist.png)

It is a simple feed-forward network. It takes the input, feeds it
through several layers one after the other, and then finally gives the
output.

A typical training procedure for a neural network is as follows:

-   Define the neural network that has some learnable parameters (or
    weights)
-   Iterate over a dataset of inputs
-   Process input through the network
-   Compute the loss (how far is the output from being correct)
-   Propagate gradients back into the network's parameters
-   Update the weights of the network, typically using a simple update
    rule: `weight = weight - learning_rate * gradient`

Define the network
------------------

Let's define this network:


In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 5x5 square convolution
        # kernel
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 4 * 4, 120)  # 4*4 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # operaciones
    def forward(self, input):
        # Convolution layer C1: 1 input image channel, 6 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a Tensor with size (N, 6, 28, 28), where N is the size of the batch
        c1 = F.relu(self.conv1(input))
        # Subsampling layer S2: 2x2 grid, purely functional,
        # this layer does not have any parameter, and outputs a (N, 6, 14, 14) Tensor
        s2 = F.max_pool2d(c1, (2, 2))
        # Convolution layer C3: 6 input channels, 16 output channels,
        # 5x5 square convolution, it uses RELU activation function, and
        # outputs a (N, 16, 10, 10) Tensor
        c3 = F.relu(self.conv2(s2))
        # Subsampling layer S4: 2x2 grid, purely functional,
        # this layer does not have any parameter, and outputs a (N, 16, 5, 5) Tensor
        s4 = F.max_pool2d(c3, 2)
        # Flatten operation: purely functional, outputs a (N, 400) Tensor
        s4 = torch.flatten(s4, 1)
        # Fully connected layer F5: (N, 400) Tensor input,
        # and outputs a (N, 120) Tensor, it uses RELU activation function
        f5 = F.relu(self.fc1(s4))
        # Fully connected layer F6: (N, 120) Tensor input,
        # and outputs a (N, 84) Tensor, it uses RELU activation function
        f6 = F.relu(self.fc2(f5))
        # Gaussian layer OUTPUT: (N, 84) Tensor input, and
        # outputs a (N, 10) Tensor
        output = self.fc3(f6)
        return output


net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


You just have to define the `forward` function, and the `backward`
function (where gradients are computed) is automatically defined for you
using `autograd`. You can use any of the Tensor operations in the
`forward` function.

The learnable parameters of a model are returned by `net.parameters()`


In [3]:
params = list(net.parameters())
print(params[0].size())  # conv1's .weight

torch.Size([6, 1, 5, 5])


In [4]:
#list(net.parameters()) # filtros con pesos aleatorios

### Cargar dataset

In [5]:
# cargar dataset
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import transforms
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader

transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5))])

batch_size = 32
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transform
)

test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=transform
)
# crear variables para entrenamiento para pruebas
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)
print(training_data)
print(test_data)
clases=training_data.classes
print(clases)

100%|██████████| 26.4M/26.4M [00:01<00:00, 18.9MB/s]
100%|██████████| 29.5k/29.5k [00:00<00:00, 302kB/s]
100%|██████████| 4.42M/4.42M [00:00<00:00, 5.61MB/s]
100%|██████████| 5.15k/5.15k [00:00<00:00, 7.63MB/s]

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )
Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )
['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']





### Obtener caractertisticas y etiquetas de las imágenes

In [6]:
#obtener caracteristicas de imagenes y etiquetas
train_features, train_labels = next(iter(train_dataloader))
print(f"Tamaño de batch, dimensiones y canales: {train_features.size()}")
print(f"Número de etiquetas por cada batch: {train_labels.size()}")

Tamaño de batch, dimensiones y canales: torch.Size([32, 1, 28, 28])
Número de etiquetas por cada batch: torch.Size([32])


### Definir función de perdida y optimizador

In [7]:
import torch.optim as optim

criterio = nn.CrossEntropyLoss()
# momentum sirve para acelerar el proceso de encontrar el valor más cercano a cero
optimizador = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

### Configurar uso de GPU

In [8]:
# elección de arquitectura
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print("Usando:",device)

# Mover el modelo a GPU/CPU
net.to(device)

Usando: cpu


Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

### Entrenamiento del modelo de CNN

In [9]:
# Entrenamiento de modelo
mini_batch = 5000
for epoca in range(20):  # número de épocas

    running_loss = 0.0
    umbral = mini_batch
    for i, data in enumerate(train_dataloader, 0):
        # get the entradas; data is a list of [entradas, etiquetas]
        entradas, etiquetas = data
        # mover datos a GPU/CPU
        entradas, etiquetas = entradas.to(device), etiquetas.to(device)
        # gradiente de ceros
        optimizador.zero_grad()

        # forward + backward + optimize
        salidas = net(entradas) # envío
        perdida = criterio(salidas, etiquetas) # calculo diferencia o pérdida
        perdida.backward() # Actualiza pesos
        optimizador.step() # optimizo

        # print statistics
        running_loss += perdida.item() # perdida acumulada
        if ((i+1) * batch_size) >= umbral:    # print every 5000 mini-batches
            print(f'Época [{epoca + 1}, lote {i + 1:5d}] pérdida: {running_loss / (i+1):.3f}')
            running_loss = 0.0
            umbral += mini_batch

print('Entrenamiento finalizado')

Época [1, lote   157] pérdida: 1.694
Época [1, lote   313] pérdida: 0.391
Época [1, lote   469] pérdida: 0.219
Época [1, lote   625] pérdida: 0.144
Época [1, lote   782] pérdida: 0.110
Época [1, lote   938] pérdida: 0.085
Época [1, lote  1094] pérdida: 0.072
Época [1, lote  1250] pérdida: 0.060
Época [1, lote  1407] pérdida: 0.051
Época [1, lote  1563] pérdida: 0.044
Época [1, lote  1719] pérdida: 0.040
Época [1, lote  1875] pérdida: 0.035
Época [2, lote   157] pérdida: 0.415
Época [2, lote   313] pérdida: 0.204
Época [2, lote   469] pérdida: 0.130
Época [2, lote   625] pérdida: 0.099
Época [2, lote   782] pérdida: 0.071
Época [2, lote   938] pérdida: 0.057
Época [2, lote  1094] pérdida: 0.054
Época [2, lote  1250] pérdida: 0.045
Época [2, lote  1407] pérdida: 0.041
Época [2, lote  1563] pérdida: 0.035
Época [2, lote  1719] pérdida: 0.032
Época [2, lote  1875] pérdida: 0.028
Época [3, lote   157] pérdida: 0.329
Época [3, lote   313] pérdida: 0.169
Época [3, lote   469] pérdida: 0.108
É

### Validación en entrenamiento

In [15]:
correcto = 0
total = 0
with torch.no_grad():
    for entradas, etiquetas in train_dataloader:
        entradas, etiquetas = entradas.to(device), etiquetas.to(device)
        salidas = net(entradas)
        _, pred = torch.max(salidas, 1) # mayor valor
        total += etiquetas.size(0)
        correcto += (pred == etiquetas).sum().item()

print(f'Precisión en entrenamiento: {100 * correcto / total:.2f}%')

Precisión en entrenamiento: 94.49%


### Guardar modelo

In [10]:
ruta = './mnist_cnn_net.pth'
torch.save(net.state_dict(), ruta)

### Cargar modelo

In [11]:
net = Net()
net.load_state_dict(torch.load(ruta, weights_only=True))

<All keys matched successfully>

### Prueba de modelo

In [12]:
# Prueba de modelo por cada clase
correct_pred = {classname: 0 for classname in clases}
total_pred = {classname: 0 for classname in clases}

# no se necesita la gradiente ya que se entrena una sola vez
with torch.no_grad():
    for data in test_dataloader:
        imagenes, etiquetas = data
        salidas = net(imagenes)
        _, predicciones = torch.max(salidas, 1)
        # collección de predicciones correctas por cada clase
        for etiqueta, prediccion in zip(etiquetas, predicciones):
            if etiqueta == prediccion:
                correct_pred[clases[etiqueta]] += 1
            total_pred[clases[etiqueta]] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} es {accuracy:.2f} %')

Accuracy for class: T-shirt/top es 83.30 %
Accuracy for class: Trouser es 96.90 %
Accuracy for class: Pullover es 86.50 %
Accuracy for class: Dress es 91.80 %
Accuracy for class: Coat  es 84.20 %
Accuracy for class: Sandal es 98.10 %
Accuracy for class: Shirt es 65.90 %
Accuracy for class: Sneaker es 96.50 %
Accuracy for class: Bag   es 96.30 %
Accuracy for class: Ankle boot es 96.40 %


### Precisión total

In [16]:
correcto = 0
total = 0
# no se necesita la gradiente
with torch.no_grad():
    for data in test_dataloader:
        imagenes, etiquetas = data
        # calculate outputs by running images through the network
        salidas = net(imagenes)
        # the class with the highest energy is what we choose as prediction
        _, prediccion = torch.max(salidas, 1)
        total += etiquetas.size(0)
        correcto += (prediccion == etiquetas).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correcto / total:.2f} %')

Accuracy of the network on the 10000 test images: 89.59 %


In [14]:
#input = torch.randn(1, 1, 28, 28) # N lotes o batch, canales de entrada, dimensiones (w, h)
input = train_features[0].unsqueeze(0) # enviar de uno a uno
print(input.size())
out = net(input)
print(out)

torch.Size([1, 1, 28, 28])
tensor([[ 16.5396, -10.5250,   8.8695,   0.6373,  -0.2618,  -7.2057,   9.7469,
          -8.4352,  -1.0282,  -8.5567]], grad_fn=<AddmmBackward0>)
