### Guardar y cargar modelos en PyTorch

Cuando se trata de guardar y cargar modelos, hay tres funciones básicas con las que debe estar familiarizado:

`torch.save`: Guarda un objeto serializado en el disco. Esta función utiliza la utilidad pickle de Python para la serialización. Con esta función se pueden guardar modelos, tensores y diccionarios de todo tipo de objetos.

`torch.load`: Permite cargar y restaurar tensores, modelos, optimizadores y otros objetos de PyTorch desde archivos guardados en disco.

`torch.nn.Module.load_state_dict`: Carga el diccionario de parámetros de un modelo utilizando un state_dict deserializado.

In [None]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets 
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import shutil#shell utils
import numpy as np
import os

In [None]:
# Fetch both FashionMNIST splits (train first, then test) into ./data,
# converting every image to a tensor on access.
training_data, test_data = (
    datasets.FashionMNIST(root="data", train=is_train, download=True, transform=ToTensor())
    for is_train in (True, False)
)

print(f"Amount of training data: {len(training_data.targets)}, and test data: {len(test_data.targets)}")

# Wrap each split in a loader that serves fixed-size batches in dataset order.
batch_size = 64
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=batch_size)
    for split in (training_data, test_data)
)

In [None]:
# Class index -> human-readable FashionMNIST label.
labels_map = dict(enumerate([
    "T-Shirt",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle Boot",
]))

# Show a 3x3 grid of randomly drawn training images, each titled with its label.
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for slot in range(1, rows * cols + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    ax = figure.add_subplot(rows, cols, slot)
    ax.set_title(labels_map[label])
    ax.axis("off")
    # squeeze() drops the singleton dimensions so imshow receives a 2-D image.
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

In [None]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"
print(f"Running on: {device} device")

#Defining the model
class NeuralNetwork(nn.Module):
  """Fully connected classifier: flatten, two 512-unit ReLU layers, 10 output logits."""

  def __init__(self):
    super().__init__()
    # Collapses every dimension after the first into one feature vector.
    self.flatten = nn.Flatten()
    # 784 -> 512 -> 512 -> 10; the last layer emits raw scores (no softmax).
    layers = [
        nn.Linear(in_features=28*28, out_features=512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10),
    ]
    self.linear_relu_stack = nn.Sequential(*layers)

  def forward(self, x):
    """Return the 10 class logits for each flattened input in `x`."""
    return self.linear_relu_stack(self.flatten(x))

In [None]:
# Build the network, move it to the selected device and show its layer layout.
model = NeuralNetwork()
model = model.to(device)
print(model)

# Cross-entropy criterion applied to the network's raw logits,
# optimised with plain SGD at a fixed learning rate.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)

#training 
def training(model, dataloader, loss_fn, optimizer):
  """Run one pass over `dataloader`, updating `model` after every batch.

  Batches are moved to the module-level `device` before the forward pass.
  The loss of every 100th batch is printed next to the number of samples
  that preceded it.
  """
  size = len(dataloader.dataset)  # total samples, used only for the progress line
  model.train()
  for step, (inputs, targets) in enumerate(dataloader):
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Forward pass and loss for this batch.
    batch_loss = loss_fn(model(inputs), targets)

    # Clear stale gradients, backpropagate, then apply the update.
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    if step % 100 != 0:
      continue
    loss, current_sample = batch_loss.item(), step*len(inputs)
    print(f"loss: {loss:>3f} [{current_sample:>5d}/{size:>5d}]")

#test loop
def test(model, dataloader, loss_fn):
  """Evaluate `model` on every batch of `dataloader` and report accuracy and mean loss.

  Batches are moved to the module-level `device` before the forward pass.

  Args:
    model: module returning per-class scores; the argmax over dim 1 is the prediction.
    dataloader: iterable of (inputs, targets) batches that exposes `.dataset` and `len()`.
    loss_fn: callable returning a scalar tensor for (predictions, targets).

  Returns:
    Tuple `(acc, test_loss)`: accuracy as a percentage of `len(dataloader.dataset)`
    and the loss averaged over the number of batches. The same values are printed.

  Raises:
    ZeroDivisionError: if the dataloader has no batches or its dataset is empty.
  """
  size = len(dataloader.dataset)  # number of samples the accuracy is averaged over
  num_batches = len(dataloader)   # number of batches the loss is averaged over
  # Evaluation mode only switches the module's mode flag; gradient tracking is
  # turned off separately by the no_grad() context below.
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      # .item() converts the scalar tensors to plain Python numbers before accumulating.
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1)==y).type(torch.float).sum().item()

  test_loss /= num_batches
  correct /= size
  acc = 100*correct
  print(f"Test error: \n Acc: {(acc):>0.1f}%, avg loss: {test_loss:>8f}\n")
  # Hand the metrics back so callers can log them or store them in a checkpoint.
  return acc, test_loss


In [None]:
# Alternate one training pass and one evaluation pass for a fixed number of epochs.
epochs = 5
for i in range(0, epochs):
  print(f"Epoch {i+1}\n ---------------------------")
  training(model=model, dataloader=train_dataloader, loss_fn=loss_fn, optimizer=optimizer)
  test(model=model, dataloader=test_dataloader, loss_fn=loss_fn)
print("done")

In [None]:
# Display the trained model's state_dict (parameter name -> tensor) as the cell output.
model.state_dict()

¿Qué es un state_dict?
En PyTorch, los parámetros aprendibles (es decir, pesos y sesgos) de un modelo torch.nn.Module están contenidos en los parámetros del modelo (a los que se accede con model.parameters()). Un state_dict es simplemente un objeto diccionario de Python que asigna cada capa a su tensor de parámetros. Tenga en cuenta que sólo las capas con parámetros aprendibles (capas convolucionales, capas lineales, etc.) tienen entradas en el state_dict del modelo. Los objetos optimizadores (torch.optim) también tienen un state_dict, que contiene información sobre el estado del optimizador, así como los hiperparámetros utilizados.

Debido a que los objetos state_dict son diccionarios de Python, pueden ser fácilmente guardados, actualizados, alterados y restaurados, añadiendo una gran modularidad a los modelos y optimizadores de PyTorch.



In [None]:
# List every entry of the model's state_dict together with its tensor shape.
print("Model's state_dict:")
for entry_name, entry_tensor in model.state_dict().items():
    print(entry_name, "\t", entry_tensor.size())

# List every top-level entry of the optimizer's state_dict with its value.
print("Optimizer's state_dict:")
for entry_name, entry_value in optimizer.state_dict().items():
    print(entry_name, "\t", entry_value)

`torch.save(model.state_dict(), PATH)`

In [None]:
# Make sure the target directory exists before writing into it.
os.makedirs("data/models/", exist_ok=True)
model_name = "my_first_model.pt"
model_pt = "data/models/" + model_name

# Persist only the parameters (state_dict), not the pickled module object.
torch.save(model.state_dict(), model_pt)
# Report the path that was actually written.
print(f"Saved PyTorch Model State to {model_pt}")

`model.load_state_dict(torch.load(PATH))`

`model.eval()`

In [None]:
# Rebuild the architecture (on the CPU), then copy the saved parameters into it.
model = NeuralNetwork()
# map_location="cpu" lets the file load even when its tensors were saved from a GPU
# that is not available now; the freshly built model lives on the CPU anyway.
model.load_state_dict(torch.load(model_pt, map_location="cpu"))
# Switch to evaluation mode before using the reloaded model for inference.
model.eval()
print("model loaded correctly!")

In [None]:
# Label names indexed by FashionMNIST class id.
classes = [
    "T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
    "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot",
]

# Classify the first test sample and compare the prediction with its true label.
x, y = test_data[0]
with torch.no_grad():
  pred = model(x)
# The highest logit in the single output row is the predicted class.
predicted = classes[pred[0].argmax(0)]
actual = classes[y]
print(f"Predicted: '{predicted}' Actual: '{actual}'")

In [None]:
# Measure the mean test loss so that 'loss' holds a plain number.
# Storing the criterion module (`loss_fn`) there would pickle an arbitrary object,
# which torch.load rejects in weights-only mode (the default from PyTorch 2.6 on).
model_device = next(model.parameters()).device
model.eval()
with torch.no_grad():
  batch_losses = [
      loss_fn(model(X.to(model_device)), y.to(model_device)).item()
      for X, y in test_dataloader
  ]
checkpoint_loss = sum(batch_losses) / len(batch_losses)

# Bundle everything needed to resume training into a single file.
torch.save({
    'epoch': epochs,
    'modelo_state_dict': model.state_dict(),
    'optimizador_state_dict': optimizer.state_dict(),
    'loss': checkpoint_loss,
}, "modelo.pt")

In [None]:
# Read the checkpoint onto the CPU, where the freshly built model lives.
checkpoint = torch.load("modelo.pt", map_location="cpu")
modelo_cargado = NeuralNetwork()
modelo_cargado.load_state_dict(checkpoint['modelo_state_dict'])
modelo_cargado.eval()
# An optimizer only updates the parameters it was constructed with, so build one
# for the restored model before loading the saved optimizer state into it.
optimizador_cargado = torch.optim.SGD(modelo_cargado.parameters(), lr=1e-3)
optimizador_cargado.load_state_dict(checkpoint['optimizador_state_dict'])
epoch_cargada = checkpoint['epoch']
loss_cargada = checkpoint['loss']


Continuando el entrenamiento 

In [None]:
# Read the checkpoint and pull out the bookkeeping values stored next to the weights.
checkpoint = torch.load("modelo.pt")
start_epoch, loss = checkpoint['epoch'], checkpoint['loss']

# Fresh model on the selected device, filled with the saved parameters.
model = NeuralNetwork()
model = model.to(device)
model.load_state_dict(checkpoint['modelo_state_dict'])

# Fresh optimizer bound to the new model's parameters, then restored from the checkpoint.
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-3)
optimizer.load_state_dict(checkpoint['optimizador_state_dict'])

# Train five more epochs, numbering them after the ones already completed.
for i in range(start_epoch, start_epoch + 5):
  print(f"Epoch {1+i}\n ---------------------------")
  training(model=model, dataloader=train_dataloader, loss_fn=loss_fn, optimizer=optimizer)
  test(model=model, dataloader=test_dataloader, loss_fn=loss_fn)

print("done")