# Optimization

In [1]:
# Pre-requsite code

import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda

training_data = datasets.FashionMNIST(
    root = "data",
    train = True,
    download = True,
    transform = ToTensor()
)

test_data = datasets.FashionMNIST(
    root = "data",
    train = False,
    download = True,
    transform = ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=26421880.0), HTML(value='')))


Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=29515.0), HTML(value='')))


Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=4422102.0), HTML(value='')))


Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=0.0, max=5148.0), HTML(value='')))


Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Processing...


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Done!


In [4]:
train_dataloader = DataLoader(training_data, batch_size = 64)
test_dataloader = DataLoader(test_data, batch_size = 64)

class NeuralNetwork(nn.Module):
  def __init__(self):
    super(NeuralNetwork, self).__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(28*28, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10),
        nn.ReLU()
    )

  def forward(self, x):
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

model = NeuralNetwork()

## Hyperparameter

1. epoch
2. batch size
3. learning rate


In [5]:
learning_rate = 1e-3
batch_size = 64
epoch = 5

In [6]:
# loss function

loss_fn = nn.CrossEntropyLoss()

In [7]:
# Optimizer

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

In training loop, there are three steps in optimization.

1. optimizer.zero_grad()
  - Reset the degree of change of model hyperparameter. By default, set explicitly to zero each iteration to prevent duplicate calculations

2. loss.backward()
  - Backpropagation the prediction loss. PyTorch save the degree of change of loss per hyperparameters.

3. optimizer.step()
 - Adjust the parameters to the degrees of change collected during the backpropagation phase.

In [14]:
# Total Code:

def train_loop(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  for batch, (X, y) in enumerate(dataloader):
    # calculating prediction and loss

    pred = model(X)
    loss = loss_fn(pred, y)

    # Backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  test_loss, correct = 0, 0

  with torch.no_grad():
    for X, y in dataloader:
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()

  test_loss /= size
  correct /= size
  print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [15]:
# initializer loss and optimizer and deliver to train and test loop

loss_fn = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

epochs = 10

for t in range(epochs):
  print(f"Epoch {t+1}\n-------------------------------")
  train_loop(train_dataloader, model, loss_fn, optimizer)
  test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 2.191342 [    0/60000]
loss: 2.203555 [ 6400/60000]
loss: 2.149551 [12800/60000]
loss: 2.192512 [19200/60000]
loss: 2.117167 [25600/60000]
loss: 2.064287 [32000/60000]
loss: 2.133490 [38400/60000]
loss: 2.040699 [44800/60000]
loss: 2.054240 [51200/60000]
loss: 2.048330 [57600/60000]
Test Error: 
 Accuracy: 44.3%, Avg loss: 0.031841 

Epoch 2
-------------------------------
loss: 2.013123 [    0/60000]
loss: 2.043933 [ 6400/60000]
loss: 1.932612 [12800/60000]
loss: 2.044818 [19200/60000]
loss: 1.887876 [25600/60000]
loss: 1.811403 [32000/60000]
loss: 1.948053 [38400/60000]
loss: 1.776486 [44800/60000]
loss: 1.822384 [51200/60000]
loss: 1.843844 [57600/60000]
Test Error: 
 Accuracy: 45.5%, Avg loss: 0.028301 

Epoch 3
-------------------------------
loss: 1.766454 [    0/60000]
loss: 1.837140 [ 6400/60000]
loss: 1.670490 [12800/60000]
loss: 1.876840 [19200/60000]
loss: 1.654236 [25600/60000]
loss: 1.574194 [32000/60000]
loss: 1.778505 [38400/

# Save the model and Recall

In [16]:
import torch
import torch.onnx as onnx
import torchvision.models as models

In [20]:
model = models.vgg16(pretrained = True)
torch.save(model.state_dict(), 'model_weights.pth')

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth


HBox(children=(FloatProgress(value=0.0, max=553433881.0), HTML(value='')))




In [22]:
model = models.vgg16()  # Preferred=True is not specified because it does not load the default weight.
model.load_state_dict(torch.load('model_weights.pth')) # call hyperparameters
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [23]:
torch.save(model, 'model.pth')

In [24]:
model = torch.load('model.pth')

PyTorch supports native ONNX exports. However, due to the dynamic nature of the PyTorch execution graph, the export process requires traversing the execution graph to create an ONNX model. For this reason, the export phase must pass test variables of the appropriate size. (The example below produces a false 0 tensor of the correct size):

In [25]:
input_image = torch.zeros((1, 3, 224, 224))
onnx.export(model, input_image, 'model.onnx')