<a href="https://colab.research.google.com/github/Aravinda89/Pytorch-Tutorial/blob/main/03_04_Using_Optimizers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Using optimizers

In [1]:
import numpy as np
import torch

# Pin the global RNGs of both libraries so repeated runs start from the same state.
# Identical results are still not guaranteed across different PyTorch releases.
torch.manual_seed(0)
np.random.seed(0)

# Ask cuDNN for deterministic behaviour and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [2]:
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

# One value per channel; the tuples have a single entry each.
mean = (0.5,)
std = (0.5,)

# Preprocessing applied to every sample: convert to a tensor, then normalise
# with the mean/std defined above.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split: downloaded on first use, served in shuffled mini-batches of 64.
trainset = datasets.FashionMNIST(
    '~/.pytorch/FMNIST/',
    download=True,
    train=True,
    transform=transform,
)
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=64,
    shuffle=True,
)

# Test split: same preprocessing and batch size, but kept in a fixed order.
testset = datasets.FashionMNIST(
    '~/.pytorch/FMNIST/',
    download=True,
    train=False,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=64,
    shuffle=False,
)

In [3]:
class FMNIST(nn.Module):
    """Fully connected classifier with layer sizes 784 -> 128 -> 64 -> 10.

    The two hidden layers use ReLU activations and the output layer is
    followed by ``log_softmax`` over the class dimension, so the result is
    meant to be paired with ``nn.NLLLoss``.
    """

    def __init__(self):
        """Create the three linear layers."""
        super().__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of inputs.

        Args:
            x: tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must total 784 values per sample.

        Returns:
            Tensor of shape ``(batch, 10)`` holding log-probabilities.
        """
        # reshape() instead of view(): view() raises on non-contiguous input
        # (e.g. a permuted or sliced batch), while reshape() copies only when
        # it has to and otherwise behaves exactly like view().
        x = x.reshape(x.shape[0], -1)

        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return F.log_softmax(x, dim=1)
    
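# model = FMNIST()  # left commented out here: the next cell builds the same layers with nn.Sequential, and FMNIST() is instantiated later in the training cell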

In [4]:
# Same architecture as the FMNIST class, expressed as a flat stack of modules
# so individual layers can be reached by index (e.g. model[0]).
model = nn.Sequential(
    nn.Linear(in_features=784, out_features=128),  # input -> hidden 1
    nn.ReLU(),
    nn.Linear(in_features=128, out_features=64),   # hidden 1 -> hidden 2
    nn.ReLU(),
    nn.Linear(in_features=64, out_features=10),    # hidden 2 -> class scores
    nn.LogSoftmax(dim=1),                          # log-probabilities over dim 1
)

In [5]:
# Grab one mini-batch from the training loader to experiment with.
images, labels = next(iter(trainloader))
# Collapse everything after the batch dimension into a single axis so the
# batch can be fed straight into the first Linear layer.
images = images.view(images.size(0), -1)

In [6]:
# parameters() returns a generator rather than a list, so displaying it only
# shows the generator's repr. This same iterable is what gets passed to the
# optimizer when it is constructed.
model.parameters()

<generator object Module.parameters at 0x7f8dae89d550>

In [7]:
from torch import optim

# Negative log-likelihood loss: the model already ends in LogSoftmax.
criterion = nn.NLLLoss()

# Plain SGD over every parameter of the Sequential model.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)


In [8]:
# Forward pass on the flattened batch, then the loss against its labels.
output = model(images)
loss = criterion(output, labels)
# Backpropagate: this fills .grad on the model's parameters.
loss.backward()
# model[0] is the first Linear layer; show its weights and the gradient that
# backward() just computed, before any optimizer update has been applied.
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)
        

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-7.2154e-04, -7.2154e-04, -7.2154e-04,  ..., -7.1856e-04,
         -7.2154e-04, -7.2154e-04],
        [ 1.5117e-03,  1.5168e-03,  1.5292e-03,  ...,  1.5526e-03,
          1.5154e-03,  1.5168e-03],
        [-4.5585e-04, -4.5585e-04, -4.5585e-04,  ..., -4.5585e-04,
         -4.5585e-04, -4.5585e-04],
        ...,
        [-9.6583e-05, -9.6583e-05, -9.6583e-05,  ..., -1.5217e-04,
         -9.6583e-05, -9.6583e-05],
        [-5.6446e-04, -5.6559e-04, -5.5394e-0

In [9]:
# Apply a single SGD update to the model's parameters using the gradients
# produced by the earlier loss.backward() call.
optimizer.step()

In [10]:
# Inspect the first layer again after optimizer.step(): a few weight entries
# have moved slightly, while the stored gradient is unchanged (step() does
# not clear it).
# NOTE(review): the labels still say "Initial", but these are the weights
# after one update; consider renaming the labels and re-running the cell.
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0232, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0065,  0.0125,  ...,  0.0285,  0.0349, -0.0106]],
       requires_grad=True)
Initial weights gradient :  tensor([[-7.2154e-04, -7.2154e-04, -7.2154e-04,  ..., -7.1856e-04,
         -7.2154e-04, -7.2154e-04],
        [ 1.5117e-03,  1.5168e-03,  1.5292e-03,  ...,  1.5526e-03,
          1.5154e-03,  1.5168e-03],
        [-4.5585e-04, -4.5585e-04, -4.5585e-04,  ..., -4.5585e-04,
         -4.5585e-04, -4.5585e-04],
        ...,
        [-9.6583e-05, -9.6583e-05, -9.6583e-05,  ..., -1.5217e-04,
         -9.6583e-05, -9.6583e-05],
        [-5.6446e-04, -5.6559e-04, -5.5394e-0

In [12]:
# Reset the gradients held by the optimizer's parameters so the next
# backward() call does not accumulate on top of the old values.
# NOTE(review): the recorded output shows zero-filled gradient tensors; newer
# PyTorch releases appear to default to set_to_none=True, in which case .grad
# would print as None instead. Confirm against the installed version.
optimizer.zero_grad()

In [13]:
# Inspect the first layer after zero_grad(): the weights keep their
# post-update values and only the gradient has been cleared.
# NOTE(review): the labels still say "Initial" although an optimizer step and
# a gradient reset have both happened by this point.
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0105,  ..., -0.0203, -0.0060, -0.0300],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0232, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0065,  0.0125,  ...,  0.0285,  0.0349, -0.0106]],
       requires_grad=True)
Initial weights gradient :  tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


In [15]:
# Train a fresh FMNIST classifier with plain SGD and NLL loss, printing the
# loss of every batch and the mean batch loss at the end of each epoch.
model = FMNIST()
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

num_epochs = 1

# Explicitly select training mode. It makes no difference for this
# architecture (no dropout / batch-norm layers) but keeps the loop correct if
# such layers are added later.
model.train()

for epoch in range(num_epochs):
    cum_loss = 0.0

    # enumerate(..., 1) gives 1-based batch numbers for the progress output.
    for batch_num, (images, labels) in enumerate(trainloader, 1):
        # Clear gradients left over from the previous batch before backprop.
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # Convert the loss to a Python float once and reuse it.
        batch_loss = loss.item()
        cum_loss += batch_loss
        print(f'Batch: {batch_num}, Loss: {batch_loss}')

    # Mean of the per-batch losses over the whole epoch.
    print(f"Training loss: {cum_loss/len(trainloader)}")

Batch: 1, Loss: 2.288628339767456
Batch: 2, Loss: 2.3080620765686035
Batch: 3, Loss: 2.3098013401031494
Batch: 4, Loss: 2.284043312072754
Batch: 5, Loss: 2.291738986968994
Batch: 6, Loss: 2.3061680793762207
Batch: 7, Loss: 2.2870423793792725
Batch: 8, Loss: 2.276984453201294
Batch: 9, Loss: 2.2776780128479004
Batch: 10, Loss: 2.3024075031280518
Batch: 11, Loss: 2.270155906677246
Batch: 12, Loss: 2.288559913635254
Batch: 13, Loss: 2.2820968627929688
Batch: 14, Loss: 2.2649171352386475
Batch: 15, Loss: 2.274874210357666
Batch: 16, Loss: 2.2665631771087646
Batch: 17, Loss: 2.2486894130706787
Batch: 18, Loss: 2.25093150138855
Batch: 19, Loss: 2.2826104164123535
Batch: 20, Loss: 2.2532405853271484
Batch: 21, Loss: 2.234309434890747
Batch: 22, Loss: 2.2593002319335938
Batch: 23, Loss: 2.248821496963501
Batch: 24, Loss: 2.2512760162353516
Batch: 25, Loss: 2.2572567462921143
Batch: 26, Loss: 2.2401793003082275
Batch: 27, Loss: 2.214656352996826
Batch: 28, Loss: 2.226203441619873
Batch: 29, Los

In [17]:
# Training samples divided by the batch size. The result is fractional
# (937.5): 937 full batches of 64 plus one final batch of 32. The loader was
# created without drop_last, so presumably it yields 938 batches per epoch;
# verify with len(trainloader).
60000/64 # training samples / batch size

937.5