# Autograd

In [1]:
# Setting seeds to try and ensure we have the same results - this is not guaranteed across PyTorch releases.
import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

import numpy as np
np.random.seed(0)

In [2]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

mean, std = (0.5,), (0.5,)

# Create a transform and normalise data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize(mean, std)
                              ])

# Download FMNIST training dataset and load training data
trainset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download FMNIST test dataset and load test data
testset = datasets.FashionMNIST('~/.pytorch/FMNIST/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

In [3]:
class FMNIST(nn.Module):
  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 128)
    self.fc2 = nn.Linear(128,64)
    self.fc3 = nn.Linear(64,10)

  def forward(self, x):
    x = x.view(x.shape[0], -1)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    x = F.log_softmax(x, dim=1)

    return x

#model = FMNIST()

In [4]:
# Here we are using the nn.sequentional for defining the model
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))

In [5]:
# For checking the 1st layer, 2nd layer etc...
model[0], model[1]

(Linear(in_features=784, out_features=128, bias=True), ReLU())

In [6]:
# Take the images and labels
images, labels = next(iter(trainloader))
images = images.view(images.shape[0], -1)

In [7]:
# Defining the loss function
criterion = nn.NLLLoss()

In [8]:
# Print out the initial values of the layer
print('Initial Weights : ',model[0].weight)

Initial Weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)


In [9]:
#Check the gradient
# Since we haven't done the backward pass, we will not have gradient
print('Initial Weights : ',model[0].weight.grad)

Initial Weights :  None


In [10]:
# do the forward pass and calculate the loss and do the backward pass
output = model(images)
loss = criterion(output, labels)
loss.backward()
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)


Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-0.0030, -0.0030, -0.0030,  ..., -0.0030, -0.0030, -0.0030],
        [ 0.0022,  0.0022,  0.0022,  ...,  0.0024,  0.0022,  0.0022],
        [ 0.0002,  0.0002,  0.0002,  ...,  0.0002,  0.0002,  0.0002],
        ...,
        [ 0.0014,  0.0014,  0.0014,  ...,  0.0014,  0.0014,  0.0014],
        [ 0.0021,  0.0021,  0.0021,  ...,  0.0022,  0.0021,  0.0021],
        [ 0.0038,  0.0038,  0.0038,  ...,  0.0038,  0.0038,  0.0038]])


In [11]:
# activation function such as ReLU dont have activation functon with them
model[1].weight # we can see that there is no weight value asssociated with it

AttributeError: ignored

In [12]:
# similarly model 2 -- linear function
model[2], model[2].weight

# we can see that there are weights assosiated

(Linear(in_features=128, out_features=64, bias=True),
 Parameter containing:
 tensor([[-0.0825, -0.0339,  0.0713,  ..., -0.0854,  0.0097, -0.0645],
         [ 0.0488,  0.0832,  0.0728,  ..., -0.0440,  0.0541,  0.0612],
         [-0.0055, -0.0093, -0.0157,  ...,  0.0782,  0.0032, -0.0530],
         ...,
         [ 0.0009,  0.0023, -0.0776,  ...,  0.0575,  0.0759,  0.0016],
         [-0.0815, -0.0214, -0.0414,  ...,  0.0702, -0.0252,  0.0703],
         [-0.0815,  0.0329, -0.0134,  ...,  0.0773,  0.0774,  0.0468]],
        requires_grad=True))