In [2]:
# Based on the tutorial by Soumith Chintala,
# "Deep Learning with PyTorch: A 60 Minute Blitz":
# https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py

In [3]:
# A typical training procedure for a neural network is as follows:
#
#   1. Define the neural network that has some learnable parameters (or weights).
#   2. Iterate over a dataset of inputs.
#   3. Process each input through the network.
#   4. Compute the loss (how far the output is from being correct).
#   5. Propagate gradients back into the network's parameters.
#   6. Update the weights of the network, typically using a simple update rule:
#        weight = weight - learning_rate * gradient

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [29]:
class Net(nn.Module):
    """LeNet-style convolutional classifier.

    Architecture: two (5x5 convolution -> ReLU -> 2x2 max-pool) stages followed
    by three fully connected layers.

    The first fully connected layer expects ``16 * 5 * 5`` features, so the
    input must be sized such that the feature map is 5x5 after the second
    pooling step (e.g. 32x32 images: 32 -> 28 -> 14 -> 10 -> 5).

    Args:
        in_channels: Number of channels of the input images. Defaults to 1.
        num_classes: Number of scores produced by the final layer. Defaults to 10.
    """

    def __init__(self, in_channels=1, num_classes=10):
        super().__init__()
        # Convolutional feature extractor: in_channels -> 6 -> 16 feature maps, 5x5 kernels.
        self.convolution_layer1 = nn.Conv2d(in_channels=in_channels, out_channels=6, kernel_size=5)
        self.convolution_layer2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Classifier head: 16 feature maps of 5x5 -> 120 -> 84 -> num_classes.
        self.fully_connected_layer1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fully_connected_layer2 = nn.Linear(in_features=120, out_features=84)
        self.fully_connected_layer3 = nn.Linear(in_features=84, out_features=num_classes)

    def forward(self, x):
        """Compute the raw class scores for a batch of images.

        Args:
            x: Tensor of shape ``(batch, in_channels, height, width)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the un-normalised
            outputs of the last linear layer (no softmax is applied).
        """
        # Stage 1: convolution -> ReLU -> 2x2 max-pooling.
        x = F.max_pool2d(F.relu(self.convolution_layer1(x)), (2, 2))
        # Stage 2: convolution -> ReLU -> 2x2 max-pooling.
        x = F.max_pool2d(F.relu(self.convolution_layer2(x)), (2, 2))

        # Flatten everything except the batch dimension before the linear layers.
        x = torch.flatten(x, 1)

        x = F.relu(self.fully_connected_layer1(x))
        x = F.relu(self.fully_connected_layer2(x))
        return self.fully_connected_layer3(x)

In [30]:
# Seed torch's global RNG so the randomly initialised weights (and the random
# inputs/targets drawn in later cells) are identical on every Restart & Run All.
torch.manual_seed(0)
net = Net()

In [31]:
# Display the module summary: the layers registered on the network and their sizes.
net

Net(
  (convolution_layer1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (convolution_layer2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fully_connected_layer1): Linear(in_features=400, out_features=120, bias=True)
  (fully_connected_layer2): Linear(in_features=120, out_features=84, bias=True)
  (fully_connected_layer3): Linear(in_features=84, out_features=10, bias=True)
)

In [32]:
# Collect the learnable tensors of the network and inspect the first one,
# which is the weight tensor of convolution_layer1.
parameters = [*net.parameters()]
print(f"len of parameters: {len(parameters)}")
print(parameters[0].shape)  # same torch.Size as .size()
print(parameters[0][0][0])  # first 5x5 kernel of the first filter

len of parameters: 10
torch.Size([6, 1, 5, 5])
tensor([[ 0.0893, -0.1300, -0.1894, -0.0971,  0.0847],
        [ 0.1682,  0.0047, -0.1227,  0.1464,  0.0493],
        [ 0.0637,  0.1271,  0.1014, -0.1777, -0.1160],
        [ 0.1465, -0.0948, -0.0067, -0.1855,  0.0125],
        [-0.0749, -0.1677,  0.0065,  0.0723, -0.1954]],
       grad_fn=<SelectBackward>)


In [33]:
# Build a random input for the network: a batch of one single-channel 32x32 image.
input_data = torch.randn((1, 1, 32, 32))
input_data

tensor([[[[ 0.1155,  1.8552, -0.1924,  ...,  0.0400,  2.1606, -1.8923],
          [-1.7216, -0.3864, -0.6514,  ...,  0.4989,  0.6088,  0.6107],
          [-1.4288,  3.5789,  1.4896,  ..., -0.3592,  0.4668, -0.9086],
          ...,
          [-1.0659,  0.4517, -0.0489,  ...,  2.6738,  1.4880, -0.1249],
          [-1.7044, -0.6872,  0.8221,  ..., -0.5326, -0.0487,  1.1942],
          [ 2.1396,  0.8312, -0.0056,  ..., -0.3850,  1.4916, -0.0119]]]])

In [34]:
# Forward pass on the random input. The final layer has 10 outputs, so the
# result has shape 1x10 (one score per class for the single image).
output = net(input_data)
print(f"output: {output}", f"length: {output.shape}", sep="\n")

output: tensor([[ 0.0421, -0.0473,  0.0209, -0.0232, -0.0098,  0.0325, -0.0553,  0.0060,
         -0.0698,  0.1013]], grad_fn=<AddmmBackward>)
length: torch.Size([1, 10])


In [35]:
# Clear the gradient buffers of every parameter, then backpropagate a random
# upstream gradient of the same shape as the output (1x10).
net.zero_grad()
output.backward(gradient=torch.randn(1, 10))

In [98]:
# Loss function
# A loss function takes an (output, target) pair and computes a value that
# estimates how far the output is from the target.
output = net(input_data)
# Dummy target for the example, reshaped to (1, 10) so it matches the output.
target = torch.randn(10).view(1, -1)

In [99]:
# Show the gradients currently stored on the first convolution layer, before
# loss.backward() is called. These may still hold values from the earlier
# output.backward(...) call, since nothing has cleared them in between.
print('conv1.bias.grad before backward', net.convolution_layer1.bias.grad, sep='\n')
print('conv1.weights.grad before backward', net.convolution_layer1.weight.grad, sep='\n')

tensor([[[ 0.0893, -0.1300, -0.1894, -0.0971,  0.0847],
         [ 0.1682,  0.0047, -0.1227,  0.1464,  0.0493],
         [ 0.0637,  0.1271,  0.1014, -0.1777, -0.1160],
         [ 0.1465, -0.0948, -0.0067, -0.1855,  0.0125],
         [-0.0749, -0.1677,  0.0065,  0.0723, -0.1954]]],
       grad_fn=<SelectBackward>)

In [100]:
# Print the network output followed by its shape.
print(output, f"shape: {output.shape}", sep="\n")

tensor([[ 0.0421, -0.0473,  0.0209, -0.0232, -0.0098,  0.0325, -0.0553,  0.0060,
         -0.0698,  0.1013]], grad_fn=<AddmmBackward>)
shape: torch.Size([1, 10])


In [101]:
# Print the dummy target followed by its shape (it should match the output's shape).
print(target, f"shape: {target.shape}", sep="\n")

tensor([[ 1.5852, -0.7094, -1.2697,  0.3324, -1.0752, -0.8667,  0.2802, -1.2595,
          0.3636, -0.9211]])
shape: torch.Size([1, 10])


In [102]:
# Mean-squared error between the network output and the dummy target.
# reduction="mean" is the default, spelled out here for clarity.
criterion = nn.MSELoss(reduction="mean")
loss = criterion(output, target)
loss

tensor(0.9503, grad_fn=<MseLossBackward>)

In [103]:
# backward() accumulates into each parameter's .grad. The earlier
# output.backward(torch.randn(1, 10)) demo already populated those buffers, so
# clear them first; otherwise the gradients would mix both passes instead of
# reflecting this loss alone.
net.zero_grad()
loss.backward()

In [104]:
# Inspect the first filter of convolution_layer1 after the backward pass.
# NOTE(review): this displays the weights, not their gradient. No weight update
# has been performed in the cells shown, so `.weight.grad` may have been
# intended here - confirm.
net.convolution_layer1.weight[0]

tensor([[[ 0.0893, -0.1300, -0.1894, -0.0971,  0.0847],
         [ 0.1682,  0.0047, -0.1227,  0.1464,  0.0493],
         [ 0.0637,  0.1271,  0.1014, -0.1777, -0.1160],
         [ 0.1465, -0.0948, -0.0067, -0.1855,  0.0125],
         [-0.0749, -0.1677,  0.0065,  0.0723, -0.1954]]],
       grad_fn=<SelectBackward>)

In [105]:
# Reset the gradients so that a subsequent backward pass does not accumulate on
# top of the values computed by loss.backward().
net.zero_grad() # zeroes the gradient buffers of all parameters