![MNIST neural network diagram](assets/mnist-neural-net-diagram.png)

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [9]:
class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    The attribute names map onto the diagram's stages: ``conv1``/``conv2``
    are C1/C3, the two pooling steps in ``forward`` are S2/S4, and
    ``fc1``/``fc2``/``fc3`` are the fully connected ("fc") layers F5, F6 and
    the 10-way output.
    """

    def __init__(self):
        # nn.Module.__init__ has to run before any layer is assigned: it
        # creates the bookkeeping that registers sub-modules and their
        # parameters when they are set as attributes.
        super().__init__()

        # Convolution stages (C1 and C3): 1 -> 6 -> 16 channels, 5x5 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected stages. 16 * 5 * 5 is the flattened size of the
        # second pooling output when the network is fed a 32x32 image
        # (32 -> 28 -> 14 -> 10 -> 5 per spatial side, 16 channels).
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, input):
        """Run the forward pass and return the raw 10-element scores per sample.

        No activation is applied after the last linear layer.
        """
        # C1 -> ReLU -> S2 (2x2 max-pool)
        x = F.max_pool2d(F.relu(self.conv1(input)), kernel_size=2)
        # C3 -> ReLU -> S4 (2x2 max-pool)
        x = F.max_pool2d(F.relu(self.conv2(x)), kernel_size=2)

        # Collapse everything except the leading (batch) dimension so the
        # linear layers receive one feature vector per sample.
        x = torch.flatten(x, start_dim=1)

        # F5 and F6 with ReLU, then the output layer.
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

In [10]:
# Build the network; printing an nn.Module lists its registered layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [11]:
# The learnable parameters (weights and biases) are created by the layers set up
# in Net.__init__; net.parameters() returns them.

params = list(net.parameters())

print(params)   # not required; printed only to inspect the raw tensors
print(len(params))  # expected 10: a weight and a bias for each of the five layers
print(params[0].size()) # conv1's weight

[Parameter containing:
tensor([[[[-0.0297, -0.1596,  0.0603, -0.1300, -0.1778],
          [ 0.1952, -0.1264, -0.0816, -0.0893, -0.1016],
          [ 0.1483,  0.1945, -0.0081,  0.1466, -0.0594],
          [-0.0750, -0.0187,  0.0746, -0.0303, -0.1842],
          [ 0.1954, -0.0182, -0.0344,  0.0437, -0.1530]]],


        [[[-0.0334,  0.0439, -0.0635,  0.0532,  0.1574],
          [-0.1294, -0.1516,  0.1534,  0.1481,  0.0052],
          [ 0.1184, -0.0014, -0.0041,  0.0103, -0.1791],
          [-0.1282, -0.0987, -0.1662,  0.1182,  0.1370],
          [ 0.1033,  0.1498,  0.1331, -0.1099, -0.0038]]],


        [[[ 0.1801, -0.1987,  0.1967, -0.0244,  0.1177],
          [ 0.1580,  0.0872, -0.0178,  0.0299, -0.0665],
          [ 0.1688, -0.0638, -0.0451,  0.0069, -0.1936],
          [ 0.1642,  0.1959,  0.0285,  0.0586, -0.0027],
          [ 0.0232,  0.1881,  0.0552, -0.1870, -0.1240]]],


        [[[-0.0723, -0.0856,  0.0833,  0.0227, -0.1836],
          [-0.0766, -0.1646, -0.1045, -0.0219, -0.003

In [12]:
# Forward a random tensor of shape (1, 1, 32, 32) -- one single-channel 32x32
# image -- through the network.
# NOTE(review): `input` shadows the Python builtin of the same name; it is kept
# because later cells read this variable.

input = torch.randn(1, 1, 32, 32)
out = net(input)

print(out)

tensor([[-0.1027, -0.0909,  0.0766,  0.0780, -0.1018, -0.0716,  0.0426, -0.0300,
          0.0404, -0.0183]], grad_fn=<AddmmBackward0>)


In [13]:
# Clear any existing gradients, then backpropagate from `out`.
# `out` is not a scalar, so backward() needs an explicit upstream gradient of
# the same shape; a random (1, 10) tensor is used here.
net.zero_grad()
out.backward(torch.randn(1, 10))

In [16]:
output = net(input)

# Random stand-in target, reshaped to (1, 10) so it lines up with `output`.
target = torch.rand(10).view(1, -1)

# Mean squared error; other loss functions exist, this is the one used here.
criterion = nn.MSELoss()
loss = criterion(output, target)

print(loss)

tensor(0.3178, grad_fn=<MseLossBackward0>)


In [17]:
# The loss has been computed; the next step is to backpropagate it.

# Gradients accumulate across backward() calls, so they must be reset first.

net.zero_grad() # resets every parameter's .grad (the recorded output shows None here, not a zero tensor)
print(net.conv1.bias.grad)

loss.backward()

# After backward(), conv1's bias has one gradient value per output channel.
print(net.conv1.bias.grad)

None
tensor([ 0.0017, -0.0015,  0.0032,  0.0101, -0.0026,  0.0029])


In [18]:
# Final step: update the network's weights.
# The simplest update rule is stochastic gradient descent (SGD):
#     weight = weight - learning_rate * gradient
# Instead of writing that by hand, use the implementation in torch.optim.

import torch.optim as optim

# SGD over all of net's parameters with a learning rate of 0.01.
optimizer = optim.SGD(net.parameters(), lr=0.01)

In [27]:
# With the optimizer created, one training step looks like this.

optimizer.zero_grad()  # clear gradients left over from the previous backward pass
output = net(input)  # forward pass
loss = criterion(output, target)
loss.backward()  # compute gradients of the loss w.r.t. the parameters
optimizer.step()  # apply the SGD update to the parameters

In [28]:
# `loss` was computed before optimizer.step() in the training-step cell, so this
# is the loss under the parameters as they were before that update.
print(loss)

tensor(0.2967, grad_fn=<MseLossBackward0>)
