# Neural Networks

+ Neural networks can be constructed using the __torch.nn__ package.
An __nn.Module__ contains __layers and a method forward(input) that returns the output__.
+ The learnable parameters of a model are returned by __net.parameters()__
+ __net.zero_grad()__ zeroes the gradient buffers of all parameters
+ __out.backward()__ backpropagates gradients through the network
+ __torch.nn__ only supports mini-batches. For example, nn.Conv2d takes a 4D Tensor of __nSamples * nChannels * Height * Width__
+ If you have a single sample, just use __input.unsqueeze(0)__ to add a fake batch dimension
+ Use __tensor.data__ to access a tensor's underlying data without tracking the operations performed on it
+ __Losses__ are defined in __nn__ package
+ The __torch.optim__ package implements optimization methods such as SGD, Nesterov-SGD, Adam, RMSProp, etc.
+ __optimizer.zero_grad()__ zeroes the gradient buffers
+ __optimizer.step()__ updates the parameters using the update rule of the chosen __optimizer__.



## Define the network

In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional network producing 10 output values per sample.

    Two convolution + ReLU + max-pool stages are followed by three fully
    connected layers. The first linear layer takes 16*5*5 = 400 features,
    which is what the convolutional stages yield for a 1x32x32 input.
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, each with a 5x5 kernel.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        total = 1
        for extent in x.size()[1:]:
            total = total * extent
        return total

    def forward(self, x):
        """Run ``x`` through both conv stages and the linear head."""
        # Stage 1: convolution, ReLU, then max pooling over a 2x2 window.
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Stage 2: same pattern; a bare 2 means a square 2x2 window.
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), 2)
        # Collapse everything but the leading dimension into one axis.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # The last layer has no activation applied.
        return self.fc3(hidden)

In [12]:
# Build the network; printing a module lists its registered layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### The learnable parameters of a model are returned by __net.parameters()__

In [13]:
# Wrap net.parameters() in a list so the notebook displays the learnable tensors.
list(net.parameters())

[Parameter containing:
 tensor([[[[-0.1875,  0.0344,  0.0182, -0.1644,  0.1369],
           [ 0.1661,  0.0539,  0.0621, -0.1101, -0.0668],
           [-0.0767, -0.0947, -0.0090, -0.0656,  0.0091],
           [ 0.1047, -0.1831, -0.0161,  0.1389,  0.0815],
           [ 0.0007,  0.1594, -0.0443, -0.0897,  0.0609]]],
 
 
         [[[ 0.0597,  0.0386,  0.1523,  0.1792,  0.1432],
           [-0.0182, -0.0162, -0.1885, -0.0031,  0.0994],
           [-0.1611, -0.1465,  0.0833,  0.1721,  0.1154],
           [ 0.0368, -0.1133, -0.1545,  0.1609, -0.1065],
           [-0.1576, -0.0103, -0.1056,  0.0686, -0.0862]]],
 
 
         [[[-0.1896, -0.1388,  0.0217,  0.0373,  0.1808],
           [ 0.1805,  0.0332,  0.0218,  0.1548, -0.1305],
           [-0.1803, -0.0871, -0.0179, -0.1522,  0.1826],
           [-0.1798,  0.0391, -0.0855,  0.1184, -0.0613],
           [-0.1366, -0.0685, -0.1388,  0.0649,  0.1092]]],
 
 
         [[[-0.1358,  0.0689,  0.0698, -0.0774,  0.0583],
           [-0.0999, -0.0555, -

In [14]:
# Keep the parameters in a list so they can be counted and indexed.
params = list(net.parameters())
print(len(params))  # 10: a weight and a bias for each of the 5 layers
print(params[0].size())  # the first entry is conv1's weight

10
torch.Size([6, 1, 5, 5])


In [16]:
# Random input of shape (1, 1, 32, 32): one sample, one channel, 32x32.
# NOTE(review): `input` shadows the Python builtin; later cells reuse this name.
input = torch.randn(1,1,32,32)
out = net(input)  # calling the module runs its forward()
print(out)

tensor([[-0.1007, -0.1486, -0.1099,  0.1223,  0.0644, -0.1350, -0.0055, -0.0213,
          0.0010,  0.0110]], grad_fn=<AddmmBackward>)


+ __net.zero_grad()__ zeroes the gradient buffers of all parameters
+ __out.backward()__ backpropagates gradients through the network


+ __torch.nn__ only supports mini-batches. For example, nn.Conv2d takes a 4D Tensor of __nSamples * nChannels * Height * Width__
+ If you have a single sample, just use __input.unsqueeze(0)__ to add a fake batch dimension

In [17]:
# Score the network output against a random target with mean-squared error.
criterion = nn.MSELoss()
output = net(input)
# Draw 10 random target values and shape them as one row, (1, 10).
target = torch.randn(10).view(1, -1)

loss = criterion(output, target)
print(loss)

tensor(1.7814, grad_fn=<MseLossBackward>)


In [23]:
# grad_fn is the autograd node that produced `loss`.
loss.grad_fn


<MseLossBackward at 0x7f4658d87a90>

In [25]:
# Walk backwards through the autograd graph one node at a time.
print(loss.grad_fn)  # node that produced the loss
print(loss.grad_fn.next_functions[0][0])  # its first input node
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # one step further back

<MseLossBackward object at 0x7f4658d87a90>
<AddmmBackward object at 0x7f4658dff5c0>
<AccumulateGrad object at 0x7f4658e03400>


+ __loss.grad_fn__ shows the gradient function associated with a tensor, and __loss.grad_fn.next_functions__ can be used to inspect the functions that precede it in the graph

In [28]:
# Clear the gradient buffers so this backward pass does not add to old values.
net.zero_grad()
print("conv1.bias.grad before backward")
print(net.conv1.bias.grad)

# Backpropagate from the loss; this fills in .grad on the parameters.
loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0072, -0.0157,  0.0108,  0.0134, -0.0110, -0.0007])


In [31]:
# Show the parameter values before the manual update.
print(list(net.parameters()))

[Parameter containing:
tensor([[[[-0.1875,  0.0344,  0.0182, -0.1644,  0.1369],
          [ 0.1661,  0.0539,  0.0621, -0.1101, -0.0668],
          [-0.0767, -0.0947, -0.0090, -0.0656,  0.0091],
          [ 0.1047, -0.1831, -0.0161,  0.1389,  0.0815],
          [ 0.0007,  0.1594, -0.0443, -0.0897,  0.0609]]],


        [[[ 0.0597,  0.0386,  0.1523,  0.1792,  0.1432],
          [-0.0182, -0.0162, -0.1885, -0.0031,  0.0994],
          [-0.1611, -0.1465,  0.0833,  0.1721,  0.1154],
          [ 0.0368, -0.1133, -0.1545,  0.1609, -0.1065],
          [-0.1576, -0.0103, -0.1056,  0.0686, -0.0862]]],


        [[[-0.1896, -0.1388,  0.0217,  0.0373,  0.1808],
          [ 0.1805,  0.0332,  0.0218,  0.1548, -0.1305],
          [-0.1803, -0.0871, -0.0179, -0.1522,  0.1826],
          [-0.1798,  0.0391, -0.0855,  0.1184, -0.0613],
          [-0.1366, -0.0685, -0.1388,  0.0649,  0.1092]]],


        [[[-0.1358,  0.0689,  0.0698, -0.0774,  0.0583],
          [-0.0999, -0.0555, -0.1094,  0.0269,  0.124

In [37]:
learning_rate = 0.01
# Manual SGD step: weight = weight - learning_rate * gradient.
# The update runs under torch.no_grad() so autograd does not record it;
# this replaces direct `.data` manipulation, which bypasses autograd's
# safety checks.
with torch.no_grad():
    for f in net.parameters():
        # Skip parameters that have no gradient yet.
        if f.grad is not None:
            f.sub_(f.grad * learning_rate)

In [38]:
# Show the parameters again to see how the update changed the values.
print(list(net.parameters()))

[Parameter containing:
tensor([[[[-0.1879,  0.0343,  0.0183, -0.1636,  0.1369],
          [ 0.1653,  0.0537,  0.0615, -0.1101, -0.0670],
          [-0.0765, -0.0950, -0.0092, -0.0649,  0.0091],
          [ 0.1050, -0.1828, -0.0167,  0.1393,  0.0816],
          [ 0.0014,  0.1596, -0.0440, -0.0891,  0.0607]]],


        [[[ 0.0597,  0.0390,  0.1534,  0.1792,  0.1433],
          [-0.0184, -0.0162, -0.1885, -0.0033,  0.0993],
          [-0.1617, -0.1455,  0.0830,  0.1724,  0.1162],
          [ 0.0370, -0.1125, -0.1547,  0.1611, -0.1063],
          [-0.1587, -0.0102, -0.1049,  0.0690, -0.0857]]],


        [[[-0.1889, -0.1389,  0.0226,  0.0375,  0.1809],
          [ 0.1805,  0.0331,  0.0224,  0.1545, -0.1304],
          [-0.1805, -0.0875, -0.0181, -0.1524,  0.1829],
          [-0.1792,  0.0391, -0.0859,  0.1182, -0.0611],
          [-0.1366, -0.0686, -0.1383,  0.0646,  0.1088]]],


        [[[-0.1353,  0.0689,  0.0699, -0.0771,  0.0583],
          [-0.0994, -0.0549, -0.1098,  0.0267,  0.124

+ The __torch.optim__ package implements optimization methods such as SGD, Nesterov-SGD, Adam, RMSProp, etc.
+ __optimizer.zero_grad()__ zeroes the gradient buffers
+ __optimizer.step()__ updates the parameters using the update rule of the chosen __optimizer__.

In [41]:
import torch.optim as optim
# SGD over every network parameter, using the same 0.01 rate as the manual update.
optimizer = optim.SGD(net.parameters(),lr=0.01)
optimizer.zero_grad()  # clear gradient buffers before the new backward pass
output = net(input)  # forward pass
loss = criterion(output, target)
loss.backward()  # compute gradients of the loss
optimizer.step()  # apply the update to the parameters