# Neural Networks

In [1]:
import torch
"""
neural networks can be constructed using torch.nn package

torch.nn depends on autograd package to define models and differentiate them
"""
import torch.nn as nn
import torch.nn.functional as F


In [2]:
"""
A typical training procedure for a neural network is as follows:

1. Define the neural network that has some learnable parameters (generally weights)

2. Iterate over a dataset of inputs

3. Process input through the network

4. Compute the loss (how far is the output from being correct)

5. Propagate gradients back into the network’s parameters

6. Update the weights of the network, typically using a simple update rule:

              weight = weight - learning_rate * gradient

"""

'\nA typical training procedure for a neural network is as follows:\n\n1. Define the neural network that has some learnable parameters (generally weights)\n\n2. Iterate over a dataset of inputs\n\n3. Process input through the network\n\n4. Compute the loss (how far is the output from being correct)\n\n5. Propagate gradients back into the network’s parameters\n\n6. Update the weights of the network, typically using a simple update rule:\n\n              weight = weight - learning_rate * gradient\n\n'

In [12]:
class Net(nn.Module):
    """
    Small convolutional classifier: two conv + max-pool stages feeding three fully connected layers.

    The first fully connected layer takes 16 * 5 * 5 features, which is what the two
    conv/pool stages produce for a single-channel (32, 32) image. Resize inputs to (32, 32)
    before passing them to this net.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages, both with 5x5 kernels: 1 -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # Fully connected head: 400 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """
        Run the forward computation and return the raw outputs of the last linear layer.

        Only the forward pass is written here; the backward pass is derived by autograd.
        """
        # Stage 1: convolution, ReLU, then max pooling over a (2, 2) window.
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, (2, 2))

        # Stage 2: same pattern; a single int is used for the pooling window here.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)

        # Collapse everything but the batch dimension into one feature axis.
        flat = stage2.view(-1, self.num_flat_features(stage2))

        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first (batch) one."""
        count = 1
        for dim in x.shape[1:]:
            count = count * dim
        return count

In [13]:
# Build one instance of the network and print it to list its registered layers.
net = Net()

print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [14]:
# Print the size of every tensor returned by net.parameters(), in iteration order.
params = list(net.parameters())
for tensor in params:
    print(tensor.shape)

torch.Size([6, 1, 5, 5])
torch.Size([6])
torch.Size([16, 6, 5, 5])
torch.Size([16])
torch.Size([120, 400])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [15]:
# Feed one random tensor of shape (1, 1, 32, 32) through the network and print the result.
# NOTE(review): `input` shadows the Python builtin of the same name. Later cells reuse this
# variable, so it is left as-is here; rename it everywhere at once if that is ever cleaned up.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[ 0.0688,  0.0319, -0.0282, -0.0356,  0.0741,  0.0168, -0.0394, -0.0019,
         -0.0659,  0.0210]], grad_fn=<ThAddmmBackward>)


In [16]:
net.zero_grad()  # zero the gradient buffers of parameters
# `out` holds 10 values rather than a single scalar, so backward() is given an explicit
# gradient tensor. It is (1, 10) because that is the shape of the network output.
out.backward(torch.randn(1, 10))

## Loss function 

In [17]:
# Forward pass, then score the output against a random stand-in target.
output = net(input)
target = torch.randn(10).view(1, -1)  # dummy target, reshaped to match the output's shape
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

tensor(2.5134, grad_fn=<MseLossBackward>)


In [18]:
# Print the grad_fn attached to the loss. The string below sketches the chain of
# operations that leads from the input to the loss.
print(loss.grad_fn)
"""
input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
      -> view -> linear -> relu -> linear -> relu -> linear
      -> MSELoss
      -> loss
"""

<MseLossBackward object at 0x7fcc53eddb00>


In [19]:
# Step backwards from the loss by following grad_fn.next_functions.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # last Linear layer (recorded output: ThAddmmBackward)
# NOTE(review): this line was originally labelled "ReLU", but the recorded output shows
# ExpandBackward. Index [0][0] does not appear to lead to the ReLU here; confirm which
# next_functions entry does.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<MseLossBackward object at 0x7fcc53eddcc0>
<ThAddmmBackward object at 0x7fcc53edfe10>
<ExpandBackward object at 0x7fcc53eddcc0>


In [20]:
# Show the effect of backward() on one parameter: clear all gradient buffers,
# print conv1's bias gradient, backpropagate the loss, then print it again.
net.zero_grad()

conv1_bias = net.conv1.bias

print('conv1.bias.grad before backward')
print(conv1_bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(conv1_bias.grad)

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0067, -0.0031,  0.0011,  0.0150,  0.0005,  0.0147])


In [21]:
# Update the weights of the network by hand: weight = weight - learning_rate * gradient
learning_rate = 0.01
# Do the in-place update under no_grad() instead of going through `.data`: the update
# is still kept out of the autograd graph, but autograd keeps tracking the in-place change.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:
            # This parameter received no gradient; leave it untouched.
            continue
        f.sub_(f.grad * learning_rate)

In [22]:
import torch.optim as optim
"""
torch.optim implements many different update rules, such as SGD, Adam etc.

"""

'\ntorch.optim implements many different update rules, such as SGD, Adam etc.\n\n'

In [23]:
# One optimisation step using torch.optim.
optimizer = optim.SGD(net.parameters(), lr=0.01)   # create your optimizer

# in your training loop:
# Zero the gradient buffers BEFORE the backward pass. Calling zero_grad() after backward()
# would discard the gradients that were just computed and make step() a no-op.
optimizer.zero_grad()
output = net(input)
loss = criterion(output, target)
loss.backward()     # computing gradient
optimizer.step()    # Does the update