In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Neural Network can be constructed using the torch.nn package
# nn depends on autograd. An 'nn.Module' contains layers and a method 'forward(input)' that returns the output.

# Note that:
# torch.nn only supports mini-batches, the entire torch.nn package only supports inputs that are a mini-batch of samples and not a single sample.

In [7]:
# Define the network

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        # 1 input image channel, 6 output channels, 3x3 square convolution kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    # You just have to define the forward function, and the backward function is automatically defined for you using autograd.
    def forward(self, x):
        """
        The computation process:
        input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
              -> view(reshape) -> linear -> relu -> linear -> relu -> linear
              -> MSEloss
              -> loss
        """
        # Max polling over a (2,2) window:
        x = F.max_pool2d(F.relu(self.conv1(x)), (2,2))
        
        x = F.max_pool2d(F.relu(self.conv2(x)), (2))
        # Perform reshape
        x = x.view(-1, self.num_flat_features(x))
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        
        return x
    
    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s

        return num_features
    
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [11]:
# Recap:
# torch.Tensor: A multi-dimensional array with support for auto grad operations like backward(). Also holds the gradient.
# nn.Module: Neural Network module, convenient way of encapsulating parameters, with helpers for moving them to GPU, etc.
# nn.Parameters: A kind of Tensor, that is automatically registered as a parameter when assigned as an attribute to a Module.
# autograd.Function: implements forward and backward definitions of an autograd operation. Every Tensor operation creates at least a single Function node that
# connects to functions that created a Tensor and encodes its history.

In [20]:
# Loss function
# A lossfunction takes (output, target) pair of inputs and computes a value that estimate how far away the output is from the target.

# There are several different loss functions under the nn package. A simple loss is: nn.MSELoss.

# For example:
input = torch.randn(1, 1, 32, 32)
output = net(input)

target = torch.randn(10)
target = target.view(1, -1)

criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

# The computation process:
# input -> conv2d -> relu -> maxpool2d -> conv2d -> relu -> maxpool2d
#       -> view(reshape) -> linear -> relu -> linear -> relu -> linear
#       -> MSEloss
#       -> loss

net.zero_grad()  #zeroes the gradient buffers of all parameters.

loss.backward()
print(net.conv1.bias.grad)

tensor(0.7025, grad_fn=<MseLossBackward>)
tensor([ 0.0151,  0.0026, -0.0116,  0.0293,  0.0045,  0.0200])


In [25]:
# Update the weight:

# You could:
# ------
learning_rate = 0.01
for f in net.parameters():
    # f = f - learning_rate * gradient
    f.data.sub_(f.grad.data * learning_rate)

# Or you could use the torch.optim package:
# ------
import torch.optim as optim
optimizer = optim.SGD(net.parameters(), lr = 0.01)


# In your traning_loop
for i in range(256):
    optimizer.zero_grad()    #zeroes the gradient buffers
    output = net(input)
    loss = criterion(output, target)
    loss.backward()
    optimizer.step()
    print(loss)

tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLossBackward>)
tensor(2.8089e-15, grad_fn=<MseLos