# ConvNet

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Net(nn.Module):
    """Small LeNet-style CNN: two conv + max-pool stages, then three linear layers.

    The first linear layer expects 16 feature maps of 6x6, which is what the
    two conv/pool stages produce from a 1-channel 32x32 image
    (32 -> 30 -> 15 -> 13 -> 6).
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 -> 6 -> 16 channels, both with 3x3 square kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Classifier head: 16 * 6 * 6 flattened features -> 120 -> 84 -> 10 classes.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch of shape (N, 1, 32, 32) through the net; returns (N, 10) raw scores."""
        # Each conv stage is convolution -> ReLU -> 2x2 max pooling
        # (a single number means a square pooling window).
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Collapse every non-batch dimension into one feature axis.
        x = x.view(-1, self.num_flat_features(x))
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        # The last layer is returned without an activation.
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first (batch) one."""
        return x.size()[1:].numel()
    
# Instantiate the network; printing an nn.Module lists its registered sub-layers.
net = Net()
print(net)
        
        

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


### Parameter check

In [8]:
# Gather every learnable tensor registered on the module, in registration order.
params = [p for p in net.parameters()]
print(len(params))
print(params[0].shape)  # the first entry is conv1's weight

10
torch.Size([6, 1, 3, 3])


### Sanity check

In [26]:
# Net.forward flattens everything after dim 0, so dim 0 must be the batch
# dimension: a single sample is passed as (batch=1, channels=1, 32, 32).
# NOTE: the name `input` shadows the Python builtin; later cells reuse it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[0.0106, 0.0533, 0.0575, 0.0049, 0.0753, 0.0546, 0.0921, 0.0083, 0.0439,
         0.0157]], grad_fn=<AddmmBackward>)


In [27]:
net.zero_grad()  # reset the gradient buffers of all parameters
out.backward(torch.randn(1, 10))  # `out` is (1, 10), not a scalar, so an explicit gradient of that shape is passed (random here)

In [40]:
input = torch.randn(1, 1, 32, 32)
input  # display the sample to check that it is 4-D: (batch, channel, height, width)

tensor([[[[ 0.1670,  2.8451, -1.4791,  ..., -1.4182,  1.2542,  0.8073],
          [ 0.1219, -1.0511,  0.2649,  ...,  0.2954, -0.3906, -0.3190],
          [-0.9525, -0.4870, -0.5714,  ...,  0.6372,  0.3499, -0.3995],
          ...,
          [-0.5566, -1.3688, -0.2913,  ..., -1.0485, -1.1716, -0.0630],
          [ 1.4634, -1.1598, -0.1570,  ..., -0.2002, -0.1943, -0.2413],
          [-0.2130,  0.8861,  0.3053,  ...,  0.8555,  0.2646, -0.9077]]]])

In [46]:
input = torch.randn(1, 32, 32)  # a single sample without a batch dimension (3-D)
input.unsqueeze(0)  # unsqueeze returns a new tensor with a leading batch dimension (4-D); `input` itself stays 3-D

tensor([[[-0.4242,  1.6720,  3.4235,  ..., -0.4917, -2.1464,  1.2106],
         [-1.9537, -0.9063,  0.8436,  ..., -2.4580, -0.0510,  0.1523],
         [ 1.9506, -1.7663,  0.3023,  ...,  1.9871, -0.6704, -1.1194],
         ...,
         [-0.1865,  0.6222, -0.3429,  ..., -0.5872, -0.7160,  1.1432],
         [-0.4855, -0.3060, -0.3087,  ..., -0.9562,  1.0489,  1.9476],
         [-1.1588, -0.3361,  0.4851,  ...,  0.3367, -0.8414, -1.8588]]])

In [49]:
out = net(input.unsqueeze(0))  # add the batch dimension before calling the network
print(out)  # it works!

tensor([[ 0.0484,  0.0525,  0.0437, -0.0370,  0.0761,  0.0541,  0.0853,  0.0010,
          0.0208, -0.0052]], grad_fn=<AddmmBackward>)


## Loss calculation

In [52]:
input = torch.randn(1, 1, 32, 32)  # fresh random 4-D sample used for the loss example

In [53]:
# Score the network's prediction against a dummy target with mean-squared error.
criterion = nn.MSELoss()
output = net(input)
# torch.randn(10) is 1-D; reshape it to (1, 10) so it has the same shape as the output.
target = torch.randn(10).view(1, -1)

loss = criterion(output, target)
print(loss)

tensor(1.1591, grad_fn=<MseLossBackward>)


In [55]:
target = torch.randn(10)  # NOTE: this rebinds `target`; cells run afterwards see this 1-D tensor
target  # without .view(1, -1) the target has shape (10,), which differs from the (1, 10) output

tensor([ 1.0989,  0.1511, -0.4886,  0.3213, -0.6579,  1.7395,  0.2994,  0.9327,
         0.2585, -0.8508])

In [63]:
# Walk the autograd graph backwards, starting from the loss.
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (fc3), recorded as an Addmm node
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # first input of that node: an AccumulateGrad leaf in the recorded output, not ReLU

<MseLossBackward object at 0x7feeec4829e8>
<AddmmBackward object at 0x7feeec482a20>
<AccumulateGrad object at 0x7feeec4829e8>


## Backprop

In [65]:
# Clear gradients left over from earlier backward() calls, then show conv1's bias gradient.
net.zero_grad()

print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])


In [66]:
# Back-propagate the loss, then show conv1's bias gradient again.
loss.backward()

print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad after backward
tensor([ 0.0216, -0.0068,  0.0017,  0.0102,  0.0019,  0.0084])


## Update the weights

In [75]:
import torch.optim as optim

# Plain stochastic gradient descent over all of the network's parameters.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

In [76]:
# One SGD training step on the current sample.
optimizer.zero_grad()  # clear gradients accumulated by earlier backward() calls
output = net(input)
# `target` was last rebound to a 1-D tensor of shape (10,), whereas `output`
# is (1, 10). Reshape it to the output's shape so MSELoss compares tensors of
# equal size instead of relying on broadcasting (which triggers a size-mismatch
# warning). view_as is a no-op when the shapes already agree.
loss = criterion(output, target.view_as(output))
loss.backward()  # populate .grad on every parameter
optimizer.step()  # apply the SGD update using those gradients