In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small LeNet-style CNN: two conv/ReLU/max-pool stages, then three linear layers.

    ``fc1`` expects 16 * 6 * 6 = 576 input features. With 3x3 convolutions
    (no padding) and 2x2 max pooling, a 32x32 single-channel image shrinks as
    32 -> 30 -> 15 -> 13 -> 6, so the network as written is sized for
    inputs of shape ``(N, 1, 32, 32)``.
    """

    def __init__(self):
        super().__init__()
        # 1 input image channel -> 6 output channels, 3x3 square kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        # 6 input channels -> 16 output channels, 3x3 square kernel
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Affine layers (y = Wx + b); 16 channels * 6 * 6 spatial positions
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on ``x`` and return the 10 raw output scores per sample.

        Args:
            x: batch of images; must reduce to 16x6x6 feature maps after the
                two conv/pool stages (e.g. shape ``(N, 1, 32, 32)``).

        Returns:
            Tensor of shape ``(N, 10)``; no activation is applied to the last layer.
        """
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # For a square window a single number is enough
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Collapse every dimension except the batch dimension; unlike
        # ``view`` this also works when ``x`` is not contiguous.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of all dimensions except the first (batch)
        dimension; a 1-D tensor yields 1. ``forward`` no longer needs it, but
        it is kept so existing callers continue to work.
        """
        # Slicing a torch.Size gives a torch.Size, whose numel() is the product.
        return x.size()[1:].numel()

# Build the model with freshly initialised weights.
net = Net()
# Printing a Module lists its registered sub-layers and their settings.
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [8]:
# First registered parameter; parameters() is an iterator, so there is no
# need to materialise the whole list just to look at its head.
next(net.parameters())

Parameter containing:
tensor([[[[-8.7834e-02, -2.4357e-01, -8.4748e-02],
          [-2.0933e-04, -1.9281e-01,  2.9978e-01],
          [ 5.1785e-02,  1.5465e-01,  1.7215e-02]]],


        [[[-1.2440e-01, -1.1558e-01, -9.3205e-02],
          [ 1.8206e-01,  8.6198e-02,  3.2542e-01],
          [-1.1322e-01, -2.2042e-01, -2.8049e-01]]],


        [[[ 1.4444e-01, -3.0604e-01,  2.1094e-01],
          [-6.4041e-02,  5.7926e-02, -2.9989e-01],
          [ 7.7765e-02, -1.0225e-03,  8.0155e-02]]],


        [[[-1.2987e-01,  1.5402e-01,  6.7604e-02],
          [ 8.5536e-02,  3.1709e-01, -2.4810e-01],
          [ 8.5445e-02, -2.9215e-01, -2.8064e-01]]],


        [[[-2.3057e-01,  2.7310e-01, -2.0392e-01],
          [ 2.1945e-01, -2.8142e-01,  2.6458e-01],
          [ 5.6181e-02, -2.7568e-01,  2.6646e-01]]],


        [[[-1.8025e-01,  9.9606e-03, -2.4173e-01],
          [-7.9618e-02, -1.5417e-01,  6.3346e-02],
          [ 3.0581e-02, -5.3268e-02,  2.7574e-01]]]], requires_grad=True)

In [9]:
# A batch holding one random single-channel 32x32 image.
inp = torch.randn((1, 1, 32, 32))
# Forward pass; `out` keeps its autograd graph for the backward call later on.
out = net(inp)
out

tensor([[-0.0405,  0.1092, -0.0095,  0.0579, -0.0506, -0.1690, -0.1357, -0.0206,
         -0.1228, -0.0069]], grad_fn=<AddmmBackward>)

In [18]:
# Gradient currently stored on the first registered parameter.
print(next(net.parameters()).grad)

None


In [19]:
# Reset the gradients of every parameter, then inspect the first one.
net.zero_grad()
first_param = next(net.parameters())
print(first_param.grad)


None


In [23]:
# `out` is not a scalar, so backward() needs an upstream gradient of the
# same shape; a random one is enough for demonstration purposes.
upstream_grad = torch.randn(1, 10)
out.backward(gradient=upstream_grad)
print(next(net.parameters()).grad)


tensor([[[[-0.1126, -0.1494,  0.0310],
          [-0.0322, -0.1467,  0.0929],
          [ 0.0210,  0.0844,  0.0066]]],


        [[[ 0.0129,  0.0668, -0.0654],
          [-0.0799,  0.0328, -0.1382],
          [-0.0206,  0.0084,  0.1162]]],


        [[[ 0.0312,  0.0235,  0.0566],
          [ 0.0345, -0.1269, -0.0261],
          [ 0.0453,  0.0543,  0.0100]]],


        [[[ 0.0739, -0.1061, -0.0844],
          [ 0.0124, -0.1925,  0.1466],
          [ 0.0403,  0.0466,  0.1065]]],


        [[[-0.0239,  0.0114,  0.0576],
          [ 0.0430,  0.0811, -0.1137],
          [ 0.0393,  0.0197,  0.0424]]],


        [[[ 0.0236,  0.0008, -0.1074],
          [-0.0530,  0.0173,  0.0455],
          [-0.0294,  0.0035,  0.0688]]]])


In [44]:
output = net(inp)

# Dummy regression target: 10 random values reshaped to a single row so it
# lines up with `output`.
target = torch.randn(10).view(1, -1)

# Mean-squared error between the network output and the target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(0.6730, grad_fn=<MseLossBackward>)


In [46]:
# Clear any gradients left on the parameters by earlier backward calls.
net.zero_grad()

conv1_bias = net.conv1.bias
print('conv1.bias.grad before backward', conv1_bias.grad, sep='\n')

# Back-propagate the loss; this fills in .grad on every parameter it reaches.
loss.backward()

print('conv1.bias.grad after backward', conv1_bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([ 0.0006, -0.0201, -0.0059,  0.0094, -0.0216, -0.0148])


In [181]:
import torch.optim as optim

# Plain stochastic gradient descent over all of the network's parameters.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)

# One iteration of what would normally sit inside the training loop.
conv1_bias = net.conv1.bias
optimizer.zero_grad()   # clear gradients left over from earlier cells
print(conv1_bias.grad)

output = net(inp)
loss = criterion(output, target)
loss.backward()
optimizer.step()        # apply the parameter update

print(conv1_bias.grad)

tensor([0., 0., 0., 0., 0., 0.])
tensor([-4.2362e-04,  1.9571e-05,  0.0000e+00,  9.8025e-04,  0.0000e+00,
         0.0000e+00])
