## Demonstration of gradient calculation for the weights of a convolution, using a simple 2D example

In [86]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [87]:
# 3x3 input laid out as (batch=1, channels=1, height=3, width=3), the layout nn.Conv2d expects.
# No seed is set in the cells above, so the values differ on every fresh run.
# requires_grad=True makes autograd track x so that x.grad is available after backward().
x = torch.randn(1,1,3,3, requires_grad=True); x

tensor([[[[ 1.8676, -0.2631, -0.2380],
          [ 1.0669,  0.5283, -1.2112],
          [ 1.1241, -0.1771,  1.3998]]]], requires_grad=True)

In [88]:
# One input channel, one output channel, 2x2 kernel, and no bias term for now.
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2, 2), bias=False)

In [89]:
# The layer's 2x2 kernel; its four entries are the w1..w4 referred to in the sections below.
conv.weight

Parameter containing:
tensor([[[[ 0.0848, -0.0717],
          [-0.2816,  0.3315]]]], requires_grad=True)

In [90]:
# Slide the 2x2 kernel over the 3x3 input; the result shown below is 2x2.
y = conv(x); y

tensor([[[[ 0.0520, -0.5556],
          [-0.3227,  0.6456]]]], grad_fn=<MkldnnConvolutionBackward>)

In [91]:
# Collapse the output to a scalar so that backward() can be called without arguments.
# Note that y is rebound here: from now on it is the scalar sum, not the 2x2 output.
y = y.sum(); y

tensor(-0.1807, grad_fn=<SumBackward0>)

In [92]:
# Backpropagate from the scalar sum; this fills conv.weight.grad and x.grad, inspected below.
y.backward()

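### Gradient for w1, w2, w3, w4

Because the loss is the plain sum of all outputs, the gradient of each kernel weight is the sum of the input elements it is multiplied with as the kernel slides over x. With w1..w4 denoting the kernel entries in row-major order, the cells below reproduce each entry of `conv.weight.grad` by summing the matching 2×2 window of x.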

In [93]:
# Gradient of the summed output with respect to each of the four kernel weights.
conv.weight.grad

tensor([[[[ 3.1998, -1.1840],
          [ 2.5423,  0.5398]]]])

In [94]:
# w1 (kernel top-left) is multiplied with the top-left 2x2 window of x.
x[0, 0, :2, :2].sum()

tensor(3.1998, grad_fn=<SumBackward0>)

In [95]:
# w2 (kernel top-right) is multiplied with the top-right 2x2 window of x.
x[0, 0, :2, 1:].sum()

tensor(-1.1840, grad_fn=<SumBackward0>)

In [96]:
# w3 (kernel bottom-left) is multiplied with the bottom-left 2x2 window of x.
x[0, 0, 1:, :2].sum()

tensor(2.5423, grad_fn=<SumBackward0>)

In [97]:
# w4 (kernel bottom-right) is multiplied with the bottom-right 2x2 window of x.
x[0, 0, 1:, 1:].sum()

tensor(0.5398, grad_fn=<SumBackward0>)

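### Gradient for x

The gradient of each input element is the sum of the kernel weights that multiply it across all output positions. Here x1, x2, ... denote the elements of x in row-major order.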

In [98]:
# Strip the out-channel and in-channel axes to get the bare 2x2 kernel.
w = conv.weight[0, 0]; w

tensor([[ 0.0848, -0.0717],
        [-0.2816,  0.3315]], grad_fn=<SelectBackward>)

In [99]:
# Gradient of the summed output with respect to every element of the 3x3 input.
x.grad

tensor([[[[ 0.0848,  0.0131, -0.0717],
          [-0.1968,  0.0630,  0.2598],
          [-0.2816,  0.0499,  0.3315]]]])

#### x1

In [100]:
# x1 (top-left corner of x) is only ever multiplied by the top-left kernel weight.
w[0, 0]

tensor(0.0848, grad_fn=<SelectBackward>)

#### x2

In [101]:
# x2 (top-middle of x) meets w[0, 1] in the top-left window and w[0, 0] in the top-right one.
w[0, 0] + w[0, 1]

tensor(0.0131, grad_fn=<AddBackward0>)

#### etc...

### For bias=True

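The gradients for w and x are computed exactly as before. For b, the gradient is simply the number of elements in the output of the convolution: the loss is the sum of the outputs, and each output element contains b exactly once.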

In [102]:
# Same geometry as the first layer, but this time with a learnable bias.
conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(2, 2), bias=True)

In [103]:
# The same input tensor as in the first example, reused with the new layer.
x

tensor([[[[ 1.8676, -0.2631, -0.2380],
          [ 1.0669,  0.5283, -1.2112],
          [ 1.1241, -0.1771,  1.3998]]]], requires_grad=True)

In [104]:
# The new layer's 2x2 kernel together with its single bias value.
conv.weight, conv.bias

(Parameter containing:
 tensor([[[[-0.2303,  0.2391],
           [-0.1444, -0.0239]]]], requires_grad=True), Parameter containing:
 tensor([-0.0465], requires_grad=True))

In [105]:
# Run the same input through the new layer; the next cell rebuilds the first output element by hand.
y = conv(x); y

tensor([[[[-0.7061, -0.0901],
          [-0.3239, -0.4656]]]], grad_fn=<MkldnnConvolutionBackward>)

The first element of y is the element-wise product of the top-left 2×2 window of x with the kernel, summed, plus the bias.

In [113]:
# Top-left 2x2 window of x times the 2x2 kernel, summed, then shifted by the bias.
(x[0, 0, :2, :2] * conv.weight[0, 0]).sum() + conv.bias

tensor([-0.7061], grad_fn=<AddBackward0>)

In [106]:
# x.grad still holds the gradient from the first (bias=False) example, and autograd
# accumulates into .grad instead of overwriting it. Clear it first so that x.grad
# afterwards reflects only this convolution.
x.grad = None
# Reduce to a scalar (y is rebound to the sum) and backpropagate; this fills
# conv.weight.grad, conv.bias.grad and x.grad for the new layer.
y = y.sum()
y.backward()

In [107]:
# The bias is added once to each of the 4 output elements, so d(sum of outputs)/d(bias) = 4.
conv.bias.grad

tensor([4.])

In [114]:
# Multi-channel layer: 3 input channels, 4 output channels, 2x2 kernel, default bias.
conv2 = nn.Conv2d(in_channels=3, out_channels=4, kernel_size=(2, 2))