In [325]:
import torch
from torch.nn.functional import fold , unfold
import matplotlib.pyplot as plt
# Sanity check: a stride-1, unpadded 2D convolution and its input gradient can
# both be written as one matrix product over unfolded (im2col) patches.
torch.manual_seed(0)  # make the random check reproducible

im_1, im_2 = 5, 5      # input height, width
k_1, k_2 = 3, 3        # kernel height, width
bs = 2                 # batch size
ch_in, ch_out = 4, 3   # input / output channels

X = torch.empty(bs, ch_in, im_1, im_2).normal_().requires_grad_()
print('X: ', X.size())
F = torch.empty(ch_out, ch_in, k_1, k_2).normal_()

# ---- forward: reference conv2d vs. (flattened kernel) @ (unfolded input) ----
O = torch.nn.functional.conv2d(X, F)
X_unf = unfold(X, kernel_size=(k_1, k_2), padding=0)
F_expand = F.view(ch_out, -1)
O_expand = F_expand @ X_unf
out_comp = O_expand.view(bs, ch_out, im_1 - k_1 + 1, im_2 - k_2 + 1)

# ---- backward: the loss is sum(2 * O), so dL/dO is the constant 2 ----
L = O * 2
# Written explicitly instead of `L / O`, which is NaN wherever O == 0 and only
# equals the true upstream gradient by coincidence for this particular loss.
dL_dO = torch.full_like(O, 2.0)

# dL/dX is the correlation of dL/dO, zero-padded by (k - 1), with the kernel
# flipped spatially and with its in/out channel axes swapped.
F_back = F.flip(-1, -2).transpose(0, 1)
dL_dX = torch.nn.functional.conv2d(dL_dO, F_back, padding=(k_1 - 1, k_2 - 1))

# Same gradient via unfold + matmul. Distinct names are used so the forward
# intermediates above are not overwritten.
dL_dO_unf = unfold(dL_dO, kernel_size=(k_1, k_2), padding=(k_1 - 1, k_2 - 1))
F_back_expand = F_back.reshape(ch_in, -1)
dL_dX_comp = (F_back_expand @ dL_dO_unf).view(bs, ch_in, im_1, im_2)

L.sum().backward()  # autograd reference, stored in X.grad

# torch.allclose is used because torch.testing.assert_allclose is deprecated.
assert torch.allclose(O, out_comp, rtol=1e-4, atol=1e-5), 'unfold forward differs from conv2d'
assert torch.allclose(dL_dX, X.grad, rtol=1e-4, atol=1e-5), 'conv2d backward differs from autograd'
assert torch.allclose(dL_dX_comp, X.grad, rtol=1e-4, atol=1e-5), 'unfold backward differs from autograd'

print('Passed the output and gradient test')

X:  torch.Size([2, 4, 5, 5])
Passed the output and gradient test


In [339]:
class convolution(object):
    """Stride-1 2D convolution with a hand-written forward and backward pass.

    Both passes are expressed as matrix products over patches extracted with
    ``torch.nn.functional.unfold``.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels.
        kernel_size: kernel ``(height, width)``, or a single int for a square kernel.
        padding: zero padding added on every side, as an int or a
            ``(pad_height, pad_width)`` pair.
        use_bias: if True, a randomly initialised per-output-channel bias is added
            in ``forward`` and returned by ``param``; otherwise the bias stays zero
            and is not applied.

    Attributes set by ``backward`` (kept for inspection from other cells):
        dL_dO: ``gradwrtoutput`` with batch and channel axes swapped.
        dO_dF: the cached input with batch and channel axes swapped.
        dL_db: gradient of the loss with respect to the bias.
    """

    def __init__(self, in_ch, out_ch, kernel_size = (3,3), padding = 0, use_bias = False):
        self.in_ch = in_ch
        self.out_ch = out_ch
        self.kernel_size = kernel_size
        self.k_1, self.k_2 = self._pair(kernel_size)

        self.padding = padding
        self.p_1, self.p_2 = self._pair(padding)

        self.use_bias = use_bias
        self.kernel = torch.empty(out_ch, in_ch, self.k_1, self.k_2).normal_()
        self.bias = torch.empty(out_ch).normal_() if use_bias else torch.zeros(out_ch)

    @staticmethod
    def _pair(value):
        """Return ``value`` as a 2-tuple, duplicating it when it is a single int."""
        if isinstance(value, int):
            return value, value
        first, second = value
        return first, second

    def forward(self, x):
        """Convolve ``x`` of shape (batch, in_ch, H, W) with the layer's kernel.

        The input is cached on the instance for use by ``backward``.

        Returns:
            Tensor of shape (batch, out_ch, H + 2*p_1 - k_1 + 1, W + 2*p_2 - k_2 + 1).
        """
        self.x = x
        self.batch_size = x.size(0)
        X_unf = unfold(x, kernel_size=(self.k_1, self.k_2), padding=(self.p_1, self.p_2))
        K_expand = self.kernel.reshape(self.out_ch, -1)
        O_expand = K_expand @ X_unf

        s1 = x.size(-2) - self.k_1 + 1 + 2 * self.p_1
        s2 = x.size(-1) - self.k_2 + 1 + 2 * self.p_2
        out = O_expand.view(self.batch_size, self.out_ch, s1, s2)
        if self.use_bias:
            out = out + self.bias.view(1, -1, 1, 1)
        return out

    def backward(self, gradwrtoutput):
        """Back-propagate ``gradwrtoutput`` (dL/dO, same shape as the forward output).

        Must be called after ``forward``, whose cached input it reads.

        Returns:
            (dL_dX, dL_dF): gradients with respect to the cached input and to the
            kernel, with the same shapes as those tensors. The bias gradient is
            stored in ``self.dL_db``.
        """
        in_h, in_w = self.x.size(-2), self.x.size(-1)
        out_h, out_w = gradwrtoutput.size(-2), gradwrtoutput.size(-1)

        # ---- gradient w.r.t. the input ----
        # Correlating dL/dO, zero-padded by (k - 1), with the spatially flipped and
        # channel-swapped kernel gives the gradient w.r.t. the *padded* input.
        kernel_back = self.kernel.flip(-2, -1).transpose(0,1)
        dL_dO_unf = unfold(gradwrtoutput, kernel_size=(self.k_1, self.k_2),
                           padding=(self.k_1 - 1, self.k_2 - 1))
        dO_dX_exp = kernel_back.reshape(self.in_ch, -1)
        dL_dX_exp = dO_dX_exp @ dL_dO_unf
        dL_dX_pad = dL_dX_exp.reshape(self.batch_size, self.in_ch,
                                      in_h + 2 * self.p_1, in_w + 2 * self.p_2)
        # Drop the border that corresponds to the forward zero padding.
        dL_dX = dL_dX_pad[:, :, self.p_1:self.p_1 + in_h, self.p_2:self.p_2 + in_w].contiguous()

        # ---- gradient w.r.t. the kernel ----
        # This is a convolution of the input by dL/dO in which the batch axis plays
        # the role of the channel axis, so both tensors have axes 0 and 1 swapped.
        # A real transpose is required here: `view` would only relabel memory.
        self.dL_dO = gradwrtoutput.transpose(0,1) # K: (out_ch, batch, out_h, out_w)
        self.dO_dF = self.x.transpose(0,1)        # X: (in_ch, batch, in_h, in_w)

        dL_dO_unf_F = self.dL_dO.reshape(self.out_ch, -1)
        # The sliding window is the whole dL/dO map, so exactly k_1 * k_2 positions remain.
        dO_dF_exp = unfold(self.dO_dF.contiguous(), kernel_size=(out_h, out_w),
                           padding=(self.p_1, self.p_2))
        dL_dF_exp = dL_dO_unf_F @ dO_dF_exp       # (in_ch, out_ch, k_1 * k_2)
        dL_dF = dL_dF_exp.transpose(0,1).reshape(self.kernel.size())

        # ---- gradient w.r.t. the bias: sum over everything but the channel axis ----
        self.dL_db = gradwrtoutput.sum(dim=(0, 2, 3))

        return dL_dX, dL_dF

    def param(self) :
        """Return the layer's parameter tensors: the kernel, plus the bias when used."""
        if self.use_bias:
            return [self.kernel, self.bias]
        return [self.kernel]

        

In [338]:
import torch
from torch.nn.functional import fold , unfold
# Compare the hand-written `convolution` layer with torch's conv2d + autograd.
im_1, im_2 = 4, 4      # input height, width
k_1, k_2 = 3,3         # kernel height, width
bs = 2                 # batch size
ch_in, ch_out = 2, 4   # input / output channels

X = torch.empty(bs, ch_in, im_1, im_2).normal_().requires_grad_()
X_copy = X.clone().detach().requires_grad_()   # independent leaf for the autograd reference

conv = convolution(ch_in, ch_out, kernel_size = (k_1, k_2), padding = 0)
F = conv.kernel
F.requires_grad_(True)   # lets the reference backward pass fill F.grad

out = conv.forward(X)
out_compare = torch.nn.functional.conv2d(X_copy, F)

# Upstream gradient of all ones. Built explicitly rather than as `out / out`,
# which is NaN wherever an output is exactly zero and carries an autograd graph.
grad_out = torch.ones_like(out_compare)

dL_dX, dL_dF = conv.backward(grad_out)
out_compare.backward(grad_out)

# Each printed value is a summed absolute difference and should be close to 0.
print('same output of conv: ', (out_compare - out).abs().sum())
print('same input gradient: ', (X_copy.grad - dL_dX).abs().sum())
print('same kernel gradient: ',(F.grad-dL_dF).abs().sum() )

AttributeError: 'Tensor' object has no attribute 'zeros_'

In [328]:
# Print, one after the other: the autograd kernel gradient, the hand-computed
# one, the latter with its first two axes swapped, and the latter re-viewed
# with its own size.
print(
    F.grad,
    dL_dF,
    dL_dF.transpose(0,1),
    dL_dF.view(*dL_dF.size()),
    sep='\n',
)

tensor([[[[ 1.2482,  1.4429,  0.2975],
          [ 2.4374,  1.3758, -1.8613],
          [-2.0750, -1.2036, -0.4597]],

         [[ 1.8387,  3.1812, -0.0757],
          [ 4.0833,  3.4628,  0.6435],
          [-1.7010, -2.5746,  1.1417]]],


        [[[ 1.2482,  1.4429,  0.2975],
          [ 2.4374,  1.3758, -1.8613],
          [-2.0750, -1.2036, -0.4597]],

         [[ 1.8387,  3.1812, -0.0757],
          [ 4.0833,  3.4628,  0.6435],
          [-1.7010, -2.5746,  1.1417]]],


        [[[ 1.2482,  1.4429,  0.2975],
          [ 2.4374,  1.3758, -1.8613],
          [-2.0750, -1.2036, -0.4597]],

         [[ 1.8387,  3.1812, -0.0757],
          [ 4.0833,  3.4628,  0.6435],
          [-1.7010, -2.5746,  1.1417]]],


        [[[ 1.2482,  1.4429,  0.2975],
          [ 2.4374,  1.3758, -1.8613],
          [-2.0750, -1.2036, -0.4597]],

         [[ 1.8387,  3.1812, -0.0757],
          [ 4.0833,  3.4628,  0.6435],
          [-1.7010, -2.5746,  1.1417]]]])
tensor([[[[ 1.2482,  1.4429,  0.2975],
  

In [318]:
# Reproduce the kernel gradient as a convolution of the axis-swapped input by the
# axis-swapped output gradient, once with conv2d and once with unfold + matmul.
# Relies on `conv.backward` having been run, since it sets conv.dL_dO / conv.dO_dF.
# print(F.grad.size())

print(conv.dL_dO.transpose(0,1).size())
print(conv.dO_dF.transpose(0,1).size())

X = conv.dO_dF   # input with batch and channel axes swapped
K = conv.dL_dO   # output gradient with batch and channel axes swapped
conv_out = torch.nn.functional.conv2d(X, K, padding = conv.padding).transpose(0,1)
print('kernel: ',K.size())
print('input: ',X.size())

# The sliding window is the full spatial extent of K, read from the tensor
# itself instead of being hard-coded for one image / kernel size.
window = tuple(K.shape[-2:])
X_unf = unfold(X, kernel_size = window, padding = conv.padding)
print(X_unf.size())
F_exp = K.reshape(conv.out_ch, -1)
print(F_exp.size())
# Expected window size: the forward output size, with padding counted on both sides.
print(im_1 - conv.k_1 + 1 + 2 * conv.padding, im_2 - conv.k_2 + 1 + 2 * conv.padding)
O_exp = F_exp @ X_unf
O = O_exp.view(conv.in_ch, conv.out_ch, conv.k_1, conv.k_2).transpose(0,1)
print(O.size())
print(conv_out.size())
print(conv.kernel.size())

# Both routes must give the same kernel gradient.
assert torch.allclose(O, conv_out, rtol=1e-4, atol=1e-5), 'unfold and conv2d kernel gradients differ'

torch.Size([2, 4, 2, 2])
torch.Size([2, 2, 4, 4])
kernel:  torch.Size([4, 2, 2, 2])
input:  torch.Size([2, 2, 4, 4])
torch.Size([2, 8, 9])
torch.Size([4, 8])
2 2
torch.Size([4, 2, 3, 3])
torch.Size([4, 2, 3, 3])
torch.Size([4, 2, 3, 3])


In [336]:
# Reference point: parameter shapes of torch's built-in Conv2d for
# 5 input channels, 10 output channels and a 3x3 kernel.
c = torch.nn.Conv2d(in_channels=5, out_channels=10, kernel_size=(3, 3))
print(c.bias.size(), c.weight.size(), sep='\n')

torch.Size([10])
torch.Size([10, 5, 3, 3])


In [268]:
# Display the layer's current kernel tensor as the cell output.
conv.kernel

tensor([[[[-0.1950,  0.6027, -0.3157],
          [-0.6203, -1.2110, -0.1113],
          [ 0.4412,  0.4374,  1.0628]],

         [[ 0.0552,  0.3472,  0.0546],
          [-0.0625,  0.0334,  0.7950],
          [-1.0151,  0.0408, -1.5472]]],


        [[[ 0.2375, -0.4273, -0.0871],
          [ 0.4409,  0.0725,  0.7220],
          [-2.4041,  1.3587,  1.2033]],

         [[-1.0719, -1.2230, -2.1777],
          [ 0.0509,  0.2196,  0.1059],
          [ 0.4083,  1.4222,  1.0656]]],


        [[[ 2.1861,  0.9263, -1.1957],
          [ 0.0345,  0.1545,  1.0926],
          [ 0.9858,  1.3075, -0.2445]],

         [[-1.6937,  0.2222, -0.9520],
          [ 1.5626, -1.4964,  2.3466],
          [ 0.1503,  0.2342, -1.6386]]],


        [[[ 0.8826, -0.4346, -0.2652],
          [-0.0776,  1.2662, -0.3891],
          [ 0.2385, -0.4247,  0.5400]],

         [[ 2.3174, -0.0946,  1.1275],
          [ 1.1084, -0.5001, -0.2187],
          [ 1.2660,  0.7753, -1.9662]]]], requires_grad=True)

In [340]:
# Scratch cell: adding None to an int is unsupported and raises TypeError.
5 + None

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'