# 3. Forward and Backward with Convolutions



This tutorial is based on [CS231n Winter 2016: Lecture 6-7: Neural Networks, Convolutional Neural Networks](https://cs231n.github.io/convolutional-networks/), [video](https://www.youtube.com/watch?v=i94OvYb6noo&list=PLkt2uSq6rBVctENoVBg1TpCC7OQi31AlC&index=4).

### TL;DR

+ Implement the forward and backward computation flows of a convolution with NumPy.
+ Sanity-check the results against PyTorch.

In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
def dim_checker(a, b):
    """Return True when `a` and `b` report the same `.shape`, else False."""
    shape_a, shape_b = a.shape, b.shape
    return shape_a == shape_b

In [3]:
class Conv:
    """Naive (loop-based) 2-D convolution layer with forward and backward passes.

    Inputs are 4-D arrays laid out as (batch, channels, height, width) and the
    weights as (out_channels, in_channels, kernel_h, kernel_w).

    Attributes:
        weight: kernel tensor, randomly initialised in ``__init__``.
        bias: one bias value per output channel.
        stride: step (in pixels) between two neighbouring receptive fields.
        padding: number of zero rows/columns added on every side of the
            spatial dimensions before convolving.
        dweight, dbias: gradients filled in by ``backward``.
        cache: stores the last input ('X') and output ('Z') of ``forward``.
    """

    def __init__(self, in_channels=1, out_channels=1, kernel_size=(2, 2), stride=1, padding=0):
        self.kernel_h, self.kernel_w = kernel_size
        # Standard-normal weights scaled by 1 / sqrt(in_channels / 2).
        self.weight = np.random.randn(out_channels,
                                      in_channels,
                                      self.kernel_h,
                                      self.kernel_w) / np.sqrt(in_channels / 2)
        self.bias = np.zeros(out_channels)

        self.stride = stride
        self.padding = padding

        # Gradients.
        self.dweight, self.dbias = None, None
        self.cache = dict()

    def set_params(self, weights, bias=None):
        """Replace the layer parameters.

        Args:
            weights: array of shape (out_channels, in_channels, kernel_h, kernel_w).
            bias: array with one entry per output channel. When omitted, a
                zero bias is used so that ``forward`` keeps working.
        """
        self.weight = weights
        n_kernels, _, self.kernel_h, self.kernel_w = self.weight.shape
        self.bias = np.zeros(n_kernels) if bias is None else bias

    def _pad(self, X):
        """Zero-pad the two spatial axes of a 4-D batch by ``self.padding``."""
        p = self.padding
        if p == 0:
            return X
        return np.pad(X, ((0, 0), (0, 0), (p, p), (p, p)), mode='constant')

    def compute_dim(self, X):
        """Validate the geometry, cache `X`, and allocate the output tensor.

        Args:
            X: input batch of shape (N, C, H, W).

        Returns:
            A zero array of shape (N, out_channels, out_h, out_w).

        Raises:
            AssertionError: if the kernel does not tile the (padded) input
                exactly with the configured stride.
        """
        xN, xD, xH, xW = X.shape
        wN, wD, wH, wW = self.weight.shape
        # Spatial size after zero-padding both sides.
        pH, pW = xH + 2 * self.padding, xW + 2 * self.padding
        assert (pH - wH) % self.stride == 0
        assert (pW - wW) % self.stride == 0
        self.cache['X'] = X

        zH, zW = (pH - wH) // self.stride + 1, (pW - wW) // self.stride + 1
        zD, zN = wN, xN
        return np.zeros((zN, zD, zH, zW))

    def get_region(self, hight, width):
        """Return the slice bounds (h1, h2, w1, w2) of the receptive field
        that produces output position (`hight`, `width`)."""
        h1 = hight * self.stride
        h2 = h1 + self.kernel_h
        w1 = width * self.stride
        w2 = w1 + self.kernel_w
        return h1, h2, w1, w2

    def convolve_forward_step(self, X_n):
        """Convolve a single (already padded) sample with every kernel.

        Args:
            X_n: one sample of shape (C, H, W).

        Returns:
            Array of shape (out_channels, out_h, out_w).
        """
        xD, xH, xW = X_n.shape
        hZ = (xH - self.kernel_h) // self.stride + 1
        wZ = (xW - self.kernel_w) // self.stride + 1
        Z = np.zeros((len(self.weight), hZ, wZ))

        for i in range(hZ):
            for j in range(wZ):
                h1, h2, w1, w2 = self.get_region(i, j)
                # The receptive field is shared by all kernels, so slice it once.
                x_loc = X_n[:, h1:h2, w1:w2]
                for d in range(len(Z)):
                    Z[d, i, j] = np.sum(x_loc * self.weight[d]) + self.bias[d]
        return Z

    def forward(self, X):
        """Compute the convolution of the batch `X` and cache input/output.

        Args:
            X: input batch of shape (N, C, H, W).

        Returns:
            Output array of shape (N, out_channels, out_h, out_w).
        """
        Z = self.compute_dim(X)
        X_pad = self._pad(X)
        for n in range(len(Z)):
            Z[n, :, :, :] = self.convolve_forward_step(X_pad[n])
        self.cache['Z'] = Z
        return Z

    def backward(self, dZ):
        """Back-propagate `dZ` through the layer.

        Must be called after ``forward``. Fills ``self.dweight`` and
        ``self.dbias`` and returns the gradient w.r.t. the (unpadded) input.

        Args:
            dZ: upstream gradient with the same shape as the forward output.

        Returns:
            Gradient w.r.t. the input, same shape as the cached input.

        Raises:
            AssertionError: if `dZ` does not match the cached output shape.
        """
        assert dZ.shape == self.cache['Z'].shape

        X_pad = self._pad(self.cache['X'])
        dX_pad = np.zeros(X_pad.shape)
        self.dweight = np.zeros(self.weight.shape)
        self.dbias = np.zeros(self.bias.shape)
        (N, depth, hight, width) = dZ.shape

        for n in range(N):
            for h in range(hight):
                for w in range(width):
                    h1, h2, w1, w2 = self.get_region(h, w)
                    x_loc = X_pad[n, :, h1:h2, w1:w2]
                    for d in range(depth):  # corresponds to the d-th kernel
                        grad = dZ[n, d, h, w]
                        dX_pad[n, :, h1:h2, w1:w2] += self.weight[d, :, :, :] * grad
                        self.dweight[d, :, :, :] += x_loc * grad
                        self.dbias[d] += grad

        # Drop the gradient that fell onto the zero-padding border.
        p = self.padding
        if p == 0:
            return dX_pad
        return dX_pad[:, :, p:-p, p:-p]

In [4]:
# Random input batch: xN samples, xD channels, spatial size xH x xW.
xN, xD = 3, 3
xH, xW = 4, 4
X = np.random.randn(xN, xD, xH, xW)
# Convolution hyperparameters: number of kernels, kernel side length, stride.
nW = 3
k = 2
stride = 2

### Forward and backward computation of the convolution with PyTorch.

In [5]:
import torch
from torch import nn
from torch.autograd import Variable


# X is a 4-D (N, C, H, W) batch convolved with a (k, k) kernel, i.e. a 2-D
# convolution. nn.Conv1d expects (N, C, L) inputs, so nn.Conv2d is the layer
# that matches this setup.
conv = nn.Conv2d(in_channels=xD, out_channels=nW, kernel_size=(k, k), stride=stride)

# NumPy copies of PyTorch's randomly initialised parameters, so the NumPy
# implementation can be run with exactly the same weights and bias.
weights = conv.weight.detach().numpy()
bias = conv.bias.detach().numpy()

# float32 leaf tensor that records gradients (replaces the deprecated Variable wrapper).
x_torch = torch.from_numpy(X).float().requires_grad_(True)
# Compute Conv
res = conv(x_torch)
# Sum the result to get a scalar we can back-propagate from.
out = res.sum()
out.backward()  # compute gradients.

In [6]:
# output of conv
res

tensor([[[[-0.1736, -1.0017],
          [ 0.3615,  0.6587]],

         [[ 0.2726,  0.3989],
          [-0.4202,  0.0123]],

         [[-0.0096, -0.1182],
          [ 0.0696, -0.5973]]],


        [[[ 0.5397,  0.9625],
          [-0.0459,  0.0360]],

         [[-0.4860, -1.0586],
          [ 0.4743, -0.2946]],

         [[ 0.3986,  0.0487],
          [ 0.0093,  0.3697]]],


        [[[ 0.4263, -0.8408],
          [ 0.5263, -0.1460]],

         [[-0.0229,  0.4724],
          [ 0.1137, -0.1686]],

         [[ 0.4142,  0.5402],
          [-0.5596,  0.7685]]]], grad_fn=<MkldnnConvolutionBackward>)

### Forward and backward computation of the convolution with NumPy.

In [7]:
# NumPy implementation, configured like the PyTorch layer.
our_conv = Conv(
    in_channels=xD,
    out_channels=nW,
    kernel_size=(k, k),
    stride=stride,
)
# Share PyTorch's parameters so both outputs are directly comparable.
our_conv.set_params(weights=weights, bias=bias)
# Forward pass.
Z = our_conv.forward(X)
# Backward pass: the loss is sum(Z), so the upstream gradient is all ones.
dX = our_conv.backward(np.ones_like(Z))

In [8]:
Z

array([[[[-0.17356728, -1.00174533],
         [ 0.36150299,  0.65872909]],

        [[ 0.27264468,  0.39885802],
         [-0.42024546,  0.01231507]],

        [[-0.00960317, -0.11824357],
         [ 0.06955349, -0.59728522]]],


       [[[ 0.53967837,  0.96254086],
         [-0.04587774,  0.03596001]],

        [[-0.48599323, -1.05856395],
         [ 0.47428459, -0.29457397]],

        [[ 0.3985896 ,  0.04870414],
         [ 0.00931856,  0.36972917]]],


       [[[ 0.42632537, -0.84082647],
         [ 0.52628804, -0.14603831]],

        [[-0.02294473,  0.47241284],
         [ 0.11368198, -0.16859461]],

        [[ 0.41421841,  0.54022526],
         [-0.55958669,  0.76850111]]]])

# Sanity checking

In [9]:
# PyTorch computes in float32 while the NumPy layer accumulates in float64, so
# compare with a small absolute tolerance. (The previous atol=1e6 accepted any
# output and made the forward check meaningless.)
tol = 1e-5
assert np.allclose(res.detach().numpy(), Z, atol=tol)
assert np.allclose(x_torch.grad.detach().numpy(), dX, atol=tol)
assert np.allclose(conv.bias.grad.detach().numpy(), our_conv.dbias, atol=tol)
assert np.allclose(conv.weight.grad.detach().numpy(), our_conv.dweight, atol=tol)