# 3. Convolutions


Convolutional neural networks are my favorites. In this tutorial, I will show you how to implement the forward and backward passes of a convolution layer.


This tutorial is based on [CS231n Winter 2016: Lecture 6-7: Neural Networks, Convolutional Neural Networks](https://cs231n.github.io/convolutional-networks/), [video](https://www.youtube.com/watch?v=i94OvYb6noo&list=PLkt2uSq6rBVctENoVBg1TpCC7OQi31AlC&index=4).

### TL;DR

+ Implement forward and backward computation flows of convolutions with numpy.
+ Sanity check with pytorch.

In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
def dim_checker(a, b):
    """Return True when ``a`` and ``b`` report exactly the same ``shape``."""
    shape_a, shape_b = a.shape, b.shape
    return shape_a == shape_b

In [3]:
class Conv:
    """2-D convolution (cross-correlation) layer written with explicit NumPy loops.

    Inputs are ``(N, C_in, H, W)`` arrays and outputs are
    ``(N, C_out, H_out, W_out)`` with
    ``H_out = (H + 2 * padding - kernel_h) // stride + 1`` (same rule for the width).

    Attributes:
        weight: filters of shape ``(C_out, C_in, kernel_h, kernel_w)``.
        bias: one scalar per filter, shape ``(C_out,)``.
        dweight, dbias: gradients filled in by :meth:`backward`.
        cache: stores the last input (``'X'``) and output (``'Z'``) of :meth:`forward`.
    """

    def __init__(self,in_channels=1, out_channels=1,kernel_size=(2, 2), stride=1, padding=0):
        """Create the layer with random weights and a zero bias.

        Args:
            in_channels: number of channels of the input.
            out_channels: number of filters (channels of the output).
            kernel_size: ``(kernel_h, kernel_w)``.
            stride: step (>= 1) used along both spatial axes.
            padding: number (>= 0) of zero rows/columns added on every spatial border.

        Raises:
            ValueError: if ``stride < 1`` or ``padding < 0``.
        """
        if stride < 1:
            raise ValueError("stride must be >= 1, got {}".format(stride))
        if padding < 0:
            raise ValueError("padding must be >= 0, got {}".format(padding))

        self.kernel_h, self.kernel_w = kernel_size
        # Standard normal draws scaled by sqrt(2 / in_channels); note that only the
        # channel count (not the kernel area) enters the scaling factor.
        self.weight = np.random.randn(out_channels,
                                      in_channels,
                                      self.kernel_h,
                                      self.kernel_w) / np.sqrt(in_channels / 2)
        self.bias = np.zeros(out_channels)

        self.stride = stride
        self.padding = padding

        # Gradients.
        self.dweight, self.dbias = None, None
        self.cache = dict()

    def set_params(self,weights,bias=None):
        """Replace the parameters of the layer.

        Args:
            weights: array of shape ``(C_out, C_in, kernel_h, kernel_w)``.
            bias: array of shape ``(C_out,)``; ``None`` means "no bias" and is
                stored as zeros so that forward/backward keep working.
        """
        self.weight = weights
        n_filters, _, self.kernel_h, self.kernel_w = self.weight.shape
        self.bias = np.zeros(n_filters) if bias is None else bias

    def _pad(self, X):
        """Zero-pad the last two (spatial) axes of ``X`` by ``self.padding``."""
        p = self.padding
        if p == 0:
            return X
        pad_width = [(0, 0)] * (X.ndim - 2) + [(p, p), (p, p)]
        return np.pad(X, pad_width, mode="constant")

    def compute_dim(self,X):
        """Validate ``X``, remember it in the cache and allocate the output.

        Args:
            X: input batch of shape ``(N, C_in, H, W)``.

        Returns:
            A zero array of shape ``(N, C_out, H_out, W_out)``.

        Raises:
            AssertionError: if the channels do not match the filters, the padded
                input is smaller than the kernel, or the kernel does not tile the
                padded input exactly with the configured stride.
        """
        # parameter check
        xN, xD, xH, xW = X.shape
        wN, wD, wH, wW = self.weight.shape
        pH, pW = xH + 2 * self.padding, xW + 2 * self.padding
        assert xD == wD, "input has {} channels but filters expect {}".format(xD, wD)
        assert pH >= wH and pW >= wW, "kernel is larger than the (padded) input"
        assert (pH - wH) % self.stride == 0
        assert (pW - wW) % self.stride == 0
        self.cache['X'] = X

        zH = (pH - wH) // self.stride + 1
        zW = (pW - wW) // self.stride + 1
        return np.zeros((xN, wN, zH, zW))

    def get_region(self,hight,width):
        """Map an output position to the input window it was computed from.

        Args:
            hight: row index in the output feature map.
            width: column index in the output feature map.

        Returns:
            ``(h1, h2, w1, w2)`` so that the window is ``[h1:h2, w1:w2]`` of the
            (padded) input.
        """
        h1 = hight * self.stride
        w1 = width * self.stride
        return h1, h1 + self.kernel_h, w1, w1 + self.kernel_w

    def convolve_forward_step(self,X_n):
        """Convolve one sample with every filter.

        Args:
            X_n: a single input of shape ``(C_in, H, W)`` (unpadded; the
                configured padding is applied here).

        Returns:
            Array of shape ``(C_out, H_out, W_out)``.
        """
        X_n = self._pad(X_n)
        xD, xH, xW = X_n.shape
        hZ = (xH - self.kernel_h) // self.stride + 1
        wZ = (xW - self.kernel_w) // self.stride + 1
        Z = np.zeros((len(self.weight), hZ, wZ))

        for i in range(hZ):
            for j in range(wZ):
                # The window only depends on the position, not on the filter.
                h1, h2, w1, w2 = self.get_region(i, j)
                x_loc = X_n[:, h1:h2, w1:w2]
                for d in range(len(Z)):
                    Z[d, i, j] = np.sum(x_loc * self.weight[d]) + self.bias[d]
        return Z

    def forward(self,X):
        """Compute the convolution of a batch and cache input and output.

        Args:
            X: input batch of shape ``(N, C_in, H, W)``.

        Returns:
            Output of shape ``(N, C_out, H_out, W_out)``.
        """
        Z = self.compute_dim(X)
        for n in range(len(Z)):
            Z[n, :, :, :] = self.convolve_forward_step(X[n])
        self.cache['Z'] = Z
        return Z

    def backward(self,dZ):
        """Back-propagate ``dZ`` through the layer.

        Fills ``self.dweight`` and ``self.dbias`` and returns the gradient with
        respect to the input of the last :meth:`forward` call.

        Args:
            dZ: upstream gradient with the same shape as the cached output.

        Returns:
            ``dX`` with the same shape as the cached input.

        Raises:
            AssertionError: if ``dZ`` does not have the shape of the cached output.
        """
        assert dZ.shape == self.cache['Z'].shape

        # Work on the padded input; the padding border is cropped from dX at the end.
        X_pad = self._pad(self.cache['X'])
        dX_pad = np.zeros(X_pad.shape)
        self.dweight = np.zeros(self.weight.shape)
        self.dbias = np.zeros(self.bias.shape)
        (N, depth, hight, width) = dZ.shape

        for n in range(N):
            for h in range(hight):
                for w in range(width):
                    h1, h2, w1, w2 = self.get_region(h, w)
                    x_loc = X_pad[n, :, h1:h2, w1:w2]
                    for d in range(depth):  # corresponds to the d-th kernel
                        grad = dZ[n, d, h, w]
                        dX_pad[n, :, h1:h2, w1:w2] += self.weight[d, :, :, :] * grad
                        self.dweight[d, :, :, :] += x_loc * grad
                        self.dbias[d] += grad

        p = self.padding
        if p == 0:
            return dX_pad
        return dX_pad[:, :, p:-p, p:-p]

In [4]:
# Generate data and determine the hyperparameters of convolution.
# A fixed seed keeps the input identical across "Restart & Run All".
SEED = 0
rng = np.random.default_rng(SEED)
xN, xD, xH, xW = 3, 3, 4, 4  # batch size, channels, height, width
X = rng.standard_normal((xN, xD, xH, xW))
# kernel init: number of filters, (square) kernel size, stride
nW, k, stride = 3, 2, 2

### Convolution with forward and backward computation with PyTorch.

In [5]:
import torch
from torch import nn
from torch.autograd import Variable


# Reference layer. X is a batch of images (N, C, H, W) and the kernel is (k, k),
# so this has to be a 2-D convolution: nn.Conv1d only accepts 2-D/3-D inputs.
torch.manual_seed(0)  # reproducible reference weights
conv = nn.Conv2d(in_channels=xD, out_channels=nW, kernel_size=(k, k), stride=stride)

# Snapshot the randomly initialised parameters so the NumPy layer can reuse them.
weights = conv.weight.detach().numpy().copy()
bias = conv.bias.detach().numpy().copy()

# float32 leaf tensor that records gradients (replaces the deprecated Variable wrapper).
x_torch = torch.from_numpy(X).float().requires_grad_(True)
# Compute Conv
res = conv(x_torch)
# Sum the res, so that the upstream gradient of every output entry is 1.
out = res.sum()
out.backward()  # compute gradients.

In [6]:
# Output of the PyTorch convolution; used as the reference for the NumPy layer.
res

tensor([[[[ 0.0883,  0.8323],
          [-0.5934,  0.6978]],

         [[-0.2491, -0.4646],
          [ 0.1451,  0.3833]],

         [[-0.1083, -1.1190],
          [ 1.0292,  0.1934]]],


        [[[-0.5988, -0.1015],
          [-0.9550, -0.1143]],

         [[-0.5256,  0.6392],
          [-0.3477,  0.3384]],

         [[ 0.5915,  0.0048],
          [-0.0641,  0.6729]]],


        [[[ 0.6175, -0.7181],
          [ 0.1579,  0.4720]],

         [[-0.5260, -0.8538],
          [-0.7747,  0.2885]],

         [[-1.3564, -0.3551],
          [-1.1209,  0.3674]]]], grad_fn=<MkldnnConvolutionBackward>)

### Convolution with forward and backward computation with NumPy.

In [7]:
# Build our NumPy layer with the same hyperparameters as the PyTorch one
# and hand it the very same parameters.
our_conv = Conv(in_channels=xD,
                out_channels=nW,
                kernel_size=(k, k),
                stride=stride)
our_conv.set_params(weights=weights, bias=bias)

# Forward pass.
Z = our_conv.forward(X)

# Backward pass: the loss is a plain sum of the outputs, hence every
# output entry receives an upstream gradient of 1.
dX = our_conv.backward(np.ones(Z.shape))

In [8]:
# Output of our NumPy convolution; compare with `res` printed above.
Z

array([[[[ 0.0882748 ,  0.83231789],
         [-0.59339344,  0.69775635]],

        [[-0.24908992, -0.46456248],
         [ 0.1451118 ,  0.38327691]],

        [[-0.10827968, -1.11901822],
         [ 1.02921658,  0.19340986]]],


       [[[-0.59878981, -0.10146911],
         [-0.95499542, -0.11428114]],

        [[-0.52564307,  0.63918315],
         [-0.3477486 ,  0.33841396]],

        [[ 0.59146703,  0.00477638],
         [-0.06407195,  0.6729271 ]]],


       [[[ 0.61746663, -0.71805779],
         [ 0.15794283,  0.47199205]],

        [[-0.52599765, -0.85378458],
         [-0.77470156,  0.2885031 ]],

        [[-1.35641148, -0.35514035],
         [-1.12085108,  0.36742743]]]])

# Sanity checking

In [9]:
# PyTorch computes in float32 while our layer uses float64, so compare with a
# small tolerance. (The previous atol=1e6 accepted any output whatsoever.)
RTOL, ATOL = 1e-5, 1e-6
np.testing.assert_allclose(res.detach().numpy(), Z, rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(x_torch.grad.detach().numpy(), dX, rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(conv.bias.grad.detach().numpy(), our_conv.dbias, rtol=RTOL, atol=ATOL)
np.testing.assert_allclose(conv.weight.grad.detach().numpy(), our_conv.dweight, rtol=RTOL, atol=ATOL)

# We showed how to implement the forward and backward computation of convolutions.
# Now, let's take another look at the core operation.

In [10]:
from d2l import torch as d2l
import torch
from torch import nn

def corr2d(X, K):  #@save
    """Slide the 2-D kernel ``K`` over the 2-D tensor ``X`` (stride 1, no padding).

    Each output entry is the sum of the element-wise product between ``K`` and
    the window of ``X`` whose top-left corner sits at that entry's position.
    """
    kernel_rows, kernel_cols = K.shape
    out_rows = X.shape[0] - kernel_rows + 1
    out_cols = X.shape[1] - kernel_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            window = X[r:r + kernel_rows, c:c + kernel_cols]
            Y[r, c] = (window * K).sum()
    return Y

In [11]:
# A 3x3 toy input ...
X = torch.tensor([
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0],
])
# ... and a 2x2 kernel that adds up the left column of each window.
K = torch.tensor([
    [1.0, 0.0],
    [1.0, 0.0],
])
corr2d(X, K)

tensor([[0., 2.],
        [0., 1.]])

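# What does corr2d(X, K) tell us?

`corr2d` draws an image for us by looking at X through the eyes of K: each output entry is the sum of the element-wise product between K and the patch of X it currently covers.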