# 3. Convolutions


Convolutional neural networks are my favorites. In this tutorial, I will show you how to implement the forward and backward passes of a convolution.


This tutorial is based on [CS231n Winter 2016: Lecture 6-7: Neural Networks, Convolutional Neural Networks](https://cs231n.github.io/convolutional-networks/), [video](https://www.youtube.com/watch?v=i94OvYb6noo&list=PLkt2uSq6rBVctENoVBg1TpCC7OQi31AlC&index=4).

### TL;DR

+ Implement forward and backward computation flows of convolutions with numpy.
+ Sanity check with pytorch.

In [1]:
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
def dim_checker(a,b):
    """Return True when `a` and `b` have exactly the same shape."""
    shape_a, shape_b = a.shape, b.shape
    return shape_a == shape_b

In [3]:
class Conv:
    """2D convolution layer (cross-correlation) implemented with plain numpy loops.

    Inputs are batches of shape (N, in_channels, H, W). The weight tensor has
    shape (out_channels, in_channels, kernel_h, kernel_w) and the bias has
    shape (out_channels,).

    Attributes:
        weight, bias: layer parameters.
        dweight, dbias: parameter gradients, filled in by `backward`.
        cache: dict holding the last input ('X') and output ('Z') of `forward`.
    """

    def __init__(self,in_channels=1, out_channels=1,kernel_size=(2, 2), stride=1, padding=0):
        """Create the layer with random weights and a zero bias.

        Args:
            in_channels: number of channels of the input.
            out_channels: number of kernels, i.e. channels of the output.
            kernel_size: pair (kernel_h, kernel_w).
            stride: step between two consecutive windows (same for both axes).
            padding: zeros added on each side of both spatial axes; either a
                single int or a pair (pad_h, pad_w).
        """
        self.kernel_h, self.kernel_w = kernel_size
        # Standard-normal weights scaled by sqrt(2 / in_channels).
        self.weight = np.random.randn(out_channels,
                                      in_channels,
                                      self.kernel_h,
                                      self.kernel_w) / np.sqrt(in_channels / 2)
        self.bias = np.zeros(out_channels)

        self.stride = stride
        self.padding = padding

        # Gradients.
        self.dweight, self.dbias = None, None
        self.cache = dict()

    def set_params(self,weights,bias=None):
        """Replace the layer parameters.

        Args:
            weights: array of shape (out_channels, in_channels, kernel_h, kernel_w).
            bias: array of shape (out_channels,); when omitted a zero bias is
                used so that `forward` keeps working.
        """
        weights = np.asarray(weights)
        assert weights.ndim == 4, "weights must have shape (out_channels, in_channels, kernel_h, kernel_w)"
        out_channels, _, self.kernel_h, self.kernel_w = weights.shape
        if bias is None:
            bias = np.zeros(out_channels)
        self.weight, self.bias = weights, np.asarray(bias)

    def _pad_amounts(self):
        """Return the (vertical, horizontal) zero padding as a pair of ints."""
        if isinstance(self.padding, (tuple, list)):
            pad_h, pad_w = self.padding
        else:
            pad_h = pad_w = self.padding
        return int(pad_h), int(pad_w)

    def _pad(self, X):
        """Zero-pad the last two axes of X; X is returned untouched when padding is 0."""
        pad_h, pad_w = self._pad_amounts()
        if pad_h == 0 and pad_w == 0:
            return X
        leading = [(0, 0)] * (X.ndim - 2)
        return np.pad(X, leading + [(pad_h, pad_h), (pad_w, pad_w)], mode='constant')

    def compute_dim(self,X):
        """Validate X against the layer, cache it and allocate the output.

        Args:
            X: input batch of shape (N, in_channels, H, W), before padding.

        Returns:
            Zero array of shape (N, out_channels, H_out, W_out) with
            H_out = (H + 2 * pad_h - kernel_h) // stride + 1 (same for W_out).

        Raises:
            AssertionError: if the channels do not match, the kernel does not
                fit, or the windows do not tile the (padded) input exactly.
        """
        xN, xD, xH, xW = X.shape
        wN, wD, wH, wW = self.weight.shape
        pad_h, pad_w = self._pad_amounts()
        span_h = xH + 2 * pad_h - wH
        span_w = xW + 2 * pad_w - wW
        assert xD == wD, "input channels do not match the kernel channels"
        assert span_h >= 0 and span_w >= 0, "kernel is larger than the (padded) input"
        assert span_h % self.stride == 0, "stride does not tile the input height"
        assert span_w % self.stride == 0, "stride does not tile the input width"
        self.cache['X'] = X

        zH, zW = span_h // self.stride + 1, span_w // self.stride + 1
        return np.zeros((xN, wN, zH, zW))

    def get_region(self,hight,width):
        """Return (h1, h2, w1, w2), the input window read by output position (hight, width)."""
        h1 = hight * self.stride
        w1 = width * self.stride
        return h1, h1 + self.kernel_h, w1, w1 + self.kernel_w

    def convolve_forward_step(self,X_n):
        """Convolve a single sample of shape (in_channels, H, W) without adding padding.

        Returns:
            Array of shape (out_channels, H_out, W_out).
        """
        xD, xH, xW = X_n.shape
        hZ = int((xH - self.kernel_h) / self.stride + 1)
        wZ = int((xW - self.kernel_w) / self.stride + 1)
        Z = np.zeros((len(self.weight), hZ, wZ))

        for i in range(hZ):
            for j in range(wZ):
                # The window only depends on the position, not on the kernel.
                h1, h2, w1, w2 = self.get_region(i, j)
                x_loc = X_n[:, h1:h2, w1:w2]
                for d in range(len(Z)):
                    Z[d, i, j] = np.sum(x_loc * self.weight[d]) + self.bias[d]
        return Z

    def forward(self,X):
        """Compute the convolution of a batch X of shape (N, in_channels, H, W).

        The input and the output are stored in `self.cache` for `backward`.
        """
        Z = self.compute_dim(X)
        X_pad = self._pad(X)
        for n in range(len(Z)):
            Z[n, :, :, :] = self.convolve_forward_step(X_pad[n])
        self.cache['Z'] = Z
        return Z

    def backward(self,dZ):
        """Back-propagate the gradient dZ of the loss w.r.t. the layer output.

        Fills `self.dweight` and `self.dbias` and returns the gradient w.r.t.
        the (unpadded) input of the last `forward` call.

        Args:
            dZ: array with the same shape as the output of `forward`.

        Returns:
            dX, an array with the same shape as the cached input.
        """
        assert dZ.shape == self.cache['Z'].shape

        X = self.cache['X']
        X_pad = self._pad(X)
        # Gradients are accumulated on the padded input and cropped at the end.
        dX_pad = np.zeros(X_pad.shape)
        self.dweight = np.zeros(self.weight.shape)
        self.dbias = np.zeros(self.bias.shape)
        (N, depth, hight, width) = dZ.shape

        for n in range(N):
            for h in range(hight):
                for w in range(width):
                    h1, h2, w1, w2 = self.get_region(h, w)
                    x_loc = X_pad[n, :, h1:h2, w1:w2]
                    for d in range(depth):  # corresponds to the d-th kernel
                        grad = dZ[n, d, h, w]
                        dX_pad[n, :, h1:h2, w1:w2] += self.weight[d, :, :, :] * grad
                        self.dweight[d, :, :, :] += x_loc * grad
                        self.dbias[d] += grad

        pad_h, pad_w = self._pad_amounts()
        if pad_h == 0 and pad_w == 0:
            return dX_pad
        # Drop the gradient that landed on the zero border.
        return dX_pad[:, :, pad_h:pad_h + X.shape[2], pad_w:pad_w + X.shape[3]].copy()

In [4]:
# Input batch: number of samples, channels, height and width.
xN = xD = 3
xH = xW = 4
X = np.random.randn(xN, xD, xH, xW)
# Convolution hyperparameters: number of kernels, kernel size and stride.
nW = 3
k = stride = 2

### Convolution with forward and backward computation in PyTorch.

In [5]:
import torch
from torch import nn
from torch.autograd import Variable


# X is a 4D batch (N, C, H, W) and the kernel is (k, k), so the matching layer
# is nn.Conv2d; nn.Conv1d expects 2D/3D inputs and a one-dimensional kernel.
conv = nn.Conv2d(in_channels=xD, out_channels=nW, kernel_size=(k, k), stride=stride)

# Export the randomly initialised parameters so the numpy layer can reuse them.
weights = conv.weight.detach().numpy()
bias = conv.bias.detach().numpy()

# float32 copy of X that records gradients.
x_torch = torch.from_numpy(X).float().requires_grad_(True)
# Compute Conv
res = conv(x_torch)
# Reduce the output to a scalar so that backward() can be called on it.
out = res.sum()
out.backward()  # compute gradients.

In [6]:
# Output of the PyTorch convolution (reference result for the sanity check).
res

tensor([[[[-0.0425, -0.5480],
          [ 0.0891, -0.0269]],

         [[-0.6784, -0.5389],
          [-0.3247, -0.0879]],

         [[-0.0854,  1.0464],
          [ 0.0661,  0.7227]]],


        [[[ 0.6650, -0.2599],
          [-0.8087, -0.3958]],

         [[ 0.5801, -0.7693],
          [-0.2034, -1.0046]],

         [[-0.1822,  0.0312],
          [-0.2092, -0.6925]]],


        [[[-0.0535, -0.3817],
          [ 0.4270,  0.4379]],

         [[-0.2526,  0.0221],
          [ 0.4212,  1.1195]],

         [[-0.3280,  1.5954],
          [ 0.1795,  0.5587]]]], grad_fn=<MkldnnConvolutionBackward0>)

### Convolution with forward and backward computation in NumPy.

In [7]:
# Build our numpy layer with the same hyperparameters as the PyTorch layer.
our_conv = Conv(
    in_channels=xD,
    out_channels=nW,
    kernel_size=(k, k),
    stride=stride,
)
# Reuse PyTorch's weights and bias so that both layers compute the same function.
our_conv.set_params(weights=weights, bias=bias)
# Forward pass.
Z = our_conv.forward(X)
# Backward pass. The loss is the sum of the outputs, so the upstream gradient
# of every output entry is 1.
dX = our_conv.backward(np.ones_like(Z))

In [8]:
# Output of our numpy convolution; it should match the PyTorch output `res`.
Z

array([[[[-0.04249447, -0.54797479],
         [ 0.08912934, -0.02693765]],

        [[-0.67843511, -0.53892653],
         [-0.32466128, -0.08789815]],

        [[-0.08541131,  1.04639815],
         [ 0.06606271,  0.72273387]]],


       [[[ 0.66503736, -0.25988651],
         [-0.80873865, -0.39577682]],

        [[ 0.58013828, -0.76932111],
         [-0.20342898, -1.00464547]],

        [[-0.1821961 ,  0.03121532],
         [-0.20915804, -0.69251473]]],


       [[[-0.05345719, -0.38167461],
         [ 0.42704866,  0.43793867]],

        [[-0.25256085,  0.02212139],
         [ 0.42118294,  1.11945092]],

        [[-0.32799742,  1.59537328],
         [ 0.1795361 ,  0.55865354]]]])

# Sanity checking

In [9]:
# PyTorch computes in float32 while our layer accumulates in float64, so the
# results are compared up to a small tolerance. An absolute tolerance of 1e-6
# (not 1e6, which would accept any result) keeps the check meaningful while
# staying robust for entries close to zero.
np.testing.assert_allclose(Z, res.detach().numpy(), rtol=1e-5, atol=1e-6)
np.testing.assert_allclose(dX, x_torch.grad.detach().numpy(), rtol=1e-5, atol=1e-6)
np.testing.assert_allclose(our_conv.dbias, conv.bias.grad.detach().numpy(), rtol=1e-5, atol=1e-6)
np.testing.assert_allclose(our_conv.dweight, conv.weight.grad.detach().numpy(), rtol=1e-5, atol=1e-6)

# We have shown how to implement the forward and backward computation of convolutions.
# Now, let's take another look at what a convolution actually computes.

In [10]:
from d2l import torch as d2l
import torch
from torch import nn

def corr2d(X, K):  #@save
    """Compute the 2D cross-correlation of matrix X with kernel K.

    The kernel slides over X with stride 1 and no padding; every output entry
    is the sum of the element-wise product of K with the window below it.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            window = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = (window * K).sum()
    return Y

In [11]:
# A 3x3 input and a 2x2 kernel that sums the left column of every window.
X = torch.tensor([
    [0.0, 1.0, 1.0],
    [0.0, 1.0, 0.0],
    [0.0, 0.0, 0.0],
])
K = torch.tensor([
    [1.0, 0.0],
    [1.0, 0.0],
])
corr2d(X, K)

tensor([[0., 2.],
        [0., 1.]])

# What does corr2d(X, K) tell us?

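`corr2d` draws an image for us by looking at X through the eyes of K: each output entry measures how strongly the corresponding window of X responds to the kernel K.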

In [12]:
import torch

# Layout of the input batch (N, C, H, W) and the convolution hyperparameters.
batchsize, channel_in = 1, 1
height, width = 4, 50
num_kernels, kernel_size = 32, 3

x = torch.randn(batchsize, channel_in, height, width)
print(x.shape)
print(x.numel())

torch.Size([1, 1, 4, 50])
200


In [13]:
# A 2D convolution applied to the 4D batch `x`.
conv = torch.nn.Conv2d(
    in_channels=channel_in,
    out_channels=num_kernels,
    kernel_size=kernel_size,
    stride=1,
    padding=1,
)
num_outputs = conv(x).numel()
print('Num param in conv',conv.weight.numel())
print('Num of output:',num_outputs)

# A fully connected layer that maps the flattened input to as many outputs.
mlp = torch.nn.Linear(channel_in*height*width, num_outputs)

print('Num param in mlp',mlp.weight.numel())
# Both layers must produce the same number of outputs.
assert mlp(torch.flatten(x)).numel() == num_outputs
print('Param ratio:',mlp.weight.numel()/conv.weight.numel())


Num param in conv 288
Num of output: 6400
Num param in mlp 1280000
Param ratio: 4444.444444444444


In [14]:
# Switch the layout to 4 input channels of height 1 (width is unchanged).
channel_in, height = 4, 1
x = torch.randn(batchsize, channel_in, height, width)
print(x.shape)
print(x.numel())

torch.Size([1, 4, 1, 50])
200


In [15]:
# The same 2D convolution, now reading the current `channel_in` input channels.
conv = torch.nn.Conv2d(
    in_channels=channel_in,
    out_channels=num_kernels,
    kernel_size=kernel_size,
    stride=1,
    padding=1,
)
num_outputs = conv(x).numel()
print('Num param in conv',conv.weight.numel())
print('Num of output:',num_outputs)

# A fully connected layer that maps the flattened input to as many outputs.
mlp = torch.nn.Linear(channel_in*height*width, num_outputs)
print('Num param in mlp',mlp.weight.numel())

# Both layers must produce the same number of outputs.
assert mlp(torch.flatten(x)).numel() == num_outputs
print('Param ratio:',mlp.weight.numel()/conv.weight.numel())


Num param in conv 1152
Num of output: 1600
Num param in mlp 320000
Param ratio: 277.77777777777777
