# Convolutions 

In [27]:
import torch 
from torch import nn

In [28]:
# Cross Correlation/convolution:

def corr2d(X, K):
    """Compute the 2D cross-correlation of ``X`` with the kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X`` (no padding, step of one); each output entry is the sum of the
    element-wise product between the kernel and the window beneath it.

    Args:
        X: 2D input tensor.
        K: 2D kernel tensor of shape ``(h, w)``.

    Returns:
        A tensor created with ``torch.zeros`` of shape
        ``(X.shape[0] - h + 1, X.shape[1] - w + 1)``.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Patch of X currently covered by the kernel.
            window = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = (window * K).sum()
    return Y

In [29]:
# Example:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

# Convolution_Layer 

In [30]:
class Conv2D(nn.Module):
    """Minimal single-channel 2D convolutional layer built on ``corr2d``.

    The layer owns one learnable kernel (``weight``) and one scalar
    ``bias``; the forward pass cross-correlates the input with the kernel
    and adds the bias to every output element.

    Args:
        kernel_size: Shape of the kernel, e.g. ``(h, w)``; passed straight
            to ``torch.rand`` to initialise the weights.
    """

    def __init__(self, kernel_size):
        # nn.Module.__init__ must actually be *called* (note the
        # parentheses): it sets up the internal parameter registry, and
        # assigning an nn.Parameter before that raises AttributeError.
        super().__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Return ``corr2d(x, weight) + bias`` for a 2D input ``x``."""
        return corr2d(x, self.weight) + self.bias
        
# number of parameters = number_of_kernels *(size of kernels + 1)

In [31]:
# Edge detection:

X = torch.ones((6, 8))
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [32]:
K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)
Y

# As we can see, the output is 1 at the edge from white (1) to black (0) and -1 at the edge from black to white.
# All other outputs are 0.

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [33]:
# Learn the edge-detection kernel from data: start from a randomly
# initialised 1x2 convolution (no bias) and fit it to the (X, Y) pair
# produced by the hand-written kernel above.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d works on 4D input (batch, channels, height, width), so add
# singleton batch and channel dimensions to the 2D tensors.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

for i in range(15):
    Y_hat = conv2d(X)
    l = (Y_hat - Y) ** 2  # element-wise squared error
    conv2d.zero_grad()
    l.sum().backward()
    # Update the kernel with one plain gradient-descent step. The update
    # itself must not be recorded by autograd, hence torch.no_grad()
    # rather than writing through the legacy `.data` attribute.
    with torch.no_grad():
        conv2d.weight -= 3e-2 * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'batch {i + 1}, loss {l.sum():.3f}')

batch 2, loss 10.970
batch 4, loss 2.779
batch 6, loss 0.851
batch 8, loss 0.300
batch 10, loss 0.115
batch 12, loss 0.046
batch 14, loss 0.018


In [34]:
conv2d.weight

Parameter containing:
tensor([[[[ 0.9873, -1.0097]]]], requires_grad=True)

# Padding

In [35]:
def comp_conv2d(conv2d, X):
    """Run ``conv2d`` on a 2D tensor and return the result without the
    leading batch and channel dimensions.

    Args:
        conv2d: Callable applied to the 4D input (e.g. an ``nn.Conv2d``).
        X: Input tensor; it is reshaped to ``(1, 1) + X.shape`` before the
            call.

    Returns:
        The output of ``conv2d`` reshaped to its trailing dimensions
        (``shape[2:]``).
    """
    # Prepend singleton batch and channel axes.
    batched = X.reshape((1, 1) + X.shape)
    out = conv2d(batched)
    # Drop the first two axes again.
    return out.reshape(out.shape[2:])

# padding=1 adds one row above and below the image and one column to its left and right,
# i.e. 2 extra rows and 2 extra columns in total.

# => The 8*8 image becomes 10*10; convolving with a 3*3 kernel gives (10-3+1)*(10-3+1) ==> 8*8
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
X = torch.rand(size=(8, 8))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [36]:
X.shape

torch.Size([8, 8])

In [37]:
# Here padding=(2, 1) adds 2 rows above and below and 1 column to the left and right of the image.

# => The 8*8 image becomes 12*10; convolving with a 5*3 kernel gives (12-5+1)*(10-3+1) ==> 8*8
conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

# Striding

In [38]:
# A stride of 2 moves the kernel two positions at a time, so here the output height and width are halved (8 -> 4).
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [39]:
# Here the 8*8 image with padding (0, 1) becomes 8*10.
# Convolving 8*10 with a (3, 5) kernel at stride (3, 4):
# the kernel's top-left corner can only sit at (0,0), (0,4), (3,0), (3,4) ==> 2*2 output
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

# Summary

Thus, padding can increase the height and width of the output. This is often used to give the output the same height and width as the input.

The stride can reduce the resolution of the output, for example reducing the height and width of the output to only $1/n$ of the height and width of the input ($n$ is an integer greater than $1$).

Padding and stride can be used to adjust the dimensionality of the data effectively.

# Multiple input channels

In [40]:
def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    ``X`` and ``K`` are iterated in lockstep along their first axis; each
    channel of ``X`` is cross-correlated with the matching slice of ``K``
    via ``corr2d`` and the per-channel results are added together.

    Returns:
        The sum of the per-channel results (the accumulator starts at the
        integer ``0``, which is what is returned if there are no channels).
    """
    total = 0
    for channel, kernel in zip(X, K):
        total = total + corr2d(channel, kernel)
    return total

In [41]:
X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

Here, c_i input channels combined with c_i kernels (one per channel) give 1 output channel (this makes use of compositionality).

Thus, if we have c_i input channels we need c_i kernels: we convolve the i-th input channel with the i-th kernel and then add all the results to get the output.

# Multiple input and output channels
Let c_i and c_o be the number of input and output channels, and let k_w and k_h be the width and height of the kernel.

So our kernel will be of size c_o * (c_i * k_w * k_h).

Thus, for each output channel i we take the i-th set of kernels and do the convolution with that set exactly as described above (c_i inputs, 1 output). That's it.

In [42]:
# Number of parameters = c_o * ((c_i * k_w * k_h) + 1).

## Convolution with $1\times1$ Kernels 

Suppose the output of a convolution layer has shape $F \times H \times W$,
that is, $F$ is the number of convolution kernels and $H$, $W$ are the spatial dimensions.

If we now convolve it with $F_1$ kernels of size $1\times1$, we get an output of shape $F_1\times H\times W$; that is, we reduce the channel dimension if $F_1 < F$, and otherwise we increase it (or keep it unchanged when $F_1 = F$).


The $1\times1$ convolutional layer is equivalent to a fully-connected layer applied on a per-pixel basis.
So basically we will be using $1\times1$ kernels for dimension-reduction purposes.

# Pooling

Pooling layers serve the dual purposes of mitigating the sensitivity of convolutional layers to location and of spatially downsampling representations.

Typically they are <br>
- Average Pooling
- Max Pooling <br>

This also has padding and striding parameters <br>

In [46]:
import torch
from torch import nn

def pool2d(X, pool_size, mode='max'):
    """Apply 2D max or average pooling to ``X`` (no padding, step of one).

    A window of size ``pool_size`` is slid over every position where it
    fits entirely inside ``X``; each output entry is the maximum or the
    mean of the window beneath it.

    Args:
        X: 2D input tensor.
        pool_size: ``(p_h, p_w)`` height and width of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        A tensor created with ``torch.zeros`` of shape
        ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``.

    Raises:
        ValueError: If ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    reducers = {'max': torch.max, 'avg': torch.mean}
    # Fail loudly on an unknown mode instead of silently returning an
    # all-zero tensor.
    if mode not in reducers:
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    reduce = reducers[mode]  # chosen once, outside the loops

    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = reduce(X[i: i + p_h, j: j + p_w])
    return Y

In [47]:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [48]:
pool2d(X, (2, 2), 'avg')

tensor([[2., 3.],
        [5., 6.]])

### Padding and striding

In [49]:
X = torch.reshape(torch.arange(16, dtype=torch.float32), (1, 1, 4, 4))
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [50]:
# By default the stride is the same as the pooling window size (here 3*3)
pool2d = nn.MaxPool2d(3)
pool2d(X)

tensor([[[[10.]]]])

In [52]:
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

### Multiple Channels

In [53]:
X = torch.cat((X, X + 1), 1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [55]:
# similar to conv layers but no parameters
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])

Maximum pooling, combined with a stride larger than 1 can be used to reduce the spatial dimensions (e.g., width and height)<br>
The pooling layer’s number of output channels is the same as the number of input channels. <br>