# Convolutions 

In [None]:
import torch 
from torch import nn

In [3]:
# Cross Correlation/convolution:

def corr2d(X, K):
    """Compute the 2-D cross-correlation of ``X`` with the kernel ``K``.

    The kernel is slid over every position where it fits entirely inside
    ``X`` (no padding, stride 1). At each position the overlapping window is
    multiplied elementwise by ``K`` and summed.

    Args:
        X: 2-D input tensor of shape ``(H, W)``.
        K: 2-D kernel tensor of shape ``(h, w)``.

    Returns:
        A tensor of shape ``(H - h + 1, W - w + 1)`` created with
        ``torch.zeros`` and filled with the windowed sums.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            # Window of X currently covered by the kernel.
            window = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = (window * K).sum()
    return Y

In [6]:
# Example: cross-correlate a 3x3 input with a 2x2 kernel; the result is 2x2.
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

# Convolution_Layer 

In [25]:
class Conv2D(nn.Module):
    """Minimal single-channel 2-D convolutional layer built on ``corr2d``.

    The layer owns one kernel (``weight``) and one scalar ``bias``; both are
    registered as learnable parameters.

    Args:
        kernel_size: Shape of the kernel as a ``(height, width)`` tuple. It is
            passed straight to ``torch.rand``, and ``corr2d`` unpacks the
            resulting tensor's shape into two values, so it must be 2-D.
    """

    def __init__(self, kernel_size):
        # nn.Module.__init__ must actually be *called* (note the parentheses):
        # it sets up the internal registries that nn.Parameter assignment
        # relies on. Without the call, the assignments below raise
        # AttributeError.
        super().__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        """Cross-correlate the 2-D input ``x`` with the kernel and add the bias."""
        return corr2d(x, self.weight) + self.bias
        
# Number of parameters = number_of_kernels * (kernel_height * kernel_width + 1); the "+ 1" is the bias.

In [26]:
# Edge detection:
# Build a 6x8 "image" of ones and zero out columns 2-5. This creates two
# vertical edges: 1 -> 0 between columns 1 and 2, and 0 -> 1 between columns 5 and 6.

X = torch.ones((6, 8))
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [27]:
# With this 1x2 kernel each output is X[i, j] - X[i, j + 1], i.e. the
# difference between horizontally adjacent pixels.
K = torch.tensor([[1.0, -1.0]])
Y = corr2d(X, K)
Y

# The output is 1 at the edge from white (1) to black (0) and -1 at the edge
# from black (0) to white (1). All other outputs are 0.

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [28]:
# Display the class object itself; this does not instantiate the layer.
Conv2D

__main__.Conv2D

In [36]:
# Learn the 1x2 edge-detection kernel from the (X, Y) pair by gradient descent.
# One input channel, one output channel, and no bias term.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# nn.Conv2d takes 4-D input, so add two leading axes of size 1 to the
# 6x8 input and the 6x7 target.
X = X.reshape((1, 1, 6, 8))
Y = Y.reshape((1, 1, 6, 7))

lr = 3e-2  # learning rate (step size)

for i in range(15):
    Y_hat = conv2d(X)
    # Sum of squared errors over all output positions.
    loss = ((Y_hat - Y) ** 2).sum()
    conv2d.zero_grad()
    loss.backward()
    # Update the kernel. Do the in-place step under no_grad() rather than
    # through `.data`, so autograd neither records the update nor has its
    # bookkeeping bypassed.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'batch {i + 1}, loss {loss:.3f}')

batch 2, loss 10.863
batch 4, loss 2.809
batch 6, loss 0.875
batch 8, loss 0.312
batch 10, loss 0.120
batch 12, loss 0.048
batch 14, loss 0.019


In [39]:
# Inspect the learned kernel.
conv2d.weight

Parameter containing:
tensor([[[[ 1.0100, -0.9871]]]], requires_grad=True)

# Padding

In [48]:
def comp_conv2d(conv2d, X):
    """Apply the layer ``conv2d`` to ``X`` and drop the two leading output axes.

    Args:
        conv2d: Callable layer (e.g. ``nn.Conv2d``) that is applied to the
            reshaped input.
        X: Input tensor; two leading axes of size 1 are prepended to its shape
            before the layer is called.

    Returns:
        The layer output reshaped to its shape from axis 2 onwards.
    """
    # Prepend two axes of size 1 so the layer receives a 4-D tensor for a 2-D X.
    batched = X.reshape((1, 1) + X.shape)
    out = conv2d(batched)
    # Strip the two leading axes again.
    return out.reshape(out.shape[2:])

# padding=1 adds one row of padding above and below the image and one column
# to its left and right, i.e. 2 extra rows and 2 extra columns in total.

# => The 8x8 image becomes 10x10; convolving it with a 3x3 kernel gives
#    (10 - 3 + 1) x (10 - 3 + 1) = 8x8.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
X = torch.rand(size=(8, 8))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [49]:
# X is still 8x8 here.
X.shape

torch.Size([8, 8])

In [52]:
# padding=(2, 1) adds 2 rows on each side (top and bottom) and 1 column on
# each side (left and right) of the image.

# => The 8x8 image becomes 12x10; convolving it with a 5x3 kernel gives
#    (12 - 5 + 1) x (10 - 3 + 1) = 8x8.
conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, X).shape

torch.Size([8, 8])

# Striding

In [55]:
# stride=2 moves the kernel two positions at a time in each direction.
# The padded image is 10x10, so each axis yields floor((10 - 3) / 2) + 1 = 4 outputs.
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, X).shape

torch.Size([4, 4])

In [58]:
# The 8x8 image with padding (0, 1) becomes 8x10.
# Convolving the 8x10 image with a 3x5 kernel at stride (3, 4) places the
# kernel's top-left corner only at (0, 0), (0, 4), (3, 0) and (3, 4),
# so the output is 2x2.
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, X).shape

torch.Size([2, 2])

# Summary

### Padding can increase the height and width of the output. It is often used to give the output the same height and width as the input.

### The stride can reduce the resolution of the output, for example reducing the height and width of the output to only $1/n$ of the height and width of the input ($n$ is an integer greater than $1$).

### Together, padding and stride can be used to effectively adjust the height and width of the output.