<a href="https://colab.research.google.com/github/Dipak22/Case-Studies/blob/master/convolution.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Import necessary classes and functions

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder
from tqdm.notebook import tqdm
import numpy as np

import warnings
warnings.filterwarnings("ignore")

### Create a dummy image and see how the output shape changes under different convolution settings

In [5]:
# Unbatched dummy image laid out as (channels, height, width).
rand = torch.rand(3, 128, 128)

# Five layers that each change exactly one setting relative to the baseline
# (3 -> 3 channels, 3x3 kernel, stride 1, no padding).
conv1 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=0)   # baseline
conv2 = nn.Conv2d(3, 3, kernel_size=7, stride=1, padding=0)   # larger kernel
conv3 = nn.Conv2d(3, 3, kernel_size=3, stride=3, padding=0)   # larger stride
conv4 = nn.Conv2d(3, 3, kernel_size=3, stride=1, padding=2)   # larger padding
conv5 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=0)  # more output channels

# Run every layer on the same input so only the layer settings differ.
conv1_out, conv2_out, conv3_out, conv4_out, conv5_out = (
    conv1(rand),
    conv2(rand),
    conv3(rand),
    conv4(rand),
    conv5(rand),
)

# Report the resulting shape for each variant.
for label, result in (
    ("Baseline output", conv1_out),
    ("Larger kernel output", conv2_out),
    ("Larger stride output", conv3_out),
    ("Larger padding output", conv4_out),
    ("More output channels output", conv5_out),
):
    print(f"{label}: {result.shape}")

Baseline output: torch.Size([3, 126, 126])
Larger kernel output: torch.Size([3, 122, 122])
Larger stride output: torch.Size([3, 42, 42])
Larger padding output: torch.Size([3, 130, 130])
More output channels output: torch.Size([64, 126, 126])


In [15]:
# Convolution by hand: cut the input into 3x3 patches with unfold, apply one
# linear layer to every patch, then fold the results back into an image grid.
x = torch.rand(32,3,128,128)

# unfold returns (batch, in_channels * k * k, num_patches)
patches = nn.functional.unfold(x, kernel_size=3,stride=1, padding=0)
_, coef, num_patches = patches.shape
print(patches.shape)

# One row per patch: (batch * num_patches, in_channels * k * k)
patches = patches.transpose(1,2).reshape(-1,coef )
print(patches.shape)

ln = nn.Linear(3 * 3 * 3, 64, bias = True)
conv_output = ln(patches)  # (batch * num_patches, out_channels)
print(conv_output.shape)

# Rows are ordered batch-major, then patch, with channels last. Recover
# (batch, num_patches, out) first and only then swap the last two axes.
# Viewing straight to (batch, out, num_patches) would keep the shape but
# scramble values across channels and spatial positions.
conv_output = conv_output.view(32, num_patches, 64).transpose(1, 2)
print(conv_output.shape)

out_height = (128 + 2 *0 - 3)//1 +1
out_width = (128 + 2 *0 - 3)//1 +1

# reshape (not view) because the transposed tensor is no longer contiguous
output  = conv_output.reshape(32,64,out_height, out_width)
print(output.shape)

torch.Size([32, 27, 15876])
torch.Size([508032, 27])
torch.Size([508032, 64])
torch.Size([32, 64, 15876])
torch.Size([32, 64, 126, 126])


### Implementing convolution using `nn.Unfold`

In [16]:
class MyConv2d(nn.Module):
  """2D convolution implemented as unfold (im2col) followed by a linear layer.

  Every sliding window of the input is flattened into a vector of
  in_channels * kernel_size * kernel_size values and multiplied by one shared
  nn.Linear, whose weight has shape
  (out_channels, in_channels * kernel_size * kernel_size).

  Args:
    in_channels: number of channels of the input tensor.
    out_channels: number of channels produced.
    kernel_size: side length of the square kernel (int).
    stride: step between neighbouring windows (int), default 1.
    padding: zero padding added to both sides of each spatial dim (int), default 0.
  """

  def __init__(self, in_channels, out_channels, kernel_size, stride =1, padding = 0):
    super(MyConv2d, self).__init__()

    self.kernel_size = kernel_size
    self.stride = stride
    self.padding = padding
    self.in_channels = in_channels
    self.out_channels = out_channels

    # The convolution weights live in a linear layer applied to each flattened patch.
    self.linear = nn.Linear(in_channels * kernel_size * kernel_size, out_channels,bias = True)

  def forward(self, x):
    """Apply the convolution to a batched input.

    Args:
      x: tensor of shape (batch_size, in_channels, height, width).

    Returns:
      Tensor of shape (batch_size, out_channels, output_height, output_width).

    Raises:
      AssertionError: if the channel dimension of x differs from in_channels.
    """
    batch_size, channels, height, width = x.shape

    # Ensure input channels match
    assert self.in_channels == channels, " Input channels mismatch"

    # Unfold the input into patches:
    # (batch_size, in_channels * kernel_size * kernel_size, num_patches)
    patches = nn.functional.unfold(x,
                                   kernel_size=self.kernel_size,
                                   stride = self.stride,
                                   padding=self.padding)
    _, num_kernel_coefficients, num_patches = patches.shape

    # One row per patch: (batch_size * num_patches, in_channels * kernel_size * kernel_size)
    patches = patches.transpose(1,2).reshape(-1, num_kernel_coefficients)

    # Linear layer performs the dot product of every patch with every filter:
    # (batch_size * num_patches, out_channels)
    conv_output = self.linear(patches)

    # Rows are ordered batch-major, then patch, with channels last. Recover
    # (batch_size, num_patches, out_channels) before moving channels in front
    # of the patch axis. Viewing directly as (batch_size, out_channels, -1)
    # would keep the shape but mix values across channels and positions.
    conv_output = conv_output.view(batch_size, num_patches, self.out_channels).transpose(1, 2)

    output_height = (height + 2*self.padding - self.kernel_size )//self.stride + 1
    output_width = (width + 2*self.padding - self.kernel_size )//self.stride + 1

    # reshape (not view) because the transposed tensor is not contiguous
    output = conv_output.reshape(batch_size, self.out_channels, output_height, output_width)

    return output


In [19]:
# Build the hand-written layer and the PyTorch layer from one shared
# configuration so the two cannot drift apart.
conv_config = dict(in_channels=3, out_channels=64, kernel_size=7, stride=1, padding=0)
myconv = MyConv2d(**conv_config)
torchconv = nn.Conv2d(**conv_config)

# Batch of 4 random 3-channel 128x128 images.
rand = torch.randn(4, 3, 128, 128)

# Feed the same batch through both layers.
myconv_out, torchconv_out = myconv(rand), torchconv(rand)

# Only the output shapes are compared here; the two layers have independent weights.
print("Output of my convolution: ", myconv_out.shape)
print("Output of pytorch convolution: ", torchconv_out.shape )

Output of my convolution:  torch.Size([4, 64, 122, 122])
Output of pytorch convolution:  torch.Size([4, 64, 122, 122])


### Average pooling

In [21]:
#create a rand image
rand = torch.rand(3,128,128)
avgpool = nn.AvgPool2d(kernel_size=3)
avgpool_2 = nn.AvgPool2d(kernel_size=3, stride = 2)

out_1 = avgpool(rand)
out_2 = avgpool_2(rand)
print("Basic avg pool, kernel_size == stride", out_1.shape)
print(" smaller stride =2", out_2.shape)

Basic avg pool, kernel_size == stride torch.Size([3, 42, 42])
 smaller stride =2 torch.Size([3, 63, 63])


### Adaptive pooling
We set the output size we want, and the layer figures out the kernel size and stride for us.

In [22]:
# Random unbatched image: (channels, height, width)
rand = torch.rand(3, 128, 128)

# Adaptive pooling is configured by the target (height, width) only.
adaptivepool_1 = nn.AdaptiveAvgPool2d(output_size=(64, 64))
adaptivepool_2 = nn.AdaptiveAvgPool2d(output_size=(64, 48))

out_1, out_2 = adaptivepool_1(rand), adaptivepool_2(rand)

print(" Avg pol 1: ", out_1.shape)
print("Avg pool 2: ", out_2.shape)

 Avg pol 1:  torch.Size([3, 64, 64])
Avg pool 2:  torch.Size([3, 64, 48])
