**Simple Architecture of a CNN:**

Image --> Convolution and ReLU --> Pooling --> Fully Connected Layer --> Classification

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

**2D Convolution**

In [2]:
# Shape of the input batch in PyTorch's NCHW layout:
# (number of samples, channels, height, width)
img = (1, 3, 64, 64)

# Seed the global RNG so the random "image" below (and any later draw from
# the default generator) is identical on every Restart & Run All.
torch.manual_seed(0)
imgT = torch.rand(img)  # random image as a float tensor with values in [0, 1)

In [3]:
# Hyper-parameters of the 2D convolution layer.
in_channels, out_channels = 3, 10        # RGB input -> 10 feature maps (one per kernel)
kernel_size, stride, padding = 5, 1, 1   # 5x5 kernel, unit stride, 1-pixel zero padding

# Build the layer; keyword arguments make the role of each value explicit.
convLayer = nn.Conv2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
)

In [4]:
# Forward pass of the image tensor through the convolution layer.
out_convLayer = convLayer(imgT)

# Resulting shape: N x channels (depth) x height x width
print(out_convLayer.size())

torch.Size([1, 10, 62, 62])


**Reversing/Transposing the convolution (used in autoencoders) — recovers the original spatial shape, not the original pixel values**


In [5]:
# The transposed convolution maps the 10 feature maps back to 3 channels.
in_channels, out_channels = 10, 3
kernel_size, stride, padding = 5, 1, 1

conv_Transpose_Layer = nn.ConvTranspose2d(
    in_channels=in_channels,
    out_channels=out_channels,
    kernel_size=kernel_size,
    stride=stride,
    padding=padding,
)

# Apply it to the convolution output. Per spatial dim:
# (62 - 1) * stride - 2 * padding + (kernel_size - 1) + 1 = 64
out_conv_Transpose_Layer = conv_Transpose_Layer(out_convLayer)
print(out_conv_Transpose_Layer.size())

torch.Size([1, 3, 64, 64])


**Max Pooling**

In [8]:
# 2x2 max-pooling window moved in steps of 2.
kernel_size, stride = 2, 2
max_p = nn.MaxPool2d(kernel_size=kernel_size, stride=stride)

# Pool the convolution output, then report the shapes side by side.
out_max_p = max_p(out_convLayer)

print(out_convLayer.size())  # Before pooling
print(out_max_p.size())      # After pooling

torch.Size([1, 10, 62, 62])
torch.Size([1, 10, 31, 31])


**Flattening**

In [9]:
# Random stand-in tensor of shape (1, 10, 31, 31).
# NOTE(review): the name `input` shadows Python's builtin input(); it is kept
# here only because other cells may refer to it.
input = torch.rand((1, 10, 31, 31))

# Keep dim 0 (the batch) and merge every following dim into a single one.
out = input.flatten(start_dim=1)
print(out.size())  # (1, 10 * 31 * 31)

torch.Size([1, 9610])
