<a href="https://colab.research.google.com/github/Aadi11-crypto/WORKBOOK/blob/main/CSD456_Lab05_Simple_CNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
from torch import nn
from torch.nn import functional as F

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import torchvision

## Check GPU

In [4]:
def cpu():
    """Get the CPU device."""
    return torch.device('cpu')

def gpu(i=0):
    """Get a GPU device."""
    return torch.device(f'cuda:{i}')

def num_gpus():
    """Get the number of available GPUs."""
    return torch.cuda.device_count()

def try_gpu(i=0):
    """Return gpu(i) if exists, otherwise return cpu()."""
    if num_gpus() >= i + 1:
        return gpu(i)
    return cpu()

def try_all_gpus():
    """Return all available GPUs, or [cpu(),] if no GPU exists."""
    return [gpu(i) for i in range(num_gpus())]

try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cpu'), device(type='cpu'), [])

## Convolution function

In [5]:
def corr2d(X, K):
    """Compute 2D cross-correlation."""
    """ X -> 2D image
        K -> Kernel filter """

    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()
    return Y

In [6]:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
K = torch.tensor([[0.0, 1.0], [2.0, 3.0]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

## Convolution Layer

In [7]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        return F.conv2d(x, self.weight, bias=self.bias)

## Simple Edge detection

In [8]:
X = torch.ones((6, 8))
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [9]:
X.shape

torch.Size([6, 8])

In [10]:
K = torch.tensor([[1.0, -1.0]])

In [11]:
K.shape

torch.Size([1, 2])

In [12]:
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [13]:
X.shape

torch.Size([6, 8])

We can now apply the kernel to the transposed image. As expected, it vanishes.

In [14]:
X.t()

tensor([[1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1.]])

In [15]:
corr2d(X.t(), K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

## Learning a Kernel

In [16]:
# Construct a two-dimensional convolutional layer with 1 output channel and a
# kernel of shape (1, 2). For the sake of simplicity, we ignore the bias here
conv2d = nn.LazyConv2d(1, kernel_size=(1, 2), bias=False)

# The two-dimensional convolutional layer uses four-dimensional input and
# output in the format of (example, channel, height, width), where the batch
# size (number of examples in the batch) and the number of channels are both 1
X1 = X.reshape((1, 1, 6, 8))
Y1 = Y.reshape((1, 1, 6, 7))
lr = 3e-2  # Learning rate

In [17]:
for i in range(20):
    Y_hat = conv2d(X1)
    l = (Y_hat - Y1) ** 2
    conv2d.zero_grad()
    l.sum().backward()
    # Update the kernel (Gradient Descend update)
    conv2d.weight.data[:] -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f'epoch {i + 1}, loss {l.sum():.3f}')

epoch 2, loss 12.264
epoch 4, loss 3.596
epoch 6, loss 1.233
epoch 8, loss 0.465
epoch 10, loss 0.184
epoch 12, loss 0.074
epoch 14, loss 0.030
epoch 16, loss 0.012
epoch 18, loss 0.005
epoch 20, loss 0.002


In [18]:
conv2d.weight.data.reshape((1, 2))

tensor([[ 0.9952, -1.0045]])

In [19]:
# X.shape

In [20]:
# For Diagonal tensor construction
X2 = torchvision.transforms.functional.rotate(X1,45)
X2

tensor([[[[0., 0., 0., 0., 1., 1., 1., 0.],
          [0., 0., 0., 0., 0., 1., 1., 1.],
          [1., 0., 0., 0., 0., 0., 1., 1.],
          [1., 1., 0., 0., 0., 0., 0., 1.],
          [1., 1., 1., 0., 0., 0., 0., 0.],
          [0., 1., 1., 1., 0., 0., 0., 0.]]]])

1. Construct an image X with diagonal edges.
  1. What happens if you apply the kernel K in this section to it? [2 Marks]
  2. What happens if you transpose X? [1 Marks]
  3. What happens if you transpose K? [1 Marks]

In [21]:
# Construct an image X with diagonal edges
X = torch.tensor([[0.0, 0.0, 0.0, 0.0],
                  [0.0, 1.0, 0.0, 0.0],
                  [0.0, 0.0, 1.0, 0.0],
                  [0.0, 0.0, 0.0, 1.0]])

print("X with diagonal edges:\n", X)


X with diagonal edges:
 tensor([[0., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])


In [22]:
# Define the kernel K
K = torch.tensor([[0.0, 1.0],
                  [2.0, 3.0]])

# Apply the corr2d function to X with kernel K
Y = corr2d(X, K)
print("Output after applying kernel K to X:\n", Y)


Output after applying kernel K to X:
 tensor([[3., 2., 0.],
        [1., 3., 2.],
        [0., 1., 3.]])


In [23]:
# Transpose X
X_T = X.T
print("Transposed X (X^T):\n", X_T)

# Apply the corr2d function to transposed X with kernel K
Y_T = corr2d(X_T, K)
print("Output after applying kernel K to transposed X:\n", Y_T)


Transposed X (X^T):
 tensor([[0., 0., 0., 0.],
        [0., 1., 0., 0.],
        [0., 0., 1., 0.],
        [0., 0., 0., 1.]])
Output after applying kernel K to transposed X:
 tensor([[3., 2., 0.],
        [1., 3., 2.],
        [0., 1., 3.]])


In [24]:
# Transpose K
K_T = K.T
print("Transposed K (K^T):\n", K_T)

# Apply the corr2d function to X with transposed kernel K
Y_K_T = corr2d(X, K_T)
print("Output after applying transposed kernel K to X:\n", Y_K_T)


Transposed K (K^T):
 tensor([[0., 2.],
        [1., 3.]])
Output after applying transposed kernel K to X:
 tensor([[3., 1., 0.],
        [2., 3., 1.],
        [0., 2., 3.]])


2. Design some kernels manually.
  1. Given a directional vector $\mathbf{v} = (v_1, v_2)$, derive an edge-detection kernel that detects edges orthogonal to $\mathbf{v}$, i.e., edges in the direction $(v_2, -v_1)$. [1 Marks]

In [25]:
import torch

def edge_detection_kernel(v1, v2):
    """
    Creates an edge detection kernel orthogonal to the vector v = (v1, v2).
    Edges are detected in the direction (v2, -v1).
    """
    return torch.tensor([[v2, -v1], [v2, -v1]])

# Example: v = (1, 0) detects vertical edges
v1, v2 = 1, 0
K_edge = edge_detection_kernel(v1, v2)
print("Edge detection kernel orthogonal to vector v = (1, 0):\n", K_edge)


Edge detection kernel orthogonal to vector v = (1, 0):
 tensor([[ 0, -1],
        [ 0, -1]])


  2.2. Derive a finite difference operator for the second derivative. What is the minimum size of the convolutional kernel associated with it? Which structures in images respond most strongly to it? [1 Marks]

In [26]:
def second_derivative_kernel():
    """
    Creates a finite difference operator for the second derivative.
    This kernel is the Laplacian operator.
    """
    return torch.tensor([[0, 1, 0], [1, -4, 1], [0, 1, 0]])

# Create the second derivative (Laplacian) kernel
K_second_derivative = second_derivative_kernel()
print("Second derivative (Laplacian) kernel:\n", K_second_derivative)


Second derivative (Laplacian) kernel:
 tensor([[ 0,  1,  0],
        [ 1, -4,  1],
        [ 0,  1,  0]])


   2.3. How would you design a blur kernel? Why might you want to use such a kernel? [1 Marks]

In [27]:
def blur_kernel(box_size=3):
    """
    Creates a simple box filter (mean blur kernel).
    The box_size defines the size of the kernel.
    """
    kernel = torch.ones((box_size, box_size)) / (box_size * box_size)
    return kernel

def gaussian_blur_kernel():
    """
    Creates a 3x3 Gaussian blur kernel.
    """
    return torch.tensor([[1, 2, 1], [2, 4, 2], [1, 2, 1]]) / 16.0

# Box filter blur kernel (3x3)
K_blur = blur_kernel(3)
print("Box filter blur kernel:\n", K_blur)

# Gaussian blur kernel (3x3)
K_gaussian_blur = gaussian_blur_kernel()
print("Gaussian blur kernel:\n", K_gaussian_blur)


Box filter blur kernel:
 tensor([[0.1111, 0.1111, 0.1111],
        [0.1111, 0.1111, 0.1111],
        [0.1111, 0.1111, 0.1111]])
Gaussian blur kernel:
 tensor([[0.0625, 0.1250, 0.0625],
        [0.1250, 0.2500, 0.1250],
        [0.0625, 0.1250, 0.0625]])



  2.4. What is the minimum size of a kernel to obtain a derivative of order $d$? [1 Marks]

In [28]:
def derivative_kernel(d):
    """
    Creates a finite difference kernel for the derivative of order d.
    The minimum size of the kernel is (d+1)x(d+1).
    """
    if d == 1:
        # First derivative kernel (Sobel-like)
        return torch.tensor([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])  # Example: Sobel for x-gradient
    elif d == 2:
        # Second derivative kernel (Laplacian-like)
        return torch.tensor([[0, 1, 0], [1, -4, 1], [0, 1, 0]])
    else:
        raise NotImplementedError(f"Derivative of order {d} is not implemented.")

# Example for the first derivative (d = 1)
K_first_derivative = derivative_kernel(1)
print("First derivative (Sobel-like) kernel:\n", K_first_derivative)

# Example for the second derivative (d = 2)
K_second_derivative = derivative_kernel(2)
print("Second derivative (Laplacian-like) kernel:\n", K_second_derivative)


First derivative (Sobel-like) kernel:
 tensor([[-1,  0,  1],
        [-2,  0,  2],
        [-1,  0,  1]])
Second derivative (Laplacian-like) kernel:
 tensor([[ 0,  1,  0],
        [ 1, -4,  1],
        [ 0,  1,  0]])


3. Create Covolution Layer to handle blur kernel. Apply it on designed input and output. [2 Marks]

In [29]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class BlurConv2D(nn.Module):
    def __init__(self, blur_kernel):
        super(BlurConv2D, self).__init__()
        # Create a 2D convolutional layer with 1 input channel, 1 output channel, and kernel size matching the blur kernel
        self.conv = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=blur_kernel.shape, padding=1, bias=False)
        # Initialize the weights of the convolutional layer to the blur kernel
        self.conv.weight = nn.Parameter(blur_kernel.unsqueeze(0).unsqueeze(0))  # Shape [1, 1, H, W]

    def forward(self, x):
        return self.conv(x)

# Create a simple 3x3 box blur kernel
blur_kernel = torch.tensor([[1.0, 1.0, 1.0],
                            [1.0, 1.0, 1.0],
                            [1.0, 1.0, 1.0]]) / 9.0  # Normalize by the number of elements to average

# Initialize the BlurConv2D layer with the blur kernel
blur_conv_layer = BlurConv2D(blur_kernel)

# Create an input image (6x6) with a sharp edge in the middle
input_image = torch.tensor([[0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                            [0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                            [0.0, 0.0, 0.0, 1.0, 1.0, 1.0],
                            [1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
                            [1.0, 1.0, 1.0, 0.0, 0.0, 0.0],
                            [1.0, 1.0, 1.0, 0.0, 0.0, 0.0]])

# Reshape the input to match the format expected by the Conv2d layer [batch_size, channels, height, width]
input_image = input_image.unsqueeze(0).unsqueeze(0)  # Shape: [1, 1, 6, 6]

# Apply the convolution layer to the input image
output_image = blur_conv_layer(input_image)

# Print the input and output images
print("Input Image:\n", input_image.squeeze())
print("Blurred Output Image:\n", output_image.squeeze().detach())


Input Image:
 tensor([[0., 0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1., 1.],
        [0., 0., 0., 1., 1., 1.],
        [1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.],
        [1., 1., 1., 0., 0., 0.]])
Blurred Output Image:
 tensor([[0.0000, 0.0000, 0.2222, 0.4444, 0.6667, 0.4444],
        [0.0000, 0.0000, 0.3333, 0.6667, 1.0000, 0.6667],
        [0.2222, 0.3333, 0.4444, 0.5556, 0.6667, 0.4444],
        [0.4444, 0.6667, 0.5556, 0.4444, 0.3333, 0.2222],
        [0.6667, 1.0000, 0.6667, 0.3333, 0.0000, 0.0000],
        [0.4444, 0.6667, 0.4444, 0.2222, 0.0000, 0.0000]])
