<a href="https://colab.research.google.com/github/Rajshree-Th/CNN/blob/main/CNN_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <font color='white'>Implement CNN from scratch</font>

### Today in this project we'll compute the convolutional operations from scratch using Numpy
We'll then verify the correctness of our implementation using a "grader" function/cell.

The grader function validates the correctness of our code.

### Task 1. Calculate the shape of the output image
We use the formula given below to calculate the shape of our output image after performing the required operations
<img src='https://i.imgur.com/BlepuHW.jpg'>

We are going to use stride, s=1 for simplicity

In [16]:
# https://stats.stackexchange.com/questions/351115/convolution-with-a-non-square-kernel
# Assuming stride=1 for simplicity
# Importing numpy with an alias "np"
import numpy as np

# Defining a function to calculate the shape of the output
def calculate_output_shape(img_size, kernel_size, padding):
    """Return the (length, width) of a stride-1 convolution output.

    Applies output = (n + 2p - k) / s + 1 along each axis, with the stride
    ``s`` fixed to 1.

    Args:
        img_size: (length, width) of the unpadded input image.
        kernel_size: (length, width) of the kernel.
        padding: number of zero rows/columns added on each side of the image.

    Returns:
        A tuple ``(output_length, output_width)`` of ints, or ``(-1, -1)``
        when the kernel is larger than the unpadded image along either axis.
    """
    img_length, img_width = img_size[0], img_size[1]
    kernel_length, kernel_width = kernel_size[0], kernel_size[1]

    # `or`, not `and`: a kernel that is too large along a single axis is just
    # as invalid as one that is too large along both, and must not fall
    # through to the formula (which would yield a mixed result like (-1, 8)).
    if kernel_length > img_length or kernel_width > img_width:
        return -1, -1

    # Stride is 1, so the division in the general formula is a no-op;
    # np.floor is kept so a non-integer padding still rounds down.
    output_length = int(np.floor(img_length + 2 * padding - kernel_length)) + 1
    output_width = int(np.floor(img_width + 2 * padding - kernel_width)) + 1
    return output_length, output_width

In [17]:
# Checking the implementation of the above function "calculate_output_shape"
# This grader function would result "True" if the above function is implemented correctly.
def grader_calculate_size(l1,w1,l2,w2,l3,w3):
    """Check three (length, width) results against their expected values.

    Expects (l1, w1) == (220, 220), (l2, w2) == (-1, -1) and
    (l3, w3) == (128, 128). Raises AssertionError if any value differs;
    returns True otherwise.
    """
    checks = [
        l1 == 220, w1 == 220,    # case 1
        l2 == -1, w2 == -1,      # case 2
        l3 == 128, w3 == 128,    # case 3
    ]
    assert all(checks)
    return True

# Three cases: a 5x5 kernel on a 224x224 image without padding, a 12x12 kernel
# on a smaller 10x10 image (the -1, -1 sentinel is expected), and a 3x3 kernel
# on a 128x128 image with padding 1.
l1,w1 = calculate_output_shape((224,224), (5,5),0)
l2,w2 = calculate_output_shape((10,10), (12,12),1)
l3,w3 = calculate_output_shape((128,128), (3,3),1)

# Raises AssertionError on any mismatch; otherwise evaluates to True.
grader_calculate_size(l1,w1,l2,w2,l3,w3)

True

### Task 2. Get the padded image
In this task we have to add layers of zeros as padding around the original image

In [18]:
# Defining a function to add padding layers in the original image
def get_padded_image(input_img, padding_width):
    """Surround a 2-D image with a border of zeros.

    Args:
        input_img: array whose first two dimensions are (rows, cols).
        padding_width: number of zero rows/columns to add on every side.

    Returns:
        ``input_img`` itself (not a copy) when ``padding_width`` is zero or
        negative; otherwise a new array created with ``np.zeros`` of shape
        (rows + 2p, cols + 2p) with the original image placed in the centre.
    """
    # Guard clause: nothing to pad.
    if padding_width <= 0:
        return input_img

    rows, cols = input_img.shape[0], input_img.shape[1]

    # Padded canvas has size n + 2p along each axis.
    img_with_padding = np.zeros(
        (rows + 2 * padding_width, cols + 2 * padding_width)
    )

    # The original image starts `padding_width` cells in from the top-left
    # corner, which leaves an equal border on all four sides.
    row_end = padding_width + rows
    col_end = padding_width + cols
    img_with_padding[padding_width:row_end, padding_width:col_end] = input_img

    return img_with_padding

In [19]:
# Checking the implementation of the above function "get_padded_image"
# This grader function would result "True" if the above function is implemented correctly.
def grader_padding(img1,img2):
    """Validate two padded-image results.

    ``img1`` must be 8x8 and ``img2`` 4x4; in addition the row sums and
    column sums of ``img1`` must match the expected values below.
    Raises AssertionError on any mismatch; returns True otherwise.
    """
    expected_row_sums = [ 0.,  0., 10., 26., 42., 58.,  0.,  0.]
    expected_col_sums = [ 0, 0, 28, 32, 36, 40,  0,  0]

    # Shapes are verified first, before any element-wise comparison.
    assert img1.shape == (8, 8) and img2.shape == (4, 4)

    rows_cmp = img1.sum(axis=1) == expected_row_sums
    cols_cmp = img1.sum(axis=0) == expected_col_sums
    assert rows_cmp.all() and cols_cmp.all()
    return True

# 4x4 test image holding the values 1..16 in row-major order.
img = np.array([[1,2,3,4], 
                [5,6,7,8], 
                [9,10,11,12], 
                [13,14,15,16]])

# img1: two layers of zeros on every side (8x8 result).
# img2: padding width 0, for which get_padded_image returns the input as-is.
img1 = get_padded_image(img, 2)
img2 = get_padded_image(img, 0)

# Raises AssertionError on any mismatch; otherwise evaluates to True.
grader_padding(img1, img2)

True

### Task 3. Performing convolution
In this task, we are going to convolve the (zero-padded) image with a kernel using a sliding-window approach

In [20]:
'''
Here we'll define a function called "convolve" that takes an image matrix (img), a kernel matrix (kernel), and an integer (pad) as input, 
and returns the convolved image matrix as output. The convolved image matrix (output) will be obtained by convolving the input image with the given kernel 
matrix using a sliding window approach.
'''
# Defining convolve function 
def convolve(img, kernel, pad):
    """Slide ``kernel`` over the zero-padded ``img`` with stride 1.

    Args:
        img: 2-D image array.
        kernel: 2-D kernel array.
        pad: padding width passed to ``get_padded_image``.

    Returns:
        A float array (created with ``np.zeros``) whose shape comes from
        ``calculate_output_shape``; each entry is the sum of the element-wise
        products of the kernel and the image window anchored at that position.
    """
    # Output dimensions for the given image, kernel and padding.
    out_rows, out_cols = calculate_output_shape(
        (img.shape[0], img.shape[1]),
        (kernel.shape[0], kernel.shape[1]),
        pad,
    )
    convolved_img = np.zeros(shape=(out_rows, out_cols))

    # Image surrounded by `pad` layers of zeros.
    padded = get_padded_image(img, pad)
    k_rows, k_cols = kernel.shape[0], kernel.shape[1]

    # Visit every output position in row-major order.
    for row, col in np.ndindex(convolved_img.shape[0], convolved_img.shape[1]):
        # Window of the padded image covered by the kernel at this position.
        window = padded[row:row + k_rows, col:col + k_cols]

        # Accumulate kernel * window one element at a time (row-major).
        total = 0
        for a in range(window.shape[0]):
            for b in range(window.shape[1]):
                total += kernel[a][b] * window[a][b]

        convolved_img[row][col] = total

    return convolved_img

In [21]:
# Checking the implementation of the above function "convolve"
# This grader function would result "True" if the above function is implemented correctly.
def grader_convolve(conv_img):
    """Validate a convolution result.

    ``conv_img`` must be 5x5 and its row sums / column sums must match the
    expected values below. Raises AssertionError on any mismatch; returns
    True otherwise.
    """
    expected_row_sums = [10, 26, 42, 58,  0]
    expected_col_sums = [ 0,28, 32, 36, 40]

    shape_ok = conv_img.shape == (5, 5)
    rows_cmp = conv_img.sum(axis=1) == expected_row_sums
    cols_cmp = conv_img.sum(axis=0) == expected_col_sums

    # `.all()` is only reached once the shape check has passed.
    assert shape_ok and rows_cmp.all() and cols_cmp.all()
    return True

# 4x4 test image holding the values 1..16 in row-major order.
img = np.array([[1,2,3,4],
                [5,6,7,8], 
                [9,10,11,12],
                [13,14,15,16]])
# 2x2 test kernel.
kernel = np.array([[1,-1], [0,1]])

# Convolve with padding 1; the grader expects a 5x5 result.
conv_img = convolve(img, kernel, 1)
# Raises AssertionError on any mismatch; otherwise evaluates to True.
grader_convolve(conv_img)

True

### Task 4: Perform Maxpooling with strides


In [22]:
'''
This function will calculate the size of the output feature map that will be produced after applying max pooling to the original image.
'''
def output_shape(img_size, pool_size, stride):
    """Return the (length, width) of a max-pooled feature map.

    Each axis uses output = floor((input - pool_size) / stride) + 1.

    Args:
        img_size: (length, width) of the input image.
        pool_size: side length of the square pooling window.
        stride: step between consecutive windows.

    Returns:
        A tuple ``(output_length, output_width)`` of ints.
    """
    rows, cols = img_size[0], img_size[1]

    def pooled_extent(extent):
        # Number of window positions that fit along one axis.
        return int(np.floor((extent - pool_size) / stride) + 1)

    return pooled_extent(rows), pooled_extent(cols)

In [23]:
'''
This function will generate a matrix containing the maximum values of each pool in the given input image
'''
def get_maxpools(img, pool_size, stride):
    """Max-pool a 2-D image with a square window.

    Args:
        img: 2-D image array.
        pool_size: side length of the square pooling window.
        stride: step between consecutive windows along each axis.

    Returns:
        A float array (created with ``np.zeros``) whose shape is given by
        ``output_shape``; entry (i, j) is the maximum of the window whose
        top-left corner is at (i * stride, j * stride). Maxima are stored
        as-is (no integer truncation), so float images keep their values.

    Raises:
        ValueError: if the pooling window does not fit inside the image.
    """
    # Dimensions of the pooled matrix.
    tgt_size_length, tgt_size_width = output_shape((img.shape[0], img.shape[1]), pool_size, stride)
    if tgt_size_length < 1 or tgt_size_width < 1:
        raise ValueError(
            "pooling window does not fit inside the image "
            f"(image shape {img.shape[0]}x{img.shape[1]}, pool_size {pool_size})"
        )

    pooling_mat = np.zeros(shape=(tgt_size_length, tgt_size_width))

    # Iterate over output positions instead of image positions so the number
    # of windows always matches the output shape for any pool_size; the
    # previous `shape - 1` loop bound was only correct for pool_size == 2.
    for i in range(tgt_size_length):
        top = i * stride
        for j in range(tgt_size_width):
            left = j * stride
            window = img[top:top + pool_size, left:left + pool_size]
            # Write straight into the result rather than growing a temporary
            # array with np.append (quadratic) and reshaping afterwards.
            pooling_mat[i, j] = np.max(window)

    return pooling_mat

In [24]:
# Checking the implementation of the above functions "output_shape" and "get_maxpools"
# This grader function would result "True" if the above functions are implemented correctly.
def pooling_final(pooled_output):
    """Validate a max-pooling result by its column and row sums.

    Raises AssertionError if the sums along axis 0 or axis 1 differ from the
    expected values below; returns True otherwise.
    """
    expected_col_sums = [30, 33, 36]
    expected_row_sums = [21, 33, 45]

    cols_cmp = pooled_output.sum(axis=0) == expected_col_sums
    rows_cmp = pooled_output.sum(axis=1) == expected_row_sums
    assert cols_cmp.all() and rows_cmp.all()
    return True
# 4x4 test image holding the values 1..16 in row-major order.
img = np.array([[1,2,3,4],
                [5,6,7,8],
                [9,10,11,12],
                [13,14,15,16]])
# 2x2 pooling window moved one pixel at a time.
pool_size = 2
stride = 1
pooled_output = (get_maxpools(img, pool_size, stride))
# Raises AssertionError on any mismatch; otherwise evaluates to True.
pooling_final(pooled_output)

True