In [40]:
import numpy as np 
import matplotlib.pyplot as plt  
import torch
import torch.nn as nn 
import torch.nn.functional as F  
import torch.optim as optim
import cv2

In [41]:
def sigmoid(x):
    """Logistic sigmoid, 1 / (1 + exp(-x)), evaluated in a numerically stable way.

    Parameters
    ----------
    x : scalar or np.ndarray
        Input value(s); anything that supports unary minus and NumPy ufuncs.

    Returns
    -------
    np.float64 or np.ndarray
        Element-wise sigmoid of ``x``, in the closed interval [0, 1].
    """
    # logaddexp(0, -x) == log(1 + exp(-x)) but never materialises exp(-x),
    # so very negative inputs no longer overflow (and no RuntimeWarning is raised).
    return np.exp(-np.logaddexp(0, -x))

def relu(x):
    """Rectified linear unit: element-wise max(0, x)."""
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

In [42]:
# NOTE(review): machine-specific absolute path; the notebook will not run
# elsewhere until this points at a local copy of the image.
image_path = 'C:/Users/91932/OneDrive/Desktop/ML/VSCode30/DL/Pictures/dog.png'
input_image = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file -- it returns None,
# which would otherwise surface as an opaque AttributeError on `.shape`.
if input_image is None:
    raise FileNotFoundError(f"Could not read image at: {image_path}")
print(input_image.shape)

(252, 242, 3)


In [43]:
class Conv2D_scratch(nn.Module):
    """2-D convolution (cross-correlation) forward pass written with NumPy.

    The layer subclasses ``nn.Module`` only so it can be invoked as
    ``layer(x)``; weights are plain NumPy arrays, not torch parameters.

    Parameters
    ----------
    n_kernels : int
        Number of output feature maps.
    kernel_size : int
        Side length of the (square) kernel.
    input_channels : int
        Number of channels expected in the input.
    padding : int, default 0
        Zero padding added to both sides of the height and width axes.
    stride : int, default 1
        Step between successive kernel positions.
    """

    def __init__(self, n_kernels, kernel_size, input_channels, padding = 0, stride = 1):
        super().__init__()
        self.n_kernels = n_kernels
        self.kernel_size = kernel_size
        self.input_channels = input_channels
        self.padding = padding
        self.stride = stride
        # Kernels start as small random weights,
        # shape (n_kernels, input_channels, kernel_height, kernel_width).
        self.kernels = np.random.randn(n_kernels, input_channels, kernel_size, kernel_size) * 0.1
        # One bias per kernel / output feature map.
        self.biases = np.zeros(n_kernels)

    def forward(self, input):
        """Convolve ``input`` with every kernel.

        Parameters
        ----------
        input : array-like
            Shape (channels, height, width) or (batch, channels, height, width).
            Anything ``np.asarray`` accepts (including CPU torch tensors).

        Returns
        -------
        np.ndarray
            Float array of shape (batch, n_kernels, out_height, out_width).

        Raises
        ------
        ValueError
            If the input is not 3-D/4-D, its channel count differs from
            ``input_channels``, or the kernel is larger than the padded input.
        """
        input = np.asarray(input)
        if input.ndim == 3:               # (channels, height, width)
            input = input[np.newaxis, ...]        # add the batch axis
        if input.ndim != 4:
            raise ValueError(
                f"expected a 3-D or 4-D input, got an array with {input.ndim} dimension(s)"
            )

        n_batch, n_channels, n_height, n_width = input.shape
        if n_channels != self.input_channels:
            # Without this check a 1-channel kernel/input would silently broadcast.
            raise ValueError(
                f"expected {self.input_channels} input channel(s), got {n_channels}"
            )
        k_height, k_width = self.kernel_size, self.kernel_size

        if self.padding > 0:
            pad = self.padding
            # np.pad takes ONE pad_width argument: a (before, after) pair per axis.
            # Only the two spatial axes are padded.
            input_padded = np.pad(
                input,
                ((0, 0), (0, 0), (pad, pad), (pad, pad)),
                mode = 'constant',
                constant_values = 0,
            )
        else:
            input_padded = input
        n_height_padded, n_width_padded = input_padded.shape[2], input_padded.shape[3]

        if k_height > n_height_padded or k_width > n_width_padded:
            raise ValueError(
                f"kernel size {self.kernel_size} is larger than the padded input "
                f"({n_height_padded}, {n_width_padded})"
            )

        # Floor division gives the number of complete kernel positions along each axis.
        out_height = (n_height_padded - k_height) // self.stride + 1
        out_width = (n_width_padded - k_width) // self.stride + 1
        # Output feature maps: (batch_size, num_kernels, out_h, out_w)
        output = np.zeros((n_batch, self.n_kernels, out_height, out_width))

        # (1, n_kernels, channels, k_h, k_w) so it broadcasts against every batch item.
        kernels = self.kernels[np.newaxis, ...]
        for y in range(out_height):
            y_start = y * self.stride
            y_end = y_start + k_height
            for x in range(out_width):
                x_start = x * self.stride
                x_end = x_start + k_width

                # Current window for all batch items and channels: (batch, channels, k_h, k_w)
                roi = input_padded[:, :, y_start:y_end, x_start:x_end]
                # Multiply the window by every kernel and reduce over
                # (channels, k_h, k_w) -> (batch, n_kernels).
                responses = np.sum(roi[:, np.newaxis, ...] * kernels, axis = (2, 3, 4))
                output[:, :, y, x] = responses + self.biases
        return output

In [44]:
class MaxPooling2D_scratch(nn.Module):
    """2-D max pooling forward pass written with NumPy.

    Subclasses ``nn.Module`` only so the layer can be invoked as ``layer(x)``.

    Parameters
    ----------
    pool_size : int, default 2
        Side length of the (square) pooling window.
    stride : int, default 2
        Step between successive windows.
    """

    def __init__(self, pool_size = 2, stride = 2):
        super().__init__()
        self.pool_size = pool_size
        self.stride = stride

    def forward(self, input):
        """Take the maximum over each pooling window.

        Parameters
        ----------
        input : array-like
            Shape (channels, height, width) or (batch, channels, height, width).

        Returns
        -------
        np.ndarray
            Float array of shape (batch, channels, out_h, out_w).

        Raises
        ------
        ValueError
            If the input is not 3-D/4-D or the window is larger than the input.
        """
        input = np.asarray(input)
        if input.ndim == 3:
            # Accept a single un-batched image, mirroring Conv2D_scratch.forward.
            input = input[np.newaxis, ...]
        if input.ndim != 4:
            raise ValueError(
                f"expected a 3-D or 4-D input, got an array with {input.ndim} dimension(s)"
            )

        n_batch, n_channels, n_height, n_width = input.shape
        p_height, p_width = self.pool_size, self.pool_size
        if p_height > n_height or p_width > n_width:
            # Otherwise a partial window would be pooled silently.
            raise ValueError(
                f"pool size {self.pool_size} is larger than the input ({n_height}, {n_width})"
            )

        # Floor division gives the number of complete windows along each axis.
        out_h = (n_height - p_height) // self.stride + 1
        out_w = (n_width - p_width) // self.stride + 1
        output = np.zeros((n_batch, n_channels, out_h, out_w))

        for y in range(out_h):
            y_start = y * self.stride
            y_end = y_start + p_height
            for x in range(out_w):
                x_start = x * self.stride
                x_end = x_start + p_width

                # Window for every batch item and channel at once: (batch, channels, p_h, p_w)
                roi = input[:, :, y_start:y_end, x_start:x_end]
                output[:, :, y, x] = roi.max(axis = (2, 3))
        return output

In [45]:
class Flatten_scratch:
    """Collapse every non-batch axis of the input into a single feature axis."""

    def forward(self, input):
        """Return ``input`` reshaped to (batch, features).

        The incoming shape, e.g. (b, n_c, n_h, n_w), is kept on
        ``self.original_shape``.
        """
        incoming_shape = input.shape
        self.original_shape = incoming_shape
        batch_size = incoming_shape[0]
        # Resulting shape: (b, n_c * n_h * n_w)
        flattened = input.reshape(batch_size, -1)
        return flattened

In [46]:
class Dense_scratch:
    """Fully connected layer forward pass: ``input @ weights + biases``.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int
        Number of output units.
    weight_scale : float, default 0.1
        Standard deviation of the random normal weight initialisation.
        The default keeps the original behaviour. A fixed scale does not
        adapt to the fan-in, so with a very large ``input_size`` the outputs
        can grow to the hundreds; passing something like
        ``np.sqrt(2 / input_size)`` keeps them in a reasonable range.
    """

    def __init__(self, input_size, output_size, weight_scale = 0.1):
        super().__init__()
        # Weights: (input_size, output_size); biases: one row, broadcast over the batch.
        self.weights = np.random.randn(input_size, output_size) * weight_scale
        self.biases = np.zeros((1, output_size))

    def forward(self, input):
        """Apply the affine map to ``input`` of shape (batch, input_size).

        The input is kept on ``self.input``; the result has shape
        (batch, output_size).
        """
        self.input = input
        return np.dot(input, self.weights) + self.biases
        

In [47]:
class Relu_scratch:
    """ReLU activation layer: clamps negative values to zero."""

    def forward(self, input):
        """Return element-wise max(0, input); the input is kept on ``self.input``."""
        self.input = input
        activated = np.maximum(0, input)
        return activated

#### Forward pass: running an image through each of the layers defined above

In [48]:
# NOTE(review): this cell repeats the earlier image load and uses a
# machine-specific absolute path.
image_path = 'C:/Users/91932/OneDrive/Desktop/ML/VSCode30/DL/Pictures/dog.png'
input_image = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read.
if input_image is None:
    raise FileNotFoundError(f"Could not read image at: {image_path}")
print(input_image.shape)

(252, 242, 3)


In [49]:
# Reorder the image axes from (height, width, channels) to channels-first
# (channels, height, width), which is the layout the conv layer unpacks.
image_hwc = torch.from_numpy(input_image)
input_image_tensor = image_hwc.permute(2, 0, 1)
print(input_image_tensor.shape)
# One line per axis length: channels, height, width.
for axis_length in input_image_tensor.shape:
    print(axis_length)

torch.Size([3, 252, 242])
3
252
242


In [53]:
# Layer 1: Convolution + ReLU
# The channel count is read from the channels-first image tensor.
image_channels = input_image_tensor.shape[0]
conv1 = Conv2D_scratch(
    n_kernels = 4,
    kernel_size = 3,
    input_channels = image_channels,
    padding = 0,
    stride = 1,
)
conv_output1 = conv1.forward(input_image_tensor)
print(f'Image after conv1 : {conv_output1.shape}')

# Element-wise non-linearity; the shape is unchanged.
reLu = Relu_scratch()
relu_output1 = reLu.forward(conv_output1)
print(f'Image after Relu_1 : {relu_output1.shape}')

Image after conv1 : (1, 4, 250, 240)
Image after Relu_1 : (1, 4, 250, 240)


In [54]:
# Layer 2: Max Pooling
# A 2x2 window with stride 2 halves the height and width of each feature map.
MaxPool_2d = MaxPooling2D_scratch(
    pool_size = 2,
    stride = 2,
)
pool_output1 = MaxPool_2d(relu_output1)
print(f'Image after MaxPool_1 : {pool_output1.shape}')

Image after MaxPool_1 : (1, 4, 125, 120)


In [58]:
# Layer 3 : Convolution + ReLu
conv2 = Conv2D_scratch(
    n_kernels = 8,
    kernel_size = 3,
    # Take the channel count from the tensor that is actually fed into this
    # layer (the pooled output), not from an earlier intermediate.
    input_channels = pool_output1.shape[1],             # 4 input channels
    stride = 1,
    padding = 0
)

conv_output2 = conv2(pool_output1)
print(f'Image after conv2 : {conv_output2.shape}')

relu2 = Relu_scratch()
relu_output2 = relu2.forward(conv_output2)
print(f'Image after Relu_2 : {relu_output2.shape}')

Image after conv2 : (1, 8, 123, 118)
Image after Relu_2 : (1, 8, 123, 118)


In [59]:
# Layer 4 : Flatten
# Collapse (batch, channels, height, width) into (batch, features) for the dense layer.
flat1 = Flatten_scratch()
flatten_output = flat1.forward(relu_output2)
flattened_shape = flatten_output.shape
print(f"After Flatten shape: {flattened_shape}")

After Flatten shape: (1, 116112)


In [69]:
# Layer 5 : Fully connected
num_classes = 2 # if a binary classification problem
# Number of features per sample coming out of the flatten layer.
fc_input_size = flatten_output.shape[1]

fc1 = Dense_scratch(fc_input_size, num_classes)
fc_output1 = fc1.forward(flatten_output)

fc_output_shape = fc_output1.shape
print(f"After FC1 (Output Layer) shape: {fc_output_shape}")

After FC1 (Output Layer) shape: (1, 2)


In [71]:
# Show the un-normalised scores (one per class) for the first item in the batch.
first_sample_logits = fc_output1[0]
print(f"Raw output (logits) from FC layer for batch 0: {first_sample_logits}")

Raw output (logits) from FC layer for batch 0: [-578.32088934  746.63684904]


In [73]:
# Squash the second logit of the first sample into (0, 1).
# NOTE(review): with two output logits a softmax over both may be what is
# intended here -- confirm.
sigmoid(fc_output1[0, 1])

1.0