In [39]:
import numpy as np
import torch

In [None]:
class Conv2D():
    """Naive 2-D convolution (cross-correlation) layer implemented with NumPy.

    Parameters
    ----------
    in_channels : int
        Number of channels expected on axis 1 of the input.
    out_channels : int
        Number of filters, i.e. channels on axis 1 of the output.
    kernel_size : int
        Side length of the square kernel.
    stride : int, default 1
        Step between consecutive windows along both spatial axes.
    padding : int, default 0
        Number of zero rows/columns added on every spatial border.

    Attributes
    ----------
    weights : ndarray of shape (out_channels, in_channels, kernel_size, kernel_size)
        Drawn from a standard normal and scaled by
        sqrt(1 / (in_channels * kernel_size**2)).
    bias : ndarray of shape (out_channels, 1)
        Initialised to zeros; added to every output position of its filter.
    """

    def __init__(self,in_channels,out_channels,kernel_size,stride = 1,padding=0):
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # Honour the constructor arguments (they used to be overwritten with 1).
        self.stride = stride
        self.padding = padding

        fan_in = in_channels * kernel_size * kernel_size
        self.weights = np.random.randn(out_channels,in_channels,kernel_size,kernel_size) * np.sqrt(1. / fan_in)
        self.bias = np.zeros((out_channels, 1))

    def __call__(self, X):
        """Alias for :meth:`forward` so the layer can be called like a function."""
        return self.forward(X)

    def forward(self,X):
        """Convolve a batch of inputs with the layer's filters.

        Parameters
        ----------
        X : ndarray of shape (batch_size, in_channels, H, W)

        Returns
        -------
        ndarray of shape (batch_size, out_channels, H_out, W_out) where
        ``H_out = (H + 2*padding - kernel_size) // stride + 1`` (same for W).

        Raises
        ------
        AssertionError
            If axis 1 of ``X`` does not match ``in_channels``.
        """
        batch_size,in_channels,H,W = X.shape
        assert in_channels == self.in_channels, (
            f"expected {self.in_channels} input channels, got {in_channels}"
        )

        k, s, p = self.kernel_size, self.stride, self.padding
        H_out = (H + 2*p - k) // s + 1
        W_out = (W + 2*p - k) // s + 1

        # Pad by the configured amount so the padded size matches H_out / W_out.
        X_padded = np.pad(X,((0,0),(0,0),(p,p),(p,p)),mode='constant')
        output = np.zeros((batch_size,self.out_channels,H_out,W_out))

        # (1, out_channels) so it broadcasts over the batch axis below.
        bias_row = self.bias.reshape(1, -1)

        for i in range(H_out):
            top = i * s
            for j in range(W_out):
                left = j * s
                # (batch, in_channels, k, k) window shared by all filters.
                region = X_padded[:, :, top:top + k, left:left + k]
                # Contract channel and both kernel axes -> (batch, out_channels).
                output[:, :, i, j] = np.tensordot(
                    region, self.weights, axes=([1, 2, 3], [1, 2, 3])
                ) + bias_row

        return output


In [75]:
class ReLU():
    """Element-wise rectified linear unit: ``max(0, x)``."""

    def __call__(self, X):
        """Alias for :meth:`forward`."""
        return self.forward(X)

    def forward(self,X):
        """Return ``X`` with negative entries replaced by zero.

        The input is cached on ``self.input`` because :meth:`backward`
        needs it to build the gradient mask.
        """
        self.input = X
        return np.maximum(0,X)

    def backward(self,grad_output):
        """Propagate ``grad_output`` through the activation.

        The gradient passes unchanged where the cached input was strictly
        positive and is zeroed elsewhere. Requires a prior :meth:`forward`
        (or ``__call__``) so that ``self.input`` exists.
        """
        grad = grad_output*(self.input > 0)
        return grad

In [37]:
class MaxPooling2D():
    """2-D max pooling applied independently to every channel.

    Parameters
    ----------
    kernel_size : int
        Side length of the square pooling window.
    stride : int
        Step between consecutive windows along both spatial axes.
    """

    def __init__(self,kernel_size,stride):
        self.kernel_size = kernel_size
        self.stride = stride

    def __call__(self, X):
        """Alias for :meth:`forward` so the layer can be called like a function."""
        return self.forward(X)

    def forward(self,X):
        """Down-sample ``X`` by taking the maximum of each window.

        Parameters
        ----------
        X : ndarray of shape (batch_size, channels, h, w)

        Returns
        -------
        ndarray of shape (batch_size, channels, H_out, W_out) where
        ``H_out = (h - kernel_size) // stride + 1`` (same for W).
        """
        (batch_size,channels,h,w) = X.shape
        k, s = self.kernel_size, self.stride

        H_out = (h - k) // s + 1
        W_out = (w - k) // s + 1

        output = np.zeros((batch_size,channels,H_out,W_out))

        for i in range(H_out):
            top = i * s
            for j in range(W_out):
                left = j * s
                region = X[:, :, top:top + k, left:left + k]
                # Reduce only the two spatial axes so each channel keeps its own
                # maximum (slicing all channels into one window mixed them up).
                output[:, :, i, j] = np.max(region, axis=(2, 3))

        return output

In [38]:
class Flatten:
    """Collapse every axis after the first into a single feature axis."""

    def forward(self, X):
        """Return ``X`` reshaped to ``(X.shape[0], -1)``."""
        n_samples = X.shape[0]
        flattened = X.reshape(n_samples, -1)
        return flattened

In [54]:
class Softmax():
    """Row-wise softmax over axis 1."""

    def forward(self,X):
        """Return the softmax of ``X`` along axis 1.

        The per-row maximum is subtracted before exponentiating, so the
        largest exponent in every row is exactly zero.
        """
        row_peak = np.max(X, axis=1, keepdims=True)
        numerators = np.exp(X - row_peak)
        denominators = np.sum(numerators, axis=1, keepdims=True)
        return numerators / denominators

    def __call__(self,x):
        """Alias for :meth:`forward`."""
        return self.forward(x)

In [74]:
class Dense():
    """Fully connected layer ``X @ weights + bias`` with a built-in SGD step.

    Parameters
    ----------
    input_dim : int
        Number of input features (rows of ``weights``).
    output_dim : int
        Number of output features (columns of ``weights``).
    learning_rate : float, default 0.01
        Step size used by :meth:`backward` when updating the parameters.

    Attributes
    ----------
    weights : ndarray of shape (input_dim, output_dim)
        Drawn from a standard normal and scaled by sqrt(2 / input_dim).
    bias : ndarray of shape (1, output_dim)
        Initialised to zeros.
    """

    def __init__(self,input_dim,output_dim,learning_rate = 0.01):
        # Use input_dim here; the builtin ``input`` is a function, not a size.
        self.weights = np.random.randn(input_dim,output_dim) * np.sqrt(2. / input_dim)
        self.bias = np.zeros((1,output_dim))
        self.learning_rate = learning_rate

    def __call__(self,X):
        """Alias for :meth:`forward`."""
        return self.forward(X)

    def forward(self,X):
        """Return ``X @ weights + bias`` and cache ``X`` for :meth:`backward`."""
        # Cache here (not only in __call__) so forward() -> backward() works too.
        self.input = X
        return np.dot(X,self.weights) + self.bias

    def backward(self,grad_output):
        """Update the parameters in place and return the gradient w.r.t. the input.

        Parameters
        ----------
        grad_output : ndarray of shape (batch_size, output_dim)
            Gradient of the loss with respect to this layer's output.

        Returns
        -------
        ndarray of shape (batch_size, input_dim)
            Gradient of the loss with respect to the input cached by the most
            recent :meth:`forward` call.
        """
        grad_weights = self.input.T @ grad_output
        grad_bias = np.sum(grad_output,axis=0,keepdims=True)

        # The input gradient must use the weights that produced the forward
        # pass, so compute it before the update below modifies them.
        grad_input = grad_output @ self.weights.T

        self.weights -= self.learning_rate*grad_weights
        self.bias -= self.learning_rate*grad_bias

        return grad_input

In [72]:
class MLP():
    """Stack of Dense+ReLU blocks followed by a Dense+Softmax output block.

    Parameters
    ----------
    input_dim : int
        Feature count passed to the first ``Dense`` layer.
    hidden_layers : sequence of int
        Width of each hidden ``Dense`` layer, in order. Must contain at least
        one entry (the first element is indexed unconditionally).
    output_dim : int
        Width of the final ``Dense`` layer that feeds ``Softmax``.
    """

    def __init__(self,input_dim,hidden_layers,output_dim):
        # Input block.
        stack = [Dense(input_dim, hidden_layers[0]), ReLU()]

        # One Dense+ReLU block per consecutive pair of hidden widths.
        for idx in range(1, len(hidden_layers)):
            stack.append(Dense(hidden_layers[idx - 1], hidden_layers[idx]))
            stack.append(ReLU())

        # Output block.
        stack.append(Dense(hidden_layers[-1], output_dim))
        stack.append(Softmax())

        self.layers = stack

    def forward(self,X):
        """Feed ``X`` through every layer in order and return the final output.

        Prints the shape of the tensor entering each layer.
        """
        activations = X
        for layer in self.layers:
            print(activations.shape)
            activations = layer(activations)
        return activations

    def __call__(self, X):
        """Alias for :meth:`forward`."""
        return self.forward(X)

In [73]:
# Demo: run a single random, already-flattened sample through the MLP.
# 8 * 16 * 16 = 2048 features; presumably 8 channels of 16x16 coming out of
# the CNN stage -- confirm against the upstream layers.
n_features = 8 * 16 * 16
X_flat = np.random.randn(1, n_features)

# Network layout: 2048 inputs -> hidden widths 128 and 64 -> 10 outputs.
mlp = MLP(input_dim=2048, hidden_layers=[128, 64], output_dim=10)

# Forward pass (the model prints the shape entering each layer).
output = mlp(X_flat)

# Expected output shape: (1, 10).
print("MLP Output Shape:", output.shape)
print("MLP Output (Softmax Probabilities):", output)

(1, 2048)
(1, 128)
(1, 128)
(1, 64)
(1, 64)
(1, 10)
MLP Output Shape: (1, 10)
MLP Output (Softmax Probabilities): [[0.00813667 0.06357433 0.07079303 0.01003157 0.09055702 0.1995619
  0.12084198 0.13783279 0.24899346 0.04967727]]
