In [28]:
import torch
import torch.nn.functional as F
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


In [29]:
class MNISTLoader:
    def __init__(self, train_images_path, train_labels_path, test_images_path, test_labels_path):
        self.train_images_path = train_images_path
        self.train_labels_path = train_labels_path
        self.test_images_path = test_images_path
        self.test_labels_path = test_labels_path

    def load_images(self, filename):
        with open(filename, 'rb') as f:
            # Skip the magic number and dimensions (first 16 bytes)
            f.read(16)
            # Read the remaining bytes and reshape into a 28x28 array
            data = np.frombuffer(f.read(), dtype=np.uint8)
            return data.reshape(-1, 28, 28).astype(np.float32) / 255.0  # Normalize to [0, 1]

    def load_labels(self, filename):
        with open(filename, 'rb') as f:
            # Skip the magic number (first 8 bytes)
            f.read(8)
            # Read the remaining bytes as labels
            return np.frombuffer(f.read(), dtype=np.uint8)

    def load_data(self):
        train_images = self.load_images(self.train_images_path)
        train_labels = self.load_labels(self.train_labels_path)
        test_images = self.load_images(self.test_images_path)
        test_labels = self.load_labels(self.test_labels_path)

        # Flatten images to 1D
        # train_images = np.reshape(train_images, (train_images.shape[0], -1))
        # test_images = np.reshape(test_images, (test_images.shape[0], -1))

        return train_images, train_labels, test_images, test_labels


In [30]:
mnist_loader = MNISTLoader(
    train_images_path='C:\\Users\\Raj.Salunkhe\\Desktop\\nn from scratch\\train-images.idx3-ubyte',
    train_labels_path='C:\\Users\\Raj.Salunkhe\\Desktop\\nn from scratch\\train-labels.idx1-ubyte',
    test_images_path='C:\\Users\\Raj.Salunkhe\\Desktop\\nn from scratch\\t10k-images.idx3-ubyte',
    test_labels_path='C:\\Users\\Raj.Salunkhe\\Desktop\\nn from scratch\\t10k-labels.idx1-ubyte'
)

train_images, train_labels, test_images, test_labels = mnist_loader.load_data()

In [31]:
train_images_tensor = torch.from_numpy(train_images).clone()
train_labels_tensor = torch.from_numpy(train_labels).clone()
test_images_tensor = torch.from_numpy(test_images).clone()
test_labels_tensor = torch.from_numpy(test_labels).clone()

In [32]:
train_images_tensor.shape

torch.Size([60000, 28, 28])

In [40]:
class Conv2D(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=1, pool_size=2, pool_stride=2, lr = 0.001, apply_padding = True, apply_pooling = False):
        """
        Initialize a Conv2D layer.
        Args:
            in_channels (int): Number of input channels.
            out_channels (int): Number of output channels.
            kernel_size (int or tuple): Size of the convolution kernel (filter).
            stride (int): Stride of the convolution.
            padding (int): Amount of zero-padding around the input.
        """
        super(Conv2D, self).__init__()  # Initialize the parent class (nn.Module)
        
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size if isinstance(kernel_size, tuple) else (kernel_size, kernel_size)
        self.stride = stride
        self.padding = padding
        self.pool_size = pool_size
        self.pool_stride = pool_stride
        self.learning_rate = lr
        self.apply_padding = apply_padding
        self.apply_pooling = apply_pooling
        
        
        # Initialize weights and biases using nn.Parameter to track gradients
        self.weights = torch.randn(out_channels, in_channels, *self.kernel_size, requires_grad=True) * 0.01
        self.biases = torch.zeros(out_channels, 1, requires_grad=True)

        self.params = [list(torch.flatten(self.weights))].extend(list(torch.flatten(self.biases)))

    def add_padding(self, input_matrices: torch.tensor):
        batch_size, in_channels, height, width = input_matrices.shape

        padded_matrices = torch.zeros(batch_size, in_channels, height + 2 * self.padding, width + 2 * self.padding)

        for b in range(batch_size):
            for c in range(in_channels):
                input_matrix = input_matrices[b, c]
                
                horizontal_pads = torch.zeros(height, self.padding, requires_grad=True)
                vertical_pads = torch.zeros(self.padding, width + 2 * self.padding, requires_grad=True)

                padded_matrix = torch.cat((horizontal_pads, input_matrix), dim=1)  
                padded_matrix = torch.cat((padded_matrix, horizontal_pads), dim=1)

                padded_matrix = torch.cat((vertical_pads, padded_matrix), dim=0)  
                padded_matrix = torch.cat((padded_matrix, vertical_pads), dim=0)

                padded_matrices[b, c] = padded_matrix

        return padded_matrices
    
    
    def forward(self, input_matrix):
        """
        Key Assumptions:
        1. Input, filters are square
        2. Stride and padding is one dimensional
        """
        B, C, H, W = input_matrix.shape
        if self.apply_padding:
            input_matrix = self.add_padding(input_matrix)

        if self.apply_pooling:
            output = self.pooling(input_matrix ,  pool_type='max')
            return output

        # Extracting the dimensions out of the input matrix
        kernel_height, kernel_width = self.kernel_size
        stride = self.stride
        padding = self.padding

        OH = round(((W + 2*padding - kernel_height)/stride)) + 1
        OW = round(((W + 2*padding - kernel_width)/stride)) + 1


        output = torch.zeros(B, self.out_channels, OH, OW)

        for i in range(OH):
            for j in range(OW):
                height_start = i * stride
                width_start = j * stride

                height_end = height_start + kernel_height
                width_end = width_start + kernel_width

                input_region = input_matrix[:, :, height_start:height_end, width_start:width_end]

                for k in range(self.out_channels):
                    output[:, k, i, j] = torch.sum(input_region * self.weights[k], dim=(1, 2, 3)) + self.biases[k]

        return output
    
    def pooling(self, input_matrix, pool_type='max'):
        B, C, H, W = input_matrix.shape

        OH = (H - self.pool_size) // self.pool_stride + 1
        OW = (W - self.pool_size) // self.pool_stride + 1

        output = torch.zeros(B, C, OH, OW)

        for b in range(B):
            for c in range(C):
                for i in range(OH):
                    for j in range(OW):
                        height_start = i * self.pool_stride
                        width_start = j * self.pool_stride

                        height_end = height_start + self.pool_size
                        width_end = width_start + self.pool_size

                        input_region = input_matrix[b, c, height_start:height_end, width_start:width_end]

                        if pool_type == 'max':
                            # Max pooling
                            output[b, c, i, j] = torch.max(input_region)
                        elif pool_type == 'average':
                            # Average pooling
                            output[b, c, i, j] = torch.mean(input_region)
                        else:
                            raise ValueError(f"Unsupported pool_type: {pool_type}")

        return output

    def mse_loss(self, output, target):
        """Mean Squared Error Loss"""
        return torch.mean((output - target) ** 2)

    def backward(self, loss):
        """
        Backward pass to compute the gradients using autograd.
        Args:
            loss: The computed loss value (scalar).
        """
        # Perform backpropagation to compute gradients
        loss.backward()

    def update_weights(self):
        """
        Update the weights using gradient descent (or any optimizer).
        Here, we are doing simple gradient descent.
        """
        with torch.no_grad():  
            self.weights -= self.learning_rate * self.weights.grad
            self.biases -= self.learning_rate * self.biases.grad

            self.weights.grad.zero_()
            self.biases.grad.zero_()
        

conv = Conv2D(in_channels=3, out_channels=1, kernel_size=3)
input_matrix = torch.randn(1, 3, 10, 10)
output = conv.forward(input_matrix)
pooled_op = conv.pooling(output)

output.shape



torch.Size([1, 1, 10, 10])

In [None]:
model_layers = [
    Conv2D(in_channels=1, out_channels=3, kernel_size=3, apply_padding=True),
    Conv2D(in_channels=3, out_channels=2, kernel_size=3, apply_padding=False),
    Conv2D(in_channels=2, out_channels=1, kernel_size=3, apply_pooling=True)

]

model_params = []
for layer in model_layers:
    # model_params.extend(layer.params)
    model_params.extend(list(torch.flatten(layer.weights)))
    model_params.extend(list(torch.flatten(layer.biases)))


initial_input = None
target = None
final_layer_weights = torch.randn(100, 10, requires_grad=True)
final_layer_biases = torch.randn(10, requires_grad=True)



def forward(initial_input, target, final_layer_weights, final_layer_biases):
    output = initial_input
    for layer in model_layers:
        output = layer.forward(output)
    
    # Flatten properly
    flattened_layer = output.view(output.shape[0], -1)

    # Fully connected layer
    output = flattened_layer @ final_layer_weights + final_layer_biases

    # Mean Squared Error loss
    loss = torch.mean((output - target) ** 2)

    return loss

In [48]:
def train(inputs, targets, lr = 0.01, iters = 100):
    for i in range(iters):
        loss = forward(inputs, targets, final_layer_weights, final_layer_biases)
        print(loss.item())
        loss.backward()

        for param in model_params:
            param -= lr * param.grad
            param.grad.zero_()


    return loss


        


In [49]:
import math

inputs = train_images_tensor
targets = torch.from_numpy(train_labels).clone().float()

loss = math.inf

final_loss = train(inputs , targets)
    

ValueError: not enough values to unpack (expected 4, got 3)

In [8]:
inputs = torch.randn(1, 1, 28, 28)  # Batch of 32 grayscale images (28x28)
labels = torch.randint(0, 10, (1,))  # Random labels

# Forward pass
outputs = model.forward(inputs)

# Compute loss and backward pass
loss = model.backward(outputs, labels)
print(f"Loss: {loss.item()}")

# Update weights
model.update_weights()

Loss: 2.2905924320220947


AttributeError: 'Linear' object has no attribute 'learning_rate'