**Vision Model v0** \
Goal: Classify images of handwritten digits from the Hugging Face MNIST dataset (`ylecun/mnist`) with their correct labels\
Author: Atman Singh\
Date: 12/29/2024

**Data Wrangling**

In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the "ylecun/mnist" dataset through the Hugging Face `datasets` library
ds = load_dataset("ylecun/mnist")

In [3]:
# Display the dataset's splits together with their features and row counts
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [4]:
import torch
import torch.cuda
import numpy as np
from numba import cuda

In [5]:
# Convert the training PNGs into one integer tensor of pixel values.
rows = 60000
width = 28
height = 28

# Layout is (image index, pixel row, pixel column). Pixel data is row-major,
# so the second axis is the image height and the third is the width.
images = torch.empty((rows, height, width), dtype=torch.int64)
for i, row in enumerate(ds['train'].select(range(rows))):
    # np.array reads the image buffer in one call instead of building a
    # per-pixel Python list; the values are cast to int64 on assignment.
    # NOTE(review): assumes each image is single-channel and height x width
    # in size -- confirm against the dataset.
    images[i] = torch.from_numpy(np.array(row['image']))

**Forward Propagation**

In [6]:
# define kernels
# Seed the generator so the kernels are reproducible across runs
torch.manual_seed(3500)

# Bounds of the half-open interval [min_fence, max_fence) the kernels live in
min_fence, max_fence = -0.2, 0.2

# torch.rand samples from [0, 1); scaling by the interval width and shifting
# by the lower bound maps the samples onto [-0.2, 0.2).
# Layer one: 2 kernels, 1 input channel, 5x5 each.
kernel_layer_one = torch.rand(2, 1, 5, 5) * (max_fence - min_fence) + min_fence
# Layer two: 4 kernels, 2 input channels, 3x3 each.
kernel_layer_two = torch.rand(4, 2, 3, 3) * (max_fence - min_fence) + min_fence

In [7]:
# Show both kernel layers, one after the other
print(kernel_layer_one, kernel_layer_two, sep="\n")

tensor([[[[-0.1782,  0.0646,  0.1120, -0.1235, -0.0251],
          [-0.0813,  0.0340, -0.0240,  0.0107, -0.1173],
          [ 0.1244,  0.0044,  0.1358, -0.0763,  0.0652],
          [-0.0817,  0.0238, -0.0186, -0.1438, -0.1802],
          [ 0.0320, -0.0673, -0.0626, -0.1702, -0.1149]]],


        [[[ 0.1213, -0.1190, -0.0966,  0.0991,  0.1113],
          [-0.0201, -0.1052,  0.0012,  0.0649,  0.1585],
          [ 0.0298, -0.0734,  0.1919,  0.1951, -0.0007],
          [ 0.0821,  0.1832, -0.0529, -0.0707, -0.1110],
          [ 0.1072,  0.1901,  0.0652, -0.0375,  0.0644]]]])
tensor([[[[-0.1835,  0.0642,  0.1907],
          [ 0.0446,  0.0712,  0.1137],
          [ 0.1337,  0.0457, -0.0508]],

         [[-0.1543,  0.0988,  0.0077],
          [ 0.1449, -0.0093,  0.0008],
          [ 0.0122,  0.1464,  0.0089]]],


        [[[-0.1186,  0.0576,  0.0205],
          [ 0.1428, -0.1741,  0.1910],
          [-0.0419,  0.0147,  0.0765]],

         [[ 0.0232, -0.1500,  0.1792],
          [-0.1863, -0.09

In [8]:
"""
Transforms matrices into vectors, verifies than they are of the same length, 
then performs a convolution on the vectors by multiplying the corresponding 
components and taking the sum of those products
"""
def convolve(m1: torch.Tensor, m2: torch.Tensor) -> torch.Tensor:
    """
    Return the sum of the element-wise products of two rank-2 tensors.

    Both tensors are flattened in row-major order before being multiplied,
    so they only need the same number of elements, not the same shape.

    Args:
        m1: rank-2 tensor.
        m2: rank-2 tensor with as many elements as ``m1``.

    Returns:
        A 0-dimensional tensor holding the sum of products.

    Raises:
        ValueError: if either tensor is not rank 2, or if the element
            counts differ.
    """
    if m1.dim() != 2 or m2.dim() != 2:
        raise ValueError(f"Matrix m1 has a rank of {m1.dim()} and " +
                         f"Matrix m2 has a rank of {m2.dim()}")
    if m1.numel() != m2.numel():
        raise ValueError("Linear length of matrices are not equal")
    # torch.sum reduces inside torch rather than iterating the elements in
    # Python, and always yields a tensor (builtin sum returns int 0 for
    # empty input).
    return torch.sum(torch.reshape(m1, (-1,)) * torch.reshape(m2, (-1,)))

In [9]:
import math

In [130]:
def traverse_image(image: torch.Tensor, kernel: torch.Tensor, step: int) -> torch.Tensor:
    """
    Slide ``kernel`` over ``image`` and return the map of window responses.

    For every kernel-sized window of the image (no padding, windows
    ``step`` apart along both axes) the response is the sum of the
    element-wise products of the window and the kernel.

    Args:
        image: rank-2 tensor of shape (rows, cols); need not be square.
        kernel: rank-2 tensor whose dimensions are both odd.
        step: stride between consecutive windows; must be at least 1.

    Returns:
        A tensor of shape ((rows - kernel_rows) // step + 1,
        (cols - kernel_cols) // step + 1) in torch's default floating
        dtype. If the image is smaller than the kernel along either axis,
        an empty (0, 0) tensor is returned.

    Raises:
        ValueError: if ``image`` or ``kernel`` is not rank 2, if a kernel
            dimension is even, or if ``step`` is less than 1.
    """
    if len(image.shape) != 2:
        raise ValueError(f"Image has a rank of {len(image.shape)}")
    if len(kernel.shape) != 2:
        raise ValueError(f"Kernel has a rank of {len(kernel.shape)}")
    kernel_rows, kernel_cols = kernel.shape
    if kernel_rows % 2 == 0 or kernel_cols % 2 == 0:
        raise ValueError("Kernel matrix dimensions are not odd")
    if step < 1:
        raise ValueError(f"Step must be at least 1, got {step}")

    out_dtype = torch.get_default_dtype()
    if image.shape[0] < kernel_rows or image.shape[1] < kernel_cols:
        # No window fits: keep returning an empty map rather than raising.
        return torch.empty((0, 0), dtype=out_dtype)

    # unfold along rows and then columns gives a view of shape
    # (out_rows, out_cols, kernel_rows, kernel_cols) where
    # windows[i, j] == image[i*step : i*step + kernel_rows,
    #                        j*step : j*step + kernel_cols].
    windows = image.unfold(0, kernel_rows, step).unfold(1, kernel_cols, step)
    # The kernel broadcasts over the two leading window axes; summing the
    # two trailing axes leaves one response per window.
    return (windows * kernel).sum(dim=(-2, -1)).to(out_dtype)

In [131]:
def max_pool(matrix: torch.Tensor, pool_size: tuple) -> torch.Tensor:
    """
    Downsample ``matrix`` by taking the maximum of each non-overlapping tile.

    Args:
        matrix: rank-2 tensor of shape (rows, cols); need not be square.
        pool_size: (pool_rows, pool_cols) size of each tile. ``rows`` must
            be a multiple of ``pool_rows`` and ``cols`` of ``pool_cols``.

    Returns:
        A tensor of shape (rows // pool_rows, cols // pool_cols) in torch's
        default floating dtype, holding the maximum of each tile.

    Raises:
        ValueError: if ``matrix`` is not rank 2, if a pool dimension is less
            than 1, or if the matrix shape is not a multiple of the pool
            size.
    """
    if len(matrix.shape) != 2:
        raise ValueError(f"Matrix has a rank of {len(matrix.shape)}")
    pool_rows, pool_cols = pool_size[0], pool_size[1]
    if pool_rows < 1 or pool_cols < 1:
        raise ValueError(f"Pool size {pool_size} must be positive in both dimensions")
    rows, cols = matrix.shape
    if rows % pool_rows != 0 or cols % pool_cols != 0:
        raise ValueError(f"Matrix shape {tuple(matrix.shape)} is not a multiple of pool size {pool_size}")

    out_dtype = torch.get_default_dtype()
    out_rows, out_cols = rows // pool_rows, cols // pool_cols
    if matrix.numel() == 0:
        # Nothing to reduce; return an empty map of the matching shape.
        return torch.empty((out_rows, out_cols), dtype=out_dtype)

    # Split each axis into (tile index, offset inside tile), then reduce
    # over the two offset axes so one maximum per tile remains.
    tiles = matrix.reshape(out_rows, pool_rows, out_cols, pool_cols)
    return tiles.amax(dim=(1, 3)).to(out_dtype)

In [146]:
def _resolve_activation(name: str) -> torch.nn.Module:
    """Return the torch module for an activation name ('relu' or 'sigmoid', case-insensitive)."""
    activations = {'relu': torch.nn.ReLU, 'sigmoid': torch.nn.Sigmoid}
    key = name.lower()
    if key not in activations:
        raise ValueError(f'Activation function "{name}" ' +
                         f"is not a valid activation function.")
    return activations[key]()


def forward(data: torch.Tensor, kernels: list, activation_functions: list, step: int, pool_size: tuple) -> torch.Tensor:
    """
    Run every input matrix through a stack of convolution layers.

    Each layer convolves all current channels with each of its kernels,
    sums the per-channel results, applies the layer's activation function
    and max-pools the result. The pooled maps become the next layer's
    channels.

    Args:
        data: tensor or list whose items are the rank-2 input matrices.
        kernels: list with one 4-D tensor per layer, indexed as
            (kernel, channel, row, col). A layer's channel count must equal
            the number of channels produced by the previous layer (1 for
            the first layer).
        activation_functions: list with one name per layer, 'relu' or
            'sigmoid' (case-insensitive).
        step: stride passed to ``traverse_image``.
        pool_size: tile size passed to ``max_pool``.

    Returns:
        The per-input stacks of final feature maps, stacked along a new
        leading axis.

    Raises:
        TypeError: if ``data`` is neither a tensor nor a list, or
            ``kernels`` is not a list.
        ValueError: if the number of kernel layers and activation functions
            differ, an activation name is unknown, or a layer's channel
            count does not match its input.

    Side effects:
        Prints an estimated run time to stdout.
    """
    CHANNEL_INDEX = 1
    # Per-matrix cost used for the ETA.
    # NOTE(review): presumably measured empirically -- confirm it is still accurate.
    MS_PER_MATRIX = 568.25

    # Validate before touching the arguments (the ETA below calls len(data)).
    if not isinstance(data, (torch.Tensor, list)):
        raise TypeError(f"Data is not of type 'torch.Tensor' or 'list', inputted type is: {type(data)}")
    if not isinstance(kernels, list):
        raise TypeError(f"Kernels is not of type 'list', inputted type is: {type(kernels)}")
    if len(kernels) != len(activation_functions):
        raise ValueError(f"Number of kernels inputted ({len(kernels)}) does " +
                         f"not equal number of activation functions inputted " +
                         f"({len(activation_functions)})")

    print(f"ETA: {round(len(data) * (MS_PER_MATRIX / 1000 / 60), 2)} minutes")

    # Resolve the activations once; they do not depend on the input matrix.
    layer_activations = [_resolve_activation(name) for name in activation_functions]

    output = []
    for matrix in data:
        # The raw input counts as a single channel.
        current_matrices = [matrix]
        for j, (kernel_layer, func) in enumerate(zip(kernels, layer_activations)):
            if kernel_layer.shape[CHANNEL_INDEX] != len(current_matrices):
                raise ValueError(f"On iteration {j}: Number of kernels ({kernel_layer.shape[CHANNEL_INDEX]}) " +
                                 f"does not equal number of channels ({len(current_matrices)})")

            updated_matrices = []
            for kernel in kernel_layer:
                # Convolve each input channel with the matching kernel slice.
                convolutions = [
                    traverse_image(current_matrix, channel, step)
                    for current_matrix, channel in zip(current_matrices, kernel)
                ]
                # Sum over channels, then activate and pool.
                convolution = func(torch.stack(convolutions, dim=0).sum(dim=0))
                updated_matrices.append(max_pool(convolution, pool_size))

            current_matrices = updated_matrices

        output.append(torch.stack(current_matrices, dim=0))

    return torch.stack(output, dim=0)

In [149]:
%%time

# Extract features from the first image with the forward-propagation pipeline
kernels = [kernel_layer_one, kernel_layer_two]
activation_functions = ['relu', 'sigmoid']
features = forward(
    data=images[:1],
    kernels=kernels,
    activation_functions=activation_functions,
    step=1,
    pool_size=(2, 2),
)
print(features)

ETA: 0.01 minutes
tensor([[[[1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
          [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 9.5345e-01],
          [9.9962e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
          [9.9939e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00],
          [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 9.5363e-01]],

         [[9.9998e-01, 9.9960e-01, 1.5647e-12, 2.4972e-21, 6.5391e-18],
          [9.9996e-01, 9.5045e-01, 5.8247e-12, 1.1468e-10, 2.9252e-01],
          [1.0000e+00, 9.5221e-01, 9.3785e-01, 9.0696e-08, 4.3083e-12],
          [9.9999e-01, 1.0000e+00, 9.9989e-01, 5.3226e-19, 1.1943e-04],
          [9.9999e-01, 7.8089e-16, 4.1102e-18, 8.9185e-01, 2.1627e-01]],

         [[1.0000e+00, 1.0000e+00, 1.0000e+00, 9.9761e-01, 9.6000e-01],
          [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0042e-08, 1.1676e-02],
          [1.0000e+00, 1.0000e+00, 1.0000e+00, 1.3398e-03, 5.3263e-03],
          [9.9996e-01, 1.0000e+00, 1.0000e

In [140]:
# Collapse the feature maps into a single 1-D vector
flattened = features.flatten()
print(flattened)

tensor([1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 9.5345e-01, 9.9962e-01, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 9.9939e-01, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
        9.5363e-01, 9.9998e-01, 9.9960e-01, 1.5647e-12, 2.4972e-21, 6.5391e-18,
        9.9996e-01, 9.5045e-01, 5.8247e-12, 1.1468e-10, 2.9252e-01, 1.0000e+00,
        9.5221e-01, 9.3785e-01, 9.0696e-08, 4.3083e-12, 9.9999e-01, 1.0000e+00,
        9.9989e-01, 5.3226e-19, 1.1943e-04, 9.9999e-01, 7.8089e-16, 4.1102e-18,
        8.9185e-01, 2.1627e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 9.9761e-01,
        9.6000e-01, 1.0000e+00, 1.0000e+00, 1.0000e+00, 1.0042e-08, 1.1676e-02,
        1.0000e+00, 1.0000e+00, 1.0000e+00, 1.3398e-03, 5.3263e-03, 9.9996e-01,
        1.0000e+00, 1.0000e+00, 2.1976e-03, 6.8715e-10, 1.0000e+00, 1.0000e+00,
        1.0000e+00, 5.3075e-06, 7.9571e-