**Vision Model v0** \
Goal: Recognize images of numbers from a Hugging Face dataset with the appropriate labels

**Data Wrangling**

In [1]:
from datasets import load_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
ds = load_dataset("ylecun/mnist")

In [3]:
ds

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 60000
    })
    test: Dataset({
        features: ['image', 'label'],
        num_rows: 10000
    })
})

In [4]:
import torch
import torch.cuda
import numpy as np
from numba import cuda

In [5]:
# convert pngs to tensors containing pixel values
rows = 60000
width = 28
height = 28

images = torch.empty((rows, width, height), dtype=torch.int64)
for i, row in enumerate(ds['train'].select(range(rows))):
    images[i] = torch.reshape(torch.tensor(list(row['image'].getdata())), (width, height))

**Forward Propagation**

In [6]:
# define kernels
torch.manual_seed(3500)
min_fence = -0.05
max_fence = 0.2

# initialize kernels with random values and transform them to range [-0.2, 0.2)
kernel_layer_1 = torch.rand(2, 5, 5)
kernel_layer_1 = kernel_layer_1 * (max_fence - min_fence) + min_fence

kernel_layer_2 = torch.rand(2, 2, 3, 3)
kernel_layer_2 = kernel_layer_2 * (max_fence - min_fence) + min_fence

In [7]:
print(kernel_layer_1)
print(kernel_layer_2)

tensor([[[-0.0364,  0.1154,  0.1450, -0.0022,  0.0593],
         [ 0.0242,  0.0963,  0.0600,  0.0817,  0.0017],
         [ 0.1528,  0.0777,  0.1599,  0.0273,  0.1158],
         [ 0.0239,  0.0899,  0.0634, -0.0149, -0.0376],
         [ 0.0950,  0.0329,  0.0358, -0.0314,  0.0032]],

        [[ 0.1508,  0.0007,  0.0146,  0.1369,  0.1446],
         [ 0.0624,  0.0093,  0.0758,  0.1156,  0.1740],
         [ 0.0936,  0.0291,  0.1949,  0.1970,  0.0746],
         [ 0.1263,  0.1895,  0.0419,  0.0308,  0.0056],
         [ 0.1420,  0.1938,  0.1158,  0.0516,  0.1152]]])
tensor([[[[-0.0397,  0.1152,  0.1942],
          [ 0.1029,  0.1195,  0.1461],
          [ 0.1586,  0.1036,  0.0433]],

         [[-0.0214,  0.1368,  0.0798],
          [ 0.1655,  0.0692,  0.0755],
          [ 0.0826,  0.1665,  0.0806]]],


        [[[ 0.0009,  0.1110,  0.0878],
          [ 0.1642, -0.0338,  0.1944],
          [ 0.0488,  0.0842,  0.1228]],

         [[ 0.0895, -0.0187,  0.1870],
          [-0.0414,  0.0135,  0.0015],

In [8]:
"""
Transforms matrices into vectors, verifies than they are of the same length, 
then performs a convolution on the vectors by multiplying the corresponding 
components and taking the sum of those products
"""
def convolve(m1: torch.Tensor, m2: torch.Tensor) -> torch.Tensor:
    if len(m1.shape) != 2 or len(m2.shape) != 2:
        raise Exception(f"Matrix m1 has a rank of {len(m1.shape)} and " + 
                        f"Matrix m2 has a rank of {len(m2.shape)}")
    if m1.shape[0] * m1.shape[1] != m2.shape[0] * m2.shape[1]:
        raise Exception("Linear length of matrices are not equal")
    m1 = torch.reshape(m1, (-1,)) 
    m2 = torch.reshape(m2, (-1,))
    return sum(m1 * m2)

In [9]:
import math

In [10]:
def traverse_image(image: torch.Tensor, kernel: torch.Tensor, step: int, activationf: str) -> torch.Tensor:
    if len(kernel.shape) != 2:
        raise Exception(f"Kernel has a rank of {len(kernel.shape)}")
    if kernel.shape[0] % 2 == 0:
        raise Exception("Kernel matrix rank is not odd")
        
    width = kernel.shape[0]
    output = []
    
    for i in range(0, image.shape[1] - width + 1, step):
        for j in range(0, image.shape[0] - width + 1, step):
            current = image[i:i+width,j:j+width]
            convolution = convolve(current, kernel)
            if activationf.lower() == 'relu':
                func = torch.nn.ReLU()
            elif activationf.lower() == 'sigmoid':
                func = torch.nn.Sigmoid()
            else:
                raise Exception(f'Argument "{activationf}" is not a valid activation function')
            output.append(func(convolution))
                
    output = torch.Tensor(output)
    rank = int(math.sqrt(len(output)))
    output = torch.reshape(output, (rank, rank))
    return output

In [11]:
result = traverse_image(images[0], kernel_layer_1[0], 1, 'relu')

torch.Size([22, 22])