In [None]:
# plotting
%matplotlib inline
# imports
import torch
import numpy as np
import matplotlib.pyplot as plt
from itertools import product


In [None]:
# Load the bundled CIFAR sample array from disk.
cifar_sample = np.load('resources/cifar_sample.npy')
# Keep the first entry of the sample as the working image.
np_image = cifar_sample[0]
# Move axis 0 to the end before handing the array to matplotlib
# (assumes a channel-first layout - TODO confirm); this should plot a blurry frog.
plt.imshow(np.transpose(np_image, (1, 2, 0)))

In [115]:
from torchvision import transforms
import math

def convolution(image: torch.Tensor,
                filters: torch.Tensor,
                bias: torch.Tensor,
                stride: int = 1,
                padding: int = 1):
    """
    Apply a bank of 2D filters to a single image (cross-correlation, i.e. the
    filters are not flipped), followed by a per-filter bias.

    :param image: torch.Tensor
        Input image of shape (C, H, W)
    :param filters: torch.Tensor
        Filters to use in convolution of shape (K, C, FH, FW); the filters
        may be non-square
    :param bias: torch.Tensor
        Bias vector of shape (K,)
    :param stride: int
        Stride to use in convolution (must be >= 1)
    :param padding: int
        Zero-padding to add on all sides of the image
    :return: torch.Tensor
        Output of shape (K, H_out, W_out) in the dtype of ``filters``, where
        H_out = (H + 2*padding - FH) // stride + 1 and
        W_out = (W + 2*padding - FW) // stride + 1
    :raises AssertionError:
        If the image and the filters disagree on the number of channels
    :raises ValueError:
        If ``stride`` is smaller than 1 or a filter does not fit inside the
        padded image
    """
    # get image and filter dimensions (height and width are kept separate so
    # that non-square filters are handled correctly)
    img_channels, img_height, img_width = image.shape
    n_filters, filter_channels, filter_height, filter_width = filters.shape
    assert img_channels == filter_channels , 'Number of image channels should equal number of filter channels'

    if stride < 1:
        raise ValueError(f'stride must be a positive integer, got {stride}')
    if (filter_height > img_height + 2 * padding
            or filter_width > img_width + 2 * padding):
        raise ValueError('filter does not fit inside the padded image')

    # zero-pad the two spatial dimensions; the pad tuple is (left, right, top, bottom)
    padded = torch.nn.functional.pad(image.to(filters.dtype),
                                     (padding, padding, padding, padding))

    # sliding-window view of shape (C, H_out, W_out, FH, FW):
    # windows[c, h, w, i, j] == padded[c, h*stride + i, w*stride + j]
    windows = padded.unfold(1, filter_height, stride).unfold(2, filter_width, stride)

    # contract every window with every filter over (channel, row, column)
    response = torch.einsum('chwij,kcij->khw', windows, filters)

    # one bias value per filter, broadcast over the spatial dimensions
    return response + bias.to(response.dtype).reshape(n_filters, 1, 1)

In [116]:
# Convolution Test

# cast the frog to tensor
image = torch.tensor(np_image)
# prepare parameters for testing
paddings = [0, 1, 2, 3]
strides = [1, 2, 3, 4]
# dedicated, seeded generator so the random filters are the same on every run
filter_rng = torch.Generator().manual_seed(0)
filters = [(torch.randn((2,3,3,3), generator=filter_rng), torch.randn(2, generator=filter_rng)),
           (torch.randn((2,3,5,5), generator=filter_rng), torch.randn(2, generator=filter_rng)),
           (torch.randn((5,3,1,1), generator=filter_rng), torch.randn(5, generator=filter_rng))]

# test all combinations
for (filt, bias), stride, padding in product(filters, strides, paddings):
    # your convolution
    out = convolution(image, filt, bias, stride=stride, padding=padding)
    # PyTorch equivalent
    out_torch = torch.conv2d(input=image.unsqueeze(0), weight=filt, bias=bias, padding=padding, stride=stride)
    # drop only the batch dimension; a bare squeeze() would also remove a
    # filter or spatial dimension that happens to have size 1
    expected = out_torch.squeeze(0)
    # asserts
    assert expected.shape == out.shape, \
        f'shape mismatch for filter {tuple(filt.shape)}, stride={stride}, padding={padding}'
    assert torch.allclose(out, expected, atol=1e-5, rtol=1e-5), \
        f'value mismatch for filter {tuple(filt.shape)}, stride={stride}, padding={padding}'

In [126]:
def max_pooling(image: torch.Tensor,
                kernel_size: int,
                stride: int = 1,
                padding: int = 1):
    """
    Max-pool every channel of a single image with a square window.

    :param image: torch.Tensor
        Input image of shape (C, H, W)
    :param kernel_size: int
        Size of the square pooling kernel
    :param stride: int
        Stride to use in pooling (must be >= 1)
    :param padding: int
        Number of padding cells to add on all sides of the image. The padding
        is filled with negative infinity (as torch.nn.functional.max_pool2d
        does) so a padded cell can never be selected as the maximum; padding
        with zeros would return 0 for border windows whose real values are
        all negative. For non-negative images the result is the same as with
        zero-padding.
    :return: torch.Tensor
        float32 tensor of shape (C, H_out, W_out), where
        H_out = (H + 2*padding - kernel_size) // stride + 1 and
        W_out = (W + 2*padding - kernel_size) // stride + 1
    :raises ValueError:
        If ``stride`` is smaller than 1 or the kernel does not fit inside the
        padded image
    """
    # get image dimensions
    img_channels, img_height, img_width = image.shape

    if stride < 1:
        raise ValueError(f'stride must be a positive integer, got {stride}')
    if kernel_size > min(img_height, img_width) + 2 * padding:
        raise ValueError('kernel does not fit inside the padded image')

    # pad the two spatial dimensions with -inf; the pad tuple is
    # (left, right, top, bottom). The cast to float32 keeps the output dtype
    # stable and makes -inf representable for integer images.
    padded = torch.nn.functional.pad(image.to(torch.float32),
                                     (padding, padding, padding, padding),
                                     value=float('-inf'))

    # sliding-window view of shape (C, H_out, W_out, kernel_size, kernel_size):
    # windows[c, h, w, i, j] == padded[c, h*stride + i, w*stride + j]
    windows = padded.unfold(1, kernel_size, stride).unfold(2, kernel_size, stride)

    # maximum over the two window dimensions
    return windows.amax(dim=(-2, -1))

In [127]:
# Max Pooling Test
from itertools import product

# cast the frog to tensor
image = torch.tensor(np_image)
# prepare parameters for testing
kernel_sizes = [2, 3, 4]
paddings = [0, 1]
strides = [1, 2, 3, 4]

# test all combinations
for kernel_size, stride, padding in product(kernel_sizes, strides, paddings):
    # your pooling
    out = max_pooling(image, kernel_size=kernel_size, stride=stride, padding=padding)
    # PyTorch equivalent
    out_torch = torch.nn.functional.max_pool2d(input=image.unsqueeze(0), kernel_size=kernel_size, padding=padding, stride=stride)
    # drop only the batch dimension; a bare squeeze() would also remove a
    # channel or spatial dimension that happens to have size 1
    expected = out_torch.squeeze(0)
    # asserts
    assert expected.shape == out.shape, \
        f'shape mismatch for kernel_size={kernel_size}, stride={stride}, padding={padding}'
    assert torch.allclose(out, expected, atol=1e-5, rtol=1e-5), \
        f'value mismatch for kernel_size={kernel_size}, stride={stride}, padding={padding}'