Question 1: 
Write a pseudo-code for how you would implement this with a set of nested
for loops. The convolution is defined by a set of weights/parameters which we will learn.
How do you represent these weights?


In [21]:
# for sample in batch_size:
#     padding_width = input_width + 2*padding
#     padding_height = input_height + 2*padding
#     # Given that output_width and output_height are the same:
#     output_width = output_height = ((input_width - kernel_size + 2*padding)/stride) + 1
#     # Create empty output for this layer
#     output = zeros([total_kernels, output_width, output_height])
#     for layer in input_channels:
#         layer_pad = [sample, layer, :, :]
#         # Add padding
#         for pad in padding:
#             add column of zeros to right to layer_pad
#             add column of zeros to left to layer_pad
#             add column of zeros to top to layer_pad
#             add column of zeros to bottom to layer_pad
#         # Move over layer:
#         for y in range(output_height-1):
#             for x in range(output_width-1):
#                 # If multiple kernels exist, save in different slices of output
#                 for kernel in total_kernels:
#                     patch = layer_pad[y*stride : (y*stride)+kernel_size, x*stride : (x*stride)+kernel_size]
#                     output[kernel, y, x] += sum( patch * kernel[layer, :, :] )
#     sample_output[sample, :, :, :] = output


Question 2. 
For a given input tensor, kernel size, stride and padding (no dilutions) work out
a general function that computes the size of the output.
https://pytorch.org/docs/stable/generated/torch.nn.functional.conv2d.html?highlight=functional%20conv2d#torch.nn.functional.conv2d 

In [1]:
import torch
import numpy as np
import ipykernel
ipykernel.__version__

'6.4.1'

In [22]:
tensor = np.random.rand(1, 2, 27, 27)
weight = np.random.rand(1, 2, 3, 3)

def output_size(input_tensor, kernel_size, stride, padding):
    batch_size, channels, height, width = input_tensor.shape # <- similar to tensor.size() in torch, but '.shape' here since it is numpy
    out_size = (((height - kernel_size + 2*padding) / stride ) + 1)
    return out_size

print(output_size(tensor, 3, 2, 0)) #<- example from the slides: Lecture 3 AlexNet, top right

tensor = torch.rand([1, 2, 27, 27])
weight = torch.rand([1, 2, 3, 3])

print(torch.nn.functional.conv2d(tensor, weight, stride=2,padding=0).size())


13.0
torch.Size([1, 1, 13, 13])


Question 3: 
Write a naive (non-vectorized) implementation of the unfold function in
pseudocode. Include the pseudocode in your report.
https://pytorch.org/docs/stable/generated/torch.nn.functional.unfold.html 

In [24]:
# #input_tensor = [b, c, h, w] #batch_size, channels, height, width

# # Pseudo code naive unfold:
# def naive_unfold(input_tensor, kernel_size, stride, padding):
#     output_size = output_size(input_tensor, kernel_size, stride, padding)
#     #1 extract all patches from the input
#     for sample in b:
#         for channel in c:
#             layer_pad = [sample, layer, :, :]
#             # Add padding
#             for pad in padding:
#                 add column of zeros to right to layer_pad
#                 add column of zeros to left to layer_pad
#                 add column of zeros to top to layer_pad
#                 add column of zeros to bottom to layer_pad
#             n_patches_per_layer = output_size * output_size
#             for y in range(output_size-1):
#                 for x in range(output_size-1):
#                     # x+ y = number of patch, total_patch is 0 first time
#                     total_patch = (x+y) * channel
#                     patch[sample, x+y + total_patch ,:, :] = layer_pad[y*stride : (y*stride)+kernel_size, 
#                                                                         x*stride : (x*stride)+kernel_size]
#     # 2. Flatten these patches (with all channels) into vectors, arranged as the columns of a matrix X.
#     #### THIS CORRECT???
#     X = patch[:, :].flatten()
#     p = len(X)
#     # 3. Multiply this matrix by a weight matrix Y = XW
#     Y = X * W
#     # 4. Reshape the matrix Y, so that its columns become the pixels of the output tensor.
#     k = c * output_size * output_size
#     Y = Y.reshape([b, k, p])
#     return patch
#     #output = [b, k, p] #batch_size, number of values per patch, number of patches

Torch Module : niet af!
Inspiratie (kreeg ik doorgestuurd): 
https://discuss.pytorch.org/t/decompose-conv2d-input-unfold-gemm-fold/93740 

In [2]:
import torch
import torch.nn.functional as F
from torch import nn

class Conv2D(nn.Module):
    def __init__(self,in_channels, out_channels, kernel_size=(3,3), stride=1, padding=1):
        super().__init__() # <- belangrijk!
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
    def forward(self, input_batch):

        
        batch_size, channels, height, width = input_batch.size()
        print("input_batch = ", input_batch.size())
        # your code here
        unfolded = F.unfold(input_batch, self.kernel_size, padding=self.padding, stride=self.stride)
        print("unfolded = ", unfolded.size())
        # now (b, p, k)
        batch_size, k_values_per_patch, patches = unfolded.size()
        reshaped = unfolded.reshape((batch_size * k_values_per_patch, patches))
        print("reshaped = ", reshaped.size())
        W = torch.rand((patches, patches)) # - rows: number of nodes in one patch of input. -columns: # of nodes in one pixel in output
        print("W = ", W.size())
        Y = torch.matmul(reshaped, W) # bmm?
        print("Y = ", Y.size())
        rereshaped = Y.reshape((batch_size, k_values_per_patch, patches)) # contains one row-vector for each pixel in output

        W = torch.rand([16, 3, 32, 32])

        print(torch.nn.functional.conv2d(input_batch, W, stride=self.stride,padding=self.padding).size())


        # permutation
        # reshape
        return rereshaped

# We use the Conv2D module by instantiating it, and applying it to an input.
conv = Conv2D(in_channels= 3, out_channels= 8)
input_batch = torch.randn(16, 3, 32, 32)
output_batch = conv(input_batch)

# by applying formula as described in output_size() the output_size should also be 32x32

input_batch =  torch.Size([16, 3, 32, 32])
unfolded =  torch.Size([16, 27, 1024])
reshaped =  torch.Size([432, 1024])
W =  torch.Size([1024, 1024])
Y =  torch.Size([432, 1024])
torch.Size([16, 16, 3, 3])


Torch function: just the code from the assignment

In [None]:
class Conv2DFunc(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward
    passes which operate on Tensors.
    """
    @staticmethod
    def forward(ctx, input_batch, kernel, stride=1, padding=1):
        """
        In the forward pass we receive a Tensor containing the input
        and return a Tensor containing the output. ctx is a context
        object that can be used to stash information for backward
        computation. You can cache arbitrary objects for use in the
        backward pass using the ctx.save_for_backward method.
        """
        # store objects for the backward
        ctx.save_for_backward(input)
        ctx.save_for_backward(kernel)
        # your code here
        ...
        return output_batch

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the
        gradient of the loss with respect to the output, and we need
        to compute the gradient of the loss with respect to the
        input
        """
        # retrieve stored objects
        input, = ctx.saved_tensors
        # your code here
        ...
        # The gradients of the inputs. For anything that doesn't have
        # a gradient (the stride and padding) you can
        # return None.
        return input_batch_grad, kernel_grad, None, None
        
input_batch = torch.randn(16, 3, 32, 32)
kernel = torch.randn(...)
Conv2DFunc.apply(input_batch, kernel)
output_batch = conv(input_batch)

Part 2

Question 7

In [22]:
import torchvision
from torchvision.transforms import ToTensor
from tqdm import tqdm

arg = {"data":'./data', "batch": 60000} # with batch = 16 we get a dataloader of length 3750 (*16=60.000)
train = torchvision.datasets.MNIST(root=arg['data'], train=True, download=True, transform=ToTensor())
trainloader = torch.utils.data.DataLoader(train, batch_size=arg['batch'], shuffle=True, num_workers=2)
test = torchvision.datasets.MNIST(root=arg['data'], train=False, download=True, transform=ToTensor())
testloader = torch.utils.data.DataLoader(test, batch_size=arg['batch'], shuffle=True, num_workers=2)

for i, data in enumerate(trainloader):
    input, labels = data

#ongeveer 15sec

In [31]:
training = input[:50000]
training_label = labels[:50000]
validation = input[50000:]
validation_label = labels[50000:]

def loop_over(data, label, step):
    for i in range(0, len(data),step):
        data[i:i+step]
        label[i:i+step]

loop_over(training, training_label, 16)
    



Question 8:
Build this network and tune the hyperparameters until you get a good baseline
performance you are happy with. You should be able to get at least 95% accuracy. If training
takes too long, you can reduce the number of channels in each layer.

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# vanuit de blitz tutorial
# class Net(nn.Module):
#     def __init__(self):
#         super().__init__()
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(6, 16, 5)
#         self.fc1 = nn.Linear(16 * 5 * 5, 120)
#         self.fc2 = nn.Linear(120, 84)
#         self.fc3 = nn.Linear(84, 10)

#     def forward(self, x):
#         x = self.pool(F.relu(self.conv1(x)))
#         x = self.pool(F.relu(self.conv2(x)))
#         x = torch.flatten(x, 1) # flatten all dimensions except batch
#         x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
#         x = self.fc3(x)
#         return x


net = Net()

conv2d(tensor, weight, stride=2,padding=0)