# Part 1

## Question 1

In [1]:
# for sample in batch_size:
#     padding_width = input_width + 2*padding
#     padding_height = input_height + 2*padding
#     # Given that output_width and output_height are the same:
#     output_width = output_height = ((input_width - kernel_size + 2*padding)/stride) + 1
#     # Create empty output for this layer
#     output = zeros([total_kernels, output_width, output_height])
#     for layer in input_channels:
#         layer_pad = [sample, layer, :, :]
#         # Add padding
#         for pad in padding:
#             add column of zeros to right to layer_pad
#             add column of zeros to left to layer_pad
#             add column of zeros to top to layer_pad
#             add column of zeros to bottom to layer_pad
#         # Move over layer:
#         for y in range(output_height-1):
#             for x in range(output_width-1):
#                 # If multiple kernels exist, save in different slices of output
#                 for kernel in total_kernels:
#                     patch = layer_pad[y*stride : (y*stride)+kernel_size, x*stride : (x*stride)+kernel_size]
#                     output[kernel, y, x] += sum( patch * kernel[layer, :, :] )
#     sample_output[sample, :, :, :] = output


## Question 2

Question 2. 
For a given input tensor, kernel size, stride and padding (no dilutions) work out
a general function that computes the size of the output.
https://pytorch.org/docs/stable/generated/torch.nn.functional.conv2d.html?highlight=functional%20conv2d#torch.nn.functional.conv2d 

In [2]:
import torch
import numpy as np
import ipykernel
import math
ipykernel.__version__

'6.4.1'

In [3]:
tensor = np.random.rand(1, 2, 27, 27)
weight = np.random.rand(1, 2, 3, 3)

def output_size(input_tensor, kernel_size, stride, padding):
    batch_size, channels, height, width = input_tensor.shape # <- similar to tensor.size() in torch, but '.shape' here since it is numpy
    out_size = (((height - kernel_size + 2*padding) / stride ) + 1)
    return out_size

print(output_size(tensor, 3, 2, 0)) #<- example from the slides: Lecture 3 AlexNet, top right

tensor = torch.rand([1, 2, 27, 27])
weight = torch.rand([1, 2, 3, 3])

print(torch.nn.functional.conv2d(tensor, weight, stride=2,padding=0).size())


13.0
torch.Size([1, 1, 13, 13])


## Question 3

Question 3: 
Write a naive (non-vectorized) implementation of the unfold function in
pseudocode. Include the pseudocode in your report.
https://pytorch.org/docs/stable/generated/torch.nn.functional.unfold.html 

In [4]:
# #input_tensor = [b, c, h, w] #batch_size, channels, height, width

# # Pseudo code naive unfold:
# def naive_unfold(input_tensor, kernel_size, stride, padding):
#     output_size = output_size(input_tensor, kernel_size, stride, padding)
#     #1 extract all patches from the input
#     for sample in b:
#         for channel in c:
#             layer_pad = [sample, layer, :, :]
#             # Add padding
#             for pad in padding:
#                 add column of zeros to right to layer_pad
#                 add column of zeros to left to layer_pad
#                 add column of zeros to top to layer_pad
#                 add column of zeros to bottom to layer_pad
#             n_patches_per_layer = output_size * output_size
#             for y in range(output_size-1):
#                 for x in range(output_size-1):
#                     # x+ y = number of patch, total_patch is 0 first time
#                     total_patch = (x+y) * channel
#                     patch[sample, x+y + total_patch ,:, :] = layer_pad[y*stride : (y*stride)+kernel_size, 
#                                                                         x*stride : (x*stride)+kernel_size]
#     # 2. Flatten these patches (with all channels) into vectors, arranged as the columns of a matrix X.
#     #### THIS CORRECT???
#     X = patch[:, :].flatten()
#     p = len(X)
#     # 3. Multiply this matrix by a weight matrix Y = XW
#     Y = X * W
#     # 4. Reshape the matrix Y, so that its columns become the pixels of the output tensor.
#     k = c * output_size * output_size
#     Y = Y.reshape([b, k, p])
#     return patch
#     #output = [b, k, p] #batch_size, number of values per patch, number of patches

## Torch Module

In [5]:
import torch
import torch.nn.functional as F
from torch import nn

class Conv2D(nn.Module):
    def __init__(self,in_channels, out_channels, kernel_size=(3,3), stride=1, padding=1):
        super().__init__() # <- belangrijk!
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        
    def forward(self, input_batch):
        batch_size, channels, height, width = input_batch.size()
        print("input_batch = ", input_batch.size())
        
        # output dimensions
        h_out = int(output_size(input_batch, self.kernel_size[0], self.stride, self.padding))
        w_out = int(output_size(input_batch, self.kernel_size[1], self.stride, self.padding))
        
        # unfolded matrix (b, k, p)
        unfolded = F.unfold(input_batch, self.kernel_size, padding=self.padding, stride=self.stride)
        print("unfolded = ", unfolded.size())
        batch_size, k_values_per_patch, patches = unfolded.size()
        
        # reshape to (b, p, k) tensor, than merge b and p to get (b*p,k) tensor
        reshaped = torch.transpose(unfolded, 1, 2).reshape(-1, k_values_per_patch)
        print("X_reshaped = ", reshaped.size())
        
        # Initiate random weights with correct dimensions
        W = torch.rand((k_values_per_patch, self.out_channels)) # - rows: number of nodes in one patch of input. -columns: # of nodes in one pixel in output
        print("W = ", W.size())
        
        # Matrix multiplication to get Y
        Y = torch.mm(reshaped, W) # bmm?
        print("Y = ", Y.size())
        
        # Reshape to get seperate batches back
        Y_reshaped = Y.reshape((batch_size, patches, self.out_channels)) # contains one row-vector for each pixel in output
        print("Y_reshaped = ", Y_reshaped.size())
        
        # Permute to swap axis for p and k
        Y_permuted = torch.permute(Y_reshaped, (0, 2, 1))
        print("Y_permuted = ", Y_permuted.size())
        
        # Fold back to obtain the output of this layer
        output = Y_permuted.reshape(batch_size, self.out_channels, h_out, w_out)
        print("output = ", output.size())
        
        assert output.size() == torch.nn.functional.conv2d(input_batch, W.reshape(self.out_channels, self.in_channels, self.kernel_size[0], self.kernel_size[1]),padding=self.padding).size()
        return output

# We use the Conv2D module by instantiating it, and applying it to an input.
torch.manual_seed(0)
conv = Conv2D(in_channels= 3, out_channels= 8)
input_batch = torch.randn(16, 3, 32, 32)
output_batch = conv(input_batch)

input_batch =  torch.Size([16, 3, 32, 32])
unfolded =  torch.Size([16, 27, 1024])
X_reshaped =  torch.Size([16384, 27])
W =  torch.Size([27, 8])
Y =  torch.Size([16384, 8])
Y_reshaped =  torch.Size([16, 1024, 8])
Y_permuted =  torch.Size([16, 8, 1024])
output =  torch.Size([16, 8, 32, 32])


## Question 4, 5 & 6

In [6]:
class MyConv2DFunc(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward
    passes which operate on Tensors.
    """
    @staticmethod
    def forward(ctx, input_batch, kernel, stride=1, padding=1):
        """
        In the forward pass we receive a Tensor containing the input
        and return a Tensor containing the output. ctx is a context
        object that can be used to stash information for backward
        computation. You can cache arbitrary objects for use in the
        backward pass using the ctx.save_for_backward method.
        """
        output_channels = kernel.size()[1]
        
        # your code here
        batch_size, input_channels, height, width = input_batch.size()
        print("X = ", input_batch.size())
        
        kernel_size = int(math.sqrt(kernel.size()[0]/input_channels))
        
        # output dimensions
        h_out = int(output_size(input_batch, kernel_size, stride, padding))
        w_out = int(output_size(input_batch, kernel_size, stride, padding))
        
        # unfolded matrix (b, k, p)
        U = F.unfold(input_batch, (kernel_size, kernel_size), padding=padding, stride=stride)
        print("U = ", U.size())
        batch_size, k_values_per_patch, patches = U.size()
        
        # reshape to (b, p, k) tensor, than merge b and p to get (b*p,k) tensor
        U_reshaped = torch.transpose(U, 1, 2).reshape(-1, k_values_per_patch)
        print("U_reshaped = ", U_reshaped.size())
        
        # Initiate random weights with correct dimensions
        W = torch.rand((k_values_per_patch, output_channels)) # - rows: number of nodes in one patch of input. -columns: # of nodes in one pixel in output
        print("W = ", W.size())
    
        # store objects for the backward
        ctx.save_for_backward(input_batch, U_reshaped, W)
        
        # Matrix multiplication to get Y
        Y = torch.mm(U_reshaped, W) # bmm?
        print("Y = ", Y.size())
        
        # Reshape to get seperate batches back
        Y_reshaped = Y.reshape((batch_size, patches, output_channels)) # contains one row-vector for each pixel in output
        print("Y_reshaped = ", Y_reshaped.size())
        
        # Permute to swap axis for p and k
        Y_permuted = torch.permute(Y_reshaped, (0, 2, 1))
        print("Y_permuted = ", Y_permuted.size())
        
        output_batch = Y_permuted.reshape(batch_size, output_channels, h_out, w_out)
        print("output_batch = ", output_batch.size())
        print("\n")
        
        assert output_batch.size() == torch.nn.functional.conv2d(input_batch, W.reshape(output_channels, input_channels, kernel_size, kernel_size),padding=padding).size()
        return output_batch

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the
        gradient of the loss with respect to the output, and we need
        to compute the gradient of the loss with respect to the
        input
        """
        # retrieve stored objects
        input_batch, X_reshaped, W = ctx.saved_tensors
        # your code here
        
        grad_Y_permuted = grad_output.reshape(grad_output.size()[0], grad_output.size()[1], grad_output.size()[2]*grad_output.size()[2])
        print("grad_Y_permuted = ", grad_Y_permuted.size())
        grad_Y_reshaped = torch.permute(grad_Y_permuted, (0, 2, 1))
        print("grad_Y_reshaped = ", grad_Y_reshaped.size())
        grad_Y = grad_Y_reshaped.reshape(grad_Y_reshaped.size()[0]*grad_Y_reshaped.size()[1],grad_Y_reshaped.size()[2])
        print("grad_Y = ", grad_Y.size())
        kernel_grad = torch.transpose(torch.mm(torch.transpose(grad_Y,0,1), X_reshaped),0,1)
        print("grad_W = ", kernel_grad.size())
        grad_U_reshaped = torch.mm(grad_Y, torch.transpose(W,0,1))
        print("grad_U_reshaped = ", grad_U_reshaped.size())
        grad_U = torch.permute(grad_U_reshaped.reshape(input_batch.size()[0], int(grad_U_reshaped.size()[0]/input_batch.size()[0]), kernel_grad.size()[0]), (0, 2, 1))
        print("grad_U = ", grad_U.size())
        input_batch_grad = F.fold(grad_U, output_size=[input_batch.size()[2], input_batch.size()[3]], kernel_size=(3,3), padding=1)
        print("grad_X = ", input_batch_grad.size())
        return input_batch_grad, kernel_grad, None, None
        
input_channels = 3
output_channels = 8
kernel_size = 3

input_batch = torch.randn(16, 3, 32, 32, requires_grad=True)
kernel = torch.randn(kernel_size*kernel_size*input_channels, output_channels, requires_grad=True)

conv = MyConv2DFunc.apply
output = conv(input_batch, kernel)
loss = output.sum()
loss.backward()

X =  torch.Size([16, 3, 32, 32])
U =  torch.Size([16, 27, 1024])
U_reshaped =  torch.Size([16384, 27])
W =  torch.Size([27, 8])
Y =  torch.Size([16384, 8])
Y_reshaped =  torch.Size([16, 1024, 8])
Y_permuted =  torch.Size([16, 8, 1024])
output_batch =  torch.Size([16, 8, 32, 32])


grad_Y_permuted =  torch.Size([16, 8, 1024])
grad_Y_reshaped =  torch.Size([16, 1024, 8])
grad_Y =  torch.Size([16384, 8])
grad_W =  torch.Size([27, 8])
grad_U_reshaped =  torch.Size([16384, 27])
grad_U =  torch.Size([16, 27, 1024])
grad_X =  torch.Size([16, 3, 32, 32])


# Part 2

## Question 7

In [7]:
import torchvision
from torchvision.transforms import ToTensor
from tqdm import tqdm
import torch

arg = {"data":'./data', "batch": 60000} # with batch = 16 we get a dataloader of length 3750 (*16=60.000)
training_data = torchvision.datasets.MNIST(root=arg['data'], train=True, download=True, transform=ToTensor())
trainloader = torch.utils.data.DataLoader(training_data, batch_size=arg['batch'], shuffle=True, num_workers=2)
test_set = torchvision.datasets.MNIST(root=arg['data'], train=False, download=True, transform=ToTensor())
testloader = torch.utils.data.DataLoader(test_set, batch_size=arg['batch'], shuffle=True, num_workers=2)

for i, data in enumerate(trainloader):
    input, labels = data

#ongeveer 15sec

In [8]:
training = (input[:50000])
training_label = labels[:50000]
validation = input[50000:]
validation_label = labels[50000:]

def loop_over(data, label, step):
    for i in range(0,len(data),step):
        batch = data[i:i+step]
        batch_labels = label[i:i+step]

loop_over(training, training_label, 16)
loop_over(validation, validation_label, 16)

## Question 8
Build this network and tune the hyperparameters until you get a good baseline
performance you are happy with. You should be able to get at least 95% accuracy. If training
takes too long, you can reduce the number of channels in each layer.

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
import torchvision.transforms
from torchvision.transforms import ToTensor, transforms
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np

In [10]:
class Net(nn.Module):
    def __init__(self, input_chan, kernel, stride, padding, output):
        super().__init__()
        self.conv1 = nn.Sequential(nn.Conv2d(input_chan, 16, kernel, stride, padding), nn.ReLU(), nn.MaxPool2d(2,2))
        self.conv2 = nn.Sequential(nn.Conv2d(16, 32, kernel, stride, padding), nn.ReLU(), nn.MaxPool2d(2,2))
        self.conv3 = nn.Sequential(nn.Conv2d(32, 64, kernel, stride, padding), nn.ReLU(), nn.MaxPool2d(2,2))
        self.out = nn.Linear(64*3*3, output)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = torch.flatten(x, 1)
        output = self.out(x)
        return output

In [11]:
def get_loaders(batch_size):
    loaders = {'train_set' : DataLoader(train_set, 
                                          batch_size=batch_size, 
                                          shuffle=True, 
                                          num_workers=1),
               'val_set' : DataLoader(val_set, 
                                          batch_size=10000, 
                                          shuffle=True, 
                                          num_workers=1),
                'test_set'  : DataLoader(test_set, 
                                          batch_size=len(test_set), 
                                          shuffle=False, 
                                          num_workers=1)}
    return loaders

In [12]:
def validate(net, loaders):
    net.eval()
    correct = 0
    with torch.no_grad():
        for x, y in loaders['val_set']:
            output = net(x)
            _,pred_y = torch.max(output, dim = 1)
            correct += (pred_y == y).float().sum()

    print('accuracy on validation set', (correct / 10000)*100, '%')
    return (correct / 10000)*100

def train(net, loaders, epochs, loss_f, opt):
    train_loss = []
    epoch_list = []
    acc_list = []

    for i in range(epochs):
        print('epoch = ', i)
        for j, (x, y) in enumerate(loaders['train_set']):
            opt.zero_grad()
            x_batch = x
            y_batch = y
            output = net.forward(x_batch)
            loss = loss_f(output, y_batch)
            train_loss.append(loss)
            if j % 1000 == 0:
                print('loss:', loss.item())
            loss.backward()
            opt.step() 
        epoch_list.append(i)
        acc_list.append(validate(net, loaders))
    return train_loss, epoch_list, acc_list

In [13]:
# input_chan = 1
# output = 10
# kernel = 3
# stride = 1
# padding = 1
# lr = 0.0001
# net = Net(input_chan, kernel, stride, padding, output)
# loss_f = nn.CrossEntropyLoss()
# opt = optim.Adam(net.parameters(), lr)


# epochs = 5
# batch_size = 16
# train_set, val_set = torch.utils.data.random_split(training_data, [50000, 10000])
# loaders = get_loaders(batch_size) 
# train_loss, epoch_list, acc_list = train(net, loaders, epochs, loss_f, opt)

In [14]:
# y1 = acc_list
# x1 = epoch_list

# plt.plot(x1, y1, label = "batch_size = 16" )
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.title('Accuracy during training epochs')
# plt.xticks(np.arange(0, 20, 1))
# plt.savefig('figures/Q8_acc')
# plt.show()

## Question 9

In [15]:
augmented = transforms.Compose(
                    [
                    transforms.RandomRotation((-7.0,7.0),fill=(1,)),
                    # transforms.RandomAffine((-3,3),translate =(0.05,0.01)),
                    transforms.ToTensor(),
                    transforms.Normalize((0.1308,), (0.3016,)) 
                    ])

training_data = torchvision.datasets.MNIST(root=arg['data'], train=True, download=True, transform=augmented)
train_set, val_set = torch.utils.data.random_split(training_data, [50000, 10000])    

In [16]:
# input_chan = 1
# output = 10
# kernel = 3
# stride = 1
# padding = 1
# lr = 0.0001
# net = Net(input_chan, kernel, stride, padding, output)
# loss_f = nn.CrossEntropyLoss()
# opt = optim.Adam(net.parameters(), lr)

# epochs = 5
# batch_size = 16
# loaders = get_loaders(batch_size) 
# train_loss, epoch_list, acc_list = train(net, loaders, epochs, loss_f, opt)

In [17]:
# plt.plot(epoch_list, acc_list, label = "batch_size = 16" )
# plt.xlabel('Epochs')
# plt.ylabel('Accuracy')
# plt.title('Accuracy during training epochs')
# plt.xticks(np.arange(0, 20, 1))
# plt.savefig('Q9_acc_augmented')
# plt.show()

## Question 10

In [18]:
input_tensor = torch.rand(1, 3, 1024, 768)
conv_layer = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride = 2, padding=1)
print(conv_layer(input_tensor).size())

input_tensor = torch.rand(1, 3, 1920, 1080)
conv_layer = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride = 2, padding=1)
print(conv_layer(input_tensor).size())

input_tensor = torch.rand(1, 8, 1920, 1080)
try:
    conv_layer = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=5, stride = 2, padding=1)
    print(conv_layer(input_tensor).size())
except:
    print("ERROR: Dimensions are not correct..")

torch.Size([1, 16, 511, 383])
torch.Size([1, 16, 959, 539])
ERROR: Dimensions are not correct..


## Question 11

In [19]:
b = 16
c = 3
h = 32
w = 32

input_tensor = torch.rand((b, c, h, w))
global_average_pooling = torch.nn.AvgPool2d((h,w), stride=None, padding=0)
global_max_pooling = torch.nn.MaxPool2d((h,w), stride=None, padding=0)
avg_pooled = global_average_pooling(input_tensor)  
max_pooled = global_max_pooling(input_tensor)
print(avg_pooled.size())
print(max_pooled.size())

torch.Size([16, 3, 1, 1])
torch.Size([16, 3, 1, 1])


## Question 12

In [20]:
one_size = transforms.Compose(
                    [
                    transforms.Resize((28,28)),
                    # transforms.RandomRotation((-7.0,7.0),fill=(1,)),
                    # transforms.RandomAffine((-3,3),translate =(0.02,0.02)),
                    transforms.ToTensor(),
                    # transforms.Normalize((0.1308,), (0.3016,)) 
                    ])

training_data = torchvision.datasets.ImageFolder('mnist-varres/train', transform=one_size)
test_set= torchvision.datasets.ImageFolder('mnist-varres/test', transform=one_size)
train_set, val_set = torch.utils.data.random_split(training_data, [50000, 10000]) 

In [21]:
trainloader = torch.utils.data.DataLoader(training_data, batch_size=arg['batch'], shuffle=True, num_workers=2)
for i, data in enumerate(trainloader):
    input, labels = data

In [22]:
# input_chan = 3   ## I DON"T KNOW WHY IT HAS 3 INPUT CHANNELS HERE (NOWHERE MENTIONED)
# output = 10
# kernel = 3
# stride = 1
# padding = 1
# lr = 0.0001
# net = Net(input_chan, kernel, stride, padding, output)
# loss_f = nn.CrossEntropyLoss()
# opt = optim.Adam(net.parameters(), lr)

# epochs = 5
# batch_size = 16
# loaders = get_loaders(batch_size) 
# train_loss, epoch_list, acc_list = train(net, loaders, epochs, loss_f, opt)

## Question 13

In Overleaf

In [23]:
!pip install cv2

ERROR: Could not find a version that satisfies the requirement cv2 (from versions: none)
ERROR: No matching distribution found for cv2


## Question 14

In [38]:
import glob
import os
import PIL
import scipy.misc

# All files ending with .png
train_data = glob.glob("**/*png")
print(train_data)
# Append pictures to list with their label
size_32 = []
size_48 = []
size_64 = []
for picture in train_data:
    [label, name] = picture.split("\\")
    size = PIL.Image.open(picture).size[0]
    print(scipy.misc.imread(picture, flatten=False, mode='RGB'))
    if size == 32:
        size_32.append((scipy.misc.imread(picture, flatten=False, mode='RGB'), label))
    elif size == 48:
        size_48.append((scipy.misc.imread(picture, flatten=False, mode='RGB'), label))
    elif size == 64:
        size_64.append((scipy.misc.imread(picture, flatten=False, mode='RGB'), label))

data_32 = torch.tensor(size_32)
data_48 = torch.tensor(size_48)
data_64 = torch.tensor(size_64)

[]
