# MSc at UCU. Deep Learning 2019
## Home Assignment 2. Oleh Lukianykhin
## Comparison of PyTorch and custom implementations

In [1]:
from simple_conv_net_func import diff_mse
import torch
from simple_conv_net_func import conv2d_scalar, pool2d_scalar, relu_scalar, reshape_scalar, fc_layer_scalar
from simple_conv_net_func import conv2d_vector, pool2d_vector, relu_vector, reshape_vector, fc_layer_vector
from simple_conv_net_func import conv_weight2rows, im2col

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Reference PyTorch layers, given short names so the comparison cells below
# read uniformly next to the custom implementations.

# Seed the global RNG before any randomly initialised layer is built, so the
# weights created here (and random tensors drawn afterwards) repeat from run to
# run when the notebook is executed top to bottom.
SEED = 0
torch.manual_seed(SEED)

pytorch_conv = nn.Conv2d(in_channels=1,
                         out_channels=20,
                         kernel_size=5,
                         stride=1,
                         padding=0,
                         dilation=1,
                         groups=1,
                         bias=True)


def pytorch_pool(x):
    """Apply max pooling with a 2x2 window and stride 2 to `x`."""
    return F.max_pool2d(x, 2, 2)


def pytorch_reshape(x):
    """View `x` as a 2-D tensor whose rows have 20*12*12 elements."""
    return x.view(-1, 20*12*12)


def pytorch_fc(inn, out):
    """Build a new `nn.Linear` layer mapping `inn` features to `out` features."""
    return nn.Linear(in_features=inn, out_features=out)


def pytorch_relu(x):
    """Apply the element-wise ReLU to `x`."""
    return F.relu(x)

In [3]:
# Random input batch shared by all comparisons below: 64 single-channel
# 28x28 samples drawn uniformly from [0, 1).
input_shape = (64, 1, 28, 28)
x_in = torch.rand(input_shape)

## First compare 2d convolution implementation in pytorch with mine

In [4]:
%%time
torch_res = pytorch_conv(x_in)
b = pytorch_conv.bias
w = pytorch_conv.weight
my_res = conv2d_vector(x_in, w, b, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res, my_res)))

MSE difference for the vector version is 0.0
Wall time: 1.18 s


In [5]:
%%time
torch_res = pytorch_conv(x_in)
b = pytorch_conv.bias
w = pytorch_conv.weight
my_res = conv2d_scalar(x_in, w, b, 'cpu')
print("MSE difference for the scalar version is {}".format(diff_mse(torch_res, my_res)))

MSE difference for the scalar version is 1.2936938550113454e-15
Wall time: 56.9 s


## Compare 2d pooling implementation in pytorch with mine

In [5]:
%%time
torch_res_pool = pytorch_pool(torch_res)
my_res_pool = pool2d_vector(torch_res, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_pool, my_res_pool)))

MSE difference for the vector version is 0.0
Wall time: 30.9 ms


In [12]:
%%time
torch_res_pool = pytorch_pool(torch_res)
my_res_pool = pool2d_scalar(torch_res, 'cpu')
print("MSE difference for the scalar version is {}".format(diff_mse(torch_res_pool, my_res_pool)))

MSE difference for the scalar version is 0.0
Wall time: 481 ms


## Compare reshape from pytorch with my implementation

In [6]:
%%time
torch_res_reshape = pytorch_reshape(torch_res_pool)
my_res_reshape = reshape_vector(torch_res_pool, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_reshape, my_res_reshape)))

MSE difference for the vector version is 0.0
Wall time: 2.99 ms


In [7]:
%%time
torch_res_reshape = pytorch_reshape(torch_res_pool)
my_res_reshape = reshape_scalar(torch_res_pool, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_reshape, my_res_reshape)))

MSE difference for the vector version is 0.0
Wall time: 11.2 s


## Compare fully connected layer from pytorch with my implementation

In [7]:
%%time
torch_fc_1 = pytorch_fc(2880, 500)
torch_res_fc1 = torch_fc_1(torch_res_reshape)
b = torch_fc_1.bias
w = torch_fc_1.weight
my_res_fc_1 = fc_layer_vector(torch_res_reshape, w, b, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_fc1, my_res_fc_1)))


MSE difference for the vector version is 2.741143671707133e-15
Wall time: 25.9 ms


In [84]:
%%time
torch_fc_1 = pytorch_fc(2880, 500)

b = torch_fc_1.bias.data.clone()
w = torch_fc_1.weight.data.clone()
x = torch_res_reshape.data.clone()
torch_res_fc1 = torch_fc_1(torch_res_reshape)

Wall time: 22.9 ms


In [86]:
%%time
my_res_fc_1 = fc_layer_scalar(x[:5], w, b, 'cpu')

Wall time: 2min 6s


In [87]:
# The scalar layer was evaluated on x[:5] only, so compare against the matching
# first five rows of the PyTorch output. The message names the scalar version,
# which is the implementation being reported here.
print("MSE difference for the scalar version is {}".format(diff_mse(torch_res_fc1[:5], my_res_fc_1)))

MSE difference for the vector version is 3.150770795526739e-14


FC-layers are **very** slow in scalar form, so I tested only the first 5 "images" in the generated batch.

## Compare ReLU from pytorch with my implementation

In [9]:
%%time
torch_res_relu = pytorch_relu(torch_res_fc1)
my_res_fc_relu = relu_vector(torch_res_fc1, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_relu, my_res_fc_relu)))

MSE difference for the vector version is 0.0
Wall time: 1.87 ms


In [10]:
%%time
torch_res_relu = pytorch_relu(torch_res_fc1)
my_res_fc_relu = relu_scalar(torch_res_fc1, 'cpu')
print("MSE difference for the vector version is {}".format(diff_mse(torch_res_relu, my_res_fc_relu)))

MSE difference for the vector version is 0.0
Wall time: 1.32 s


## As expected, the difference is extremely small or exactly 0 for both implementations: scalar and vector.
## However, the scalar versions run much slower. The fully connected layer is extremely slow because of the huge number of simple operations that are executed sequentially, not in parallel.