In [1]:
from IPython.display import display, HTML
# Stretch the notebook container to the full browser width.
full_width_css = "<style>.container { width:100% !important; }</style>"
display(HTML(full_width_css))

In [2]:
import torch # type: ignore
import sys; sys.path.append('../../')
from  giagrad.tensor import Tensor
import numpy as np
from giagrad.display import draw_dot
import torch.nn as nn
import string
import math
from itertools import chain
from numpy.lib.stride_tricks import as_strided

# Conv 2D

## Helpers

In [127]:
import string
from math import floor
# Pool of distinct single-character strings (code points 33-126, then 161-1199)
# used as readable placeholder entries for symbolic test arrays.
alphabet = list(map(chr, chain(range(33, 127), range(161, 1200))))
def kernel(Cout, Cin, kH, kW, a=0, b=4):
    """Draw a random integer filter bank.

    The result has shape (Cout, Cin, kH, kW): output channels, input channels,
    kernel height, kernel width. Entries come from ``np.random.randint`` over
    the half-open range [a, b).
    """
    filter_shape = (Cout, Cin, kH, kW)
    return np.random.randint(low=a, high=b, size=filter_shape)

def dataBatched(N, Cin, Hin, Win):
    """Return a random integer batch of shape (N, Cin, Hin, Win).

    Entries are drawn with ``np.random.randint`` from the half-open range
    [-4, 4). For a symbolic batch instead, slice ``alphabet`` and reshape it
    the way ``dataUnBatched`` does.
    """
    batch_shape = (N, Cin, Hin, Win)
    return np.random.randint(low=-4, high=4, size=batch_shape)

def dataUnBatched(Cin, Hin, Win):
    """Return an object array of shape (Cin, Hin, Win) filled with symbols.

    The first ``Cin*Hin*Win`` entries of the module-level ``alphabet`` are
    used in order, so each position holds a different character.
    """
    count = Cin * Hin * Win
    symbols = np.array(alphabet[:count], dtype=object)
    return symbols.reshape((Cin, Hin, Win))

## Forward

Para la convolución se necesita convertir con `as_strided` el array anterior, de dimensión
$$(N, C_{in}, H_{in}, W_{in})$$
en ventanas a partir de las cuales se obtiene una salida de dimensión
$$(N, C_{out}, H_{out}, W_{out})$$
donde
$$
H_{out} = \left\lfloor 
            \frac{H_{in} + 2 \times padding[0] - dilation[0] \times (kernel\_size[0] -1) - 1}
                 {stride[0]} + 1
           \right\rfloor \\
W_{out} = \left\lfloor 
            \frac{W_{in} + 2 \times padding[1] - dilation[1] \times (kernel\_size[1] -1) - 1}
                 {stride[1]} + 1 
           \right\rfloor \\
$$

In [141]:
# Problem size: batch, input channels, input height and width.
N, Cin, Hin, Win = 1, 1, 4, 4
kH, kW = 2, 2
Cout = 1
padding = (0, 0)
dilation = (1, 1)
stride = (1, 1)


def conv_output_size(in_size, pad, dil, k_size, step):
    """Output length along one spatial axis of a convolution.

    Integer form of
    ``floor((in_size + 2*pad - dil*(k_size - 1) - 1) / step + 1)``.
    Floor division keeps the arithmetic exact instead of going through floats.

    Parameters are the input length, padding, dilation, kernel length and
    stride for that axis; the return value is an ``int``.
    """
    return (in_size + 2 * pad - dil * (k_size - 1) - 1) // step + 1


# Same formula for both spatial axes; index 0 is vertical, index 1 horizontal.
Hout = conv_output_size(Hin, padding[0], dilation[0], kH, stride[0])
Wout = conv_output_size(Win, padding[1], dilation[1], kW, stride[1])

print('Hout: ', Hout)
print('Wout: ', Wout)

# define output shape
out_shape = (N, Cout, Hout, Wout)
print('out_shape: ', out_shape)

Hout:  3
Wout:  3
out_shape:  (1, 1, 3, 3)


In [142]:
# Random input batch of shape (N, Cin, Hin, Win).
# (Follow with np.zeros_like(data) to experiment with an all-zero input.)
data = dataBatched(N=N, Cin=Cin, Hin=Hin, Win=Win)
# Random filter bank of shape (Cout, Cin, kH, kW).
k = kernel(Cout=Cout, Cin=Cin, kH=kH, kW=kW)

In [143]:
# Show the input batch followed by the kernel, one per line block.
print(data, k, sep='\n')

[[[[ 1 -4  0  1]
   [ 3  1  0 -1]
   [-1  2 -2 -3]
   [-4 -1 -1 -4]]]]
[[[[1 3]
   [1 0]]]]


`strides` debe tener dos dimensiones más que `out_shape`, ya que cada matriz $H_{in} \times W_{in}$ se debe dividir en todas las posibles multiplicaciones del kernel.    

tenemos:

    - [ -6 , -5 , -4 , -3 , -2 , -1 ] * itemsize in bytes
    
    posicion -1: dilation[1], horizontal dilation
    posicion -2: Win * dilation[0], vertical dilation
    posicion -3: Hin*Win, jump to next Channel
    posicion -4: stride[1], horizontal stride
    posicion -5: Win*stride[0], vertical stride
    posicion -6: Hin*Win*Cin, next observation in Batched data

y el shape del strided array tiene que ser:
    
    -  [ -6 , -5 , -4 , -3 , -2 , -1 ]
    
    posicion -1: kW
    posicion -2: kH
    posicion -3: Cin
    posicion -4: Wout
    posicion -5: Hout
    posicion -6: N

In [144]:
# Window layout: (batch, out_row, out_col, in_channel, kernel_row, kernel_col).
stridedShape = (N, Hout, Wout, Cin, kH, kW)

# Step, counted in elements, taken along each axis of the window layout.
# NOTE(review): this arithmetic assumes data is a C-contiguous
# (N, Cin, Hin, Win) array -- confirm if data is ever produced another way.
element_steps = [
    Hin * Win * Cin,    # next observation in the batch
    Win * stride[0],    # window moves down by the vertical stride
    stride[1],          # window moves right by the horizontal stride
    Hin * Win,          # next input channel
    Win * dilation[0],  # next kernel row (vertical dilation)
    dilation[1],        # next kernel column (horizontal dilation)
]
# as_strided expects byte offsets, so scale by the element size.
strides = np.array(element_steps) * data.itemsize

print('strides no byte: ', strides/data.itemsize)
print('strides in bytes: ', strides)
print('strided output shape: ', stridedShape)
print('kernel shape: ', k.shape)
print('data shape: ', data.shape)
print('data shape: ', data.shape)

strides no byte:  [16.  4.  1. 16.  4.  1.]
strides in bytes:  [128  32   8 128  32   8]
strided output shape:  (1, 3, 3, 1, 2, 2)
kernel shape:  (1, 1, 2, 2)
data shape:  (1, 1, 4, 4)


In [151]:
# Build the 6-D window view over data's buffer so tensordot can contract it
# against the kernel; the view shares memory with data.
strided_array = as_strided(data, stridedShape, strides)


In [152]:
# Display every kH x kW window of data, indexed by (n, out_row, out_col, channel).
strided_array

array([[[[[[ 1, -4],
           [ 3,  1]]],


         [[[-4,  0],
           [ 1,  0]]],


         [[[ 0,  1],
           [ 0, -1]]]],



        [[[[ 3,  1],
           [-1,  2]]],


         [[[ 1,  0],
           [ 2, -2]]],


         [[[ 0, -1],
           [-2, -3]]]],



        [[[[-1,  2],
           [-4, -1]]],


         [[[ 2, -2],
           [-1, -1]]],


         [[[-2, -3],
           [-1, -4]]]]]])

In [153]:
# Experiment: draw one random offset in [0, 4) per window entry and add it in place.
random = np.random.randint(4, size=strided_array.shape)
# The add writes through the view into data's buffer. Neighbouring windows
# alias the same elements of data, so the outcome is not a plain elementwise sum.
# NOTE(review): NumPy documents writing to overlapping as_strided views as
# unsafe -- treat the result as exploratory only.
strided_array += random

In [154]:
# Display the random offsets drawn for each window entry.
random

array([[[[[[3, 3],
           [3, 0]]],


         [[[0, 0],
           [2, 1]]],


         [[[3, 1],
           [3, 1]]]],



        [[[[0, 3],
           [0, 3]]],


         [[[3, 2],
           [3, 0]]],


         [[[0, 0],
           [2, 2]]]],



        [[[[1, 1],
           [2, 3]]],


         [[[3, 0],
           [3, 1]]],


         [[[3, 0],
           [1, 2]]]]]])

In [155]:
# Check whether the window view is laid out as one C-contiguous block.
strided_array.flags.c_contiguous

False

In [156]:
# Display the source array to see what the in-place add on the view did to it.
data

array([[[[ 4, -4,  3,  2],
         [ 3,  4,  0, -1],
         [ 0,  5,  1, -3],
         [-2,  2,  0, -2]]]])

In [88]:
# Add the kernel to each window in turn, writing through the view into data.
# The bounds come from the view's own (out_row, out_col) axes rather than a
# hard-coded 3, so the cell keeps working when the problem size changes.
# np.ndindex walks the positions in row-major order, like the nested loops did.
for i, j in np.ndindex(*strided_array.shape[1:3]):
    strided_array[:, i, j] += k
print(strided_array)

[[[[[[0 0]
     [3 5]]]


   [[[0 0]
     [5 5]]]


   [[[0 0]
     [5 2]]]]



  [[[[3 5]
     [3 5]]]


   [[[5 5]
     [5 5]]]


   [[[5 2]
     [5 2]]]]



  [[[[3 5]
     [3 5]]]


   [[[5 5]
     [5 5]]]


   [[[5 2]
     [5 2]]]]]]


In [89]:
# Display the source array after the kernel was added window by window through the view.
data

array([[[[0, 0, 0, 0],
         [3, 5, 5, 2],
         [3, 5, 5, 2],
         [3, 5, 5, 2]]]])

In [59]:
# Out-of-place sum: k broadcasts across every window and a new array is returned,
# leaving both the view and data untouched.
np.add(k, strided_array)

array([[[[[[5, 6],
           [7, 4]]],


         [[[5, 6],
           [7, 4]]],


         [[[5, 7],
           [7, 5]]]],



        [[[[5, 6],
           [7, 4]]],


         [[[5, 6],
           [7, 4]]],


         [[[5, 7],
           [7, 5]]]],



        [[[[5, 6],
           [9, 6]]],


         [[[5, 6],
           [9, 6]]],


         [[[5, 7],
           [9, 3]]]]]])

In [37]:
# Display the shape of the window view.
strided_array.shape

(1, 3, 3, 1, 2, 2)

In [56]:
# Compute the broadcast sum into a temporary array, then copy it back through the view.
# Several view entries alias the same element of data, so which of the
# temporary's values ends up in each element depends on NumPy's write order.
# NOTE(review): exploratory; do not rely on this result.
strided_array[:] = strided_array + k

In [58]:
# Show the window view followed by the underlying source array.
print(strided_array, data, sep='\n')

[[[[[[4 4]
     [4 4]]]


   [[[4 4]
     [4 4]]]


   [[[4 5]
     [4 5]]]]



  [[[[4 4]
     [4 4]]]


   [[[4 4]
     [4 4]]]


   [[[4 5]
     [4 5]]]]



  [[[[4 4]
     [6 6]]]


   [[[4 4]
     [6 6]]]


   [[[4 5]
     [6 3]]]]]]
[[[[4 4 4 5]
   [4 4 4 5]
   [4 4 4 5]
   [6 6 6 3]]]]


In [57]:
# Contract the kernel's (Cin, kH, kW) axes against the matching window axes.
# tensordot puts k's remaining axis first: (Cout, N, Hout, Wout).
per_filter = np.tensordot(k, strided_array, axes=([1, 2, 3], [3, 4, 5]))
# Swap the first two axes to get the (N, Cout, Hout, Wout) layout.
convolved = per_filter.transpose(1, 0, 2, 3)
print('convolved shape: ', convolved.shape)
print('desired shape: ', out_shape)

convolved shape:  (1, 1, 3, 3)
desired shape:  (1, 1, 3, 3)


## Test MyGrad

```python
class ConvND:
    def __call__(self, x, w, *, stride, padding=0, dilation=1):
        # x ... data:    (N, C, X0, X1, ...)
        # w ... filters: (F, C, W0, W1, ...)
```

In [9]:
from mygrad import ConvND

In [10]:
# Reference result: run MyGrad's N-d convolution with the same hyper-parameters.
conv_layer = ConvND()
conv_options = dict(stride=stride, padding=padding, dilation=dilation)
mygrad_convolve = conv_layer(data, k, **conv_options)
print('mygrad convolved shape: ', mygrad_convolve.shape)
print('desired shape: ', out_shape)

mygrad convolved shape:  (2, 5, 9, 9)
desired shape:  (2, 5, 9, 9)


In [11]:
# CHECK!!!
# True only if every entry of the as_strided/tensordot result equals MyGrad's output.
# NOTE(review): relies on `==` between an ndarray and MyGrad's return type
# comparing elementwise -- confirm against MyGrad's behaviour.
np.all(convolved == mygrad_convolve)

True