In [1]:
import numpy as np
from numpy import typing as npt

### How does SciPy's convolution differ from the convolution operation in deep-learning frameworks?

In [2]:
import scipy.signal

In [3]:
# 7x7 test image holding the integers 1..49 in row-major order
# (each row continues counting where the previous one stopped).
image = [list(range(row_start, row_start + 7)) for row_start in range(1, 50, 7)]

# 3x3 kernel that is NOT symmetric under a 180-degree rotation, so convolution
# (kernel flipped) and cross-correlation (kernel not flipped) give different results.
filter_kernel = [
    [-1, 1, -1],
    [-2, 3, 1],
    [2, -6, 0],
]

In [4]:
# Mathematical 2-D convolution: the kernel is flipped before it slides over the image.
# mode='same' keeps the output the same size as the image; positions outside the
# image are filled with 0.
res1 = scipy.signal.convolve2d(
    image,
    filter_kernel,
    mode='same',
    boundary='fill',
    fillvalue=0,
)
print(res1)

[[  -2   -8   -7   -6   -5   -4   28]
 [   3   -7  -10  -13  -16  -19   14]
 [ -18  -28  -31  -34  -37  -40    0]
 [ -39  -49  -52  -55  -58  -61  -14]
 [ -60  -70  -73  -76  -79  -82  -28]
 [ -81  -91  -94  -97 -100 -103  -42]
 [-101  -61  -63  -65  -67  -69  -57]]


In [5]:
# Deep-learning frameworks implement their "convolution" layers as cross-correlation:
# the kernel slides over the image without being flipped. SciPy's convolve2d performs
# the mathematical convolution (kernel flipped), so correlate2d, not convolve2d, is
# the SciPy call whose result matches what those frameworks compute.
res2 = scipy.signal.correlate2d(
    image,
    filter_kernel,
    mode="same",
    boundary="fill",
    fillvalue=0,
)
print(res2)

[[ -43  -31  -33  -35  -37  -39  -49]
 [ -58  -47  -50  -53  -56  -59  -69]
 [ -72  -68  -71  -74  -77  -80  -90]
 [ -86  -89  -92  -95  -98 -101 -111]
 [-100 -110 -113 -116 -119 -122 -132]
 [-114 -131 -134 -137 -140 -143 -153]
 [ 172   54   55   56   57   58   52]]


### Convolutional layer

In [6]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unroll a batch of images into a 2-D matrix of filter-sized patches.

    Every row of the result is one filter window (all channels, flattened), so
    applying the filters reduces to a single matrix product.

    Parameters
    ----------
    input_data : 4-D array of input data, shape (number of images, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : number of zero pixels added on each side of the height and width axes

    Returns
    -------
    col : 2-D float64 array of shape (N*out_h*out_w, C*filter_h*filter_w)
    """
    batch, channels, height, width = input_data.shape
    rows_out = (height + 2 * pad - filter_h) // stride + 1
    cols_out = (width + 2 * pad - filter_w) // stride + 1

    # Zero-pad the two spatial axes only; batch and channel axes are left alone.
    pad_spec = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_spec, 'constant')

    # patches[n, c, dy, dx, i, j] holds the pixel at offset (dy, dx) inside the
    # window located at output position (i, j).
    patches = np.zeros((batch, channels, filter_h, filter_w, rows_out, cols_out))
    for dy, dx in np.ndindex(filter_h, filter_w):
        y_stop = dy + stride * rows_out
        x_stop = dx + stride * cols_out
        # One strided slice collects the (dy, dx) pixel of every window at once.
        patches[:, :, dy, dx, :, :] = padded[:, :, dy:y_stop:stride, dx:x_stop:stride]

    # Bring (image, out_y, out_x) to the front so each row is one flattened window
    # ordered as (channel, dy, dx).
    windows_first = patches.transpose(0, 4, 5, 1, 2, 3)
    return windows_first.reshape(batch * rows_out * cols_out, -1)

In [7]:
# One 3-channel 7x7 image: a 5x5 filter at stride 1 without padding fits at 3x3 positions.
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, filter_h=5, filter_w=5, stride=1, pad=0)
print(col1.shape)  # (N*out_h*out_w, C*filter_h*filter_w) = (1*3*3, 3*5*5)

(9, 75)


In [8]:
# A batch of ten such images: only the number of rows grows, one row per window per image.
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, filter_h=5, filter_w=5, stride=1, pad=0)
print(col2.shape)  # (N*out_h*out_w, C*filter_h*filter_w) = (10*3*3, 3*5*5)

(90, 75)


In [9]:
class Convolution:
    """Convolutional layer whose forward pass is an im2col unrolling plus one matrix product.

    The filters are applied without flipping, i.e. the layer computes a
    cross-correlation of the input with each filter and then adds the bias.
    """

    def __init__(self, W: npt.NDArray, b: npt.NDArray, stride: int=1, pad: int=0):
        """Store the layer parameters.

        Parameters
        ----------
        W : filter weights, shape (number of filters, channels, filter height, filter width)
        b : bias added to the filter responses; it must broadcast against a trailing
            axis of length "number of filters" (typically shape (number of filters,))
        stride : stride
        pad : zero padding added on each side of the height and width axes
        """
        self.W: npt.NDArray = W
        self.b: npt.NDArray = b
        self.stride: int = stride
        self.pad: int = pad

    def forward(self, x: npt.NDArray) -> npt.NDArray:
        """Apply the filters to a batch of images.

        Parameters
        ----------
        x : input data, shape (number of images, channels, height, width)

        Returns
        -------
        out : array of shape (number of images, number of filters, out_h, out_w)

        Raises
        ------
        ValueError
            If the channel count of ``x`` differs from the channel count of the filters.
        """
        FN, C, FH, FW = self.W.shape
        N, x_channels, H, W = x.shape
        # Fail early with a readable message instead of an opaque shape-alignment
        # error from np.dot further down.
        if x_channels != C:
            raise ValueError(
                f"input has {x_channels} channels but the filters expect {C}"
            )

        # Floor division keeps the sizes as exact integers and matches the
        # output-size computation that im2col performs on the same arguments.
        out_h = (H + 2*self.pad - FH) // self.stride + 1
        out_w = (W + 2*self.pad - FW) // self.stride + 1

        # Rows of `col` are flattened filter windows; columns of `col_W` are
        # flattened filters, so one matrix product evaluates every filter at
        # every window position.
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b

        # (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
        return out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)