## 7 畳み込みネットワーク

In [1]:
import warnings
# Install a global 'ignore' filter so that warnings raised from here on are not displayed.
warnings.filterwarnings('ignore')

In [2]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

## 7.4 Convolution / Poolingレイヤの実装

### 7.4.1 4次元配列

In [3]:
import numpy as np
x = np.random.rand(10, 1, 28, 28) # randomly generate a 4-D array of data
x.shape

(10, 1, 28, 28)

In [4]:
x[0].shape # access the first sample: shape (1, 28, 28)

(1, 28, 28)

In [5]:
x[1].shape # access the second sample: shape (1, 28, 28)

(1, 28, 28)

In [6]:
x[0, 0] # access the spatial data of the first channel of the first sample (equivalently x[0][0])

array([[0.36215656, 0.69369268, 0.10453765, 0.51761177, 0.7421293 ,
        0.84521403, 0.43501947, 0.95746732, 0.06599734, 0.30924712,
        0.46672377, 0.17050326, 0.03700778, 0.56111764, 0.83978287,
        0.9725651 , 0.60205391, 0.5979836 , 0.42867283, 0.04333678,
        0.19890862, 0.09735461, 0.92215834, 0.19515941, 0.43496195,
        0.22499311, 0.03195608, 0.84122903],
       [0.70902291, 0.24183425, 0.52290332, 0.21147288, 0.45741972,
        0.42688416, 0.70303081, 0.95689132, 0.38606602, 0.58734111,
        0.94773956, 0.00326617, 0.82031169, 0.10867583, 0.49304543,
        0.26145155, 0.51168167, 0.57606589, 0.85576638, 0.39627593,
        0.39259156, 0.85832425, 0.77590881, 0.28440228, 0.70234789,
        0.4381869 , 0.87037716, 0.56198674],
       [0.76851666, 0.98029144, 0.29016052, 0.61578205, 0.60544463,
        0.68446486, 0.11562371, 0.29598758, 0.3031701 , 0.48146473,
        0.1392994 , 0.66230867, 0.18632379, 0.49965725, 0.97915141,
        0.72245323, 0.0609

### 7.4.3 Convolutionレイヤの実装

In [9]:
def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every filter-sized window of a 4-D batch into one row of a 2-D matrix.

    Parameters
    ----------
    input_data : 4-D array of shape (batch size, channels, height, width)
    filter_h : filter (window) height
    filter_w : filter (window) width
    stride : step between neighbouring windows
    pad : number of zero cells added on each side of both spatial axes

    Returns
    -------
    col : 2-D array with one row per (sample, output row, output column)
        position and one column per (channel, filter row, filter column)
        element, i.e. shape (N*out_h*out_w, C*filter_h*filter_w).
    """
    batch, channels, height, width = input_data.shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Zero-pad only the two spatial axes; batch and channel axes are untouched.
    pad_widths = [(0, 0), (0, 0), (pad, pad), (pad, pad)]
    padded = np.pad(input_data, pad_widths, 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, out_h, out_w))

    # For each offset (dy, dx) inside the filter, a single strided slice picks
    # the element at that offset from every window at once.
    row_span = stride * out_h
    col_span = stride * out_w
    for dy in range(filter_h):
        rows = slice(dy, dy + row_span, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + col_span, stride)
            patches[:, :, dy, dx, :, :] = padded[:, :, rows, cols]

    # Bring (batch, out_h, out_w) to the front, then flatten each window into a row.
    unfolded = patches.transpose(0, 4, 5, 1, 2, 3)
    return unfolded.reshape(batch * out_h * out_w, -1)

In [10]:
import sys, os
# Add the parent directory to the import path (presumably where the `common` package lives).
sys.path.append(os.pardir)
# NOTE(review): this rebinds the name im2col, shadowing the function defined in the
# earlier cell of this notebook -- presumably both implementations are equivalent; confirm.
from common.util import im2col

x1 = np.random.rand(1, 3, 7, 7)
# NOTE(review): "coll" looks like a typo for "col1" (compare x1 / x2 / col2).
coll = im2col(x1, 5, 5, stride = 1, pad = 0)
print(coll.shape) # (9, 75)

x2 = np.random.rand(10, 3, 7, 7) # batch of 10 samples
col2 = im2col(x2, 5, 5, stride = 1, pad = 0)
print(col2.shape) #(90, 75)

(9, 75)
(90, 75)


In [11]:
class Convolution:
    """Forward-only convolution layer implemented with im2col and one matrix product.

    Parameters
    ----------
    W : 4-D filter array of shape (filter count, channels, filter height, filter width)
    b : bias added to every output position (broadcast over the filter axis)
    stride : step between neighbouring windows
    pad : padding passed through to im2col

    Relies on an ``im2col`` function being available in the enclosing namespace.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Convolve the batch ``x`` of shape (N, C, H, W); returns (N, FN, out_h, out_w)."""
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        # Integer floor division, the same output-size formula im2col uses.
        out_h = (H + 2*self.pad - FH) // self.stride + 1
        out_w = (W + 2*self.pad - FW) // self.stride + 1

        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T  # flatten each filter into one column
        out = np.dot(col, col_W) + self.b

        # Rows of `out` are ordered (N, out_h, out_w); move the filter axis to
        # position 1 to obtain (N, FN, out_h, out_w). The first axis index must
        # be the integer 0 (it was previously the undefined name `o`).
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        return out

In [12]:
def col2im(col, input_shape, filter_h, filter_w, stride=1, pad=0):
    """Fold a 2-D window matrix back into a 4-D image batch, summing overlaps.

    Parameters
    ----------
    col : 2-D array that can be reshaped to
        (N, out_h, out_w, C, filter_h, filter_w)
    input_shape : shape of the image batch to rebuild, e.g. (10, 1, 28, 28)
    filter_h : filter (window) height
    filter_w : filter (window) width
    stride : step between neighbouring windows
    pad : padding that was applied on each side of both spatial axes

    Returns
    -------
    4-D array of shape ``input_shape`` in which every cell holds the sum of all
    window entries that map onto it.
    """
    batch, channels, height, width = input_shape
    out_h = (height + 2 * pad - filter_h) // stride + 1
    out_w = (width + 2 * pad - filter_w) // stride + 1

    # Split the rows back into (batch, out_h, out_w) and the columns into
    # (channels, filter_h, filter_w), then move the window offsets ahead of
    # the output positions.
    windows = col.reshape(batch, out_h, out_w, channels, filter_h, filter_w)
    windows = windows.transpose(0, 3, 4, 5, 1, 2)

    # Work buffer: the padded input size plus stride - 1 extra cells per
    # spatial axis; the final crop removes both the padding and that margin.
    buf_h = height + 2 * pad + stride - 1
    buf_w = width + 2 * pad + stride - 1
    canvas = np.zeros((batch, channels, buf_h, buf_w))

    row_span = stride * out_h
    col_span = stride * out_w
    for dy in range(filter_h):
        rows = slice(dy, dy + row_span, stride)
        for dx in range(filter_w):
            cols = slice(dx, dx + col_span, stride)
            # Accumulate, because overlapping windows touch the same cell.
            canvas[:, :, rows, cols] += windows[:, :, dy, dx, :, :]

    return canvas[:, :, pad:height + pad, pad:width + pad]

In [13]:
class Convolution:
    """Convolution layer with forward and backward passes built on im2col / col2im.

    Parameters
    ----------
    W : 4-D filter array of shape (filter count, channels, filter height, filter width)
    b : bias added to every output position
    stride : step between neighbouring windows
    pad : padding passed through to im2col / col2im

    After ``backward`` the parameter gradients are available as ``dW`` and ``db``.
    Relies on ``im2col`` and ``col2im`` being available in the enclosing namespace.
    """

    def __init__(self, W, b, stride=1, pad=0):
        self.W, self.b = W, b
        self.stride, self.pad = stride, pad

        # Intermediate values cached by forward() for use in backward().
        self.x = None
        self.col = None
        self.col_W = None

        # Gradients of the weight and bias parameters.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Convolve the batch ``x`` of shape (N, C, H, W); returns (N, FN, out_h, out_w)."""
        num_filters, _, f_h, f_w = self.W.shape
        batch, _, in_h, in_w = x.shape
        out_h = 1 + int((in_h + 2 * self.pad - f_h) / self.stride)
        out_w = 1 + int((in_w + 2 * self.pad - f_w) / self.stride)

        patches = im2col(x, f_h, f_w, self.stride, self.pad)
        # One flattened filter per column.
        flat_filters = self.W.reshape(num_filters, -1).T

        scores = np.dot(patches, flat_filters) + self.b
        # Rows are ordered (batch, out_h, out_w); move the filter axis to position 1.
        result = scores.reshape(batch, out_h, out_w, -1).transpose(0, 3, 1, 2)

        # Remember what backward() needs.
        self.x, self.col, self.col_W = x, patches, flat_filters

        return result

    def backward(self, dout):
        """Back-propagate ``dout`` (same shape as forward's output); returns the gradient w.r.t. x."""
        num_filters, channels, f_h, f_w = self.W.shape
        # Undo the forward transpose: one row per output position, one column per filter.
        grad_rows = dout.transpose(0, 2, 3, 1).reshape(-1, num_filters)

        self.db = np.sum(grad_rows, axis=0)
        grad_filters = np.dot(self.col.T, grad_rows)
        self.dW = grad_filters.transpose(1, 0).reshape(num_filters, channels, f_h, f_w)

        # Gradient w.r.t. the unfolded input, folded back into image layout.
        grad_patches = np.dot(grad_rows, self.col_W.T)
        return col2im(grad_patches, self.x.shape, f_h, f_w, self.stride, self.pad)

### 7.4.4 Poolingレイヤの実装

In [None]:
class Pooling:
    """Max-pooling layer with forward and backward passes built on im2col / col2im.

    Parameters
    ----------
    pool_h : pooling window height
    pool_w : pooling window width
    stride : step between neighbouring windows
    pad : padding passed through to im2col / col2im

    Relies on ``im2col`` and ``col2im`` being available in the enclosing namespace.
    NOTE(review): with pad > 0 the padded cells take whatever value im2col pads
    with (zeros for the im2col defined in this notebook), so a padded cell can
    win the max when every real value in a window is negative.
    """

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

        # Cached by forward() for backward(): the input batch and, for every
        # pooling window, the flat index of its maximum element.
        self.x = None
        self.arg_max = None

    def forward(self, x):
        """Max-pool the batch ``x`` of shape (N, C, H, W); returns (N, C, out_h, out_w)."""
        N, C, H, W = x.shape
        # The output size must account for padding, because im2col below is
        # called with self.pad; otherwise the reshape fails for pad > 0.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        # (1) Unfold: one row per (sample, output position, channel) window.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # (2) Max over each window; also remember where the max was, since
        # backward() routes the gradient to that position only.
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        # (3) Reshape: rows are ordered (N, out_h, out_w, C); move channels to axis 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        self.x = x
        self.arg_max = arg_max

        return out

    def backward(self, dout):
        """Back-propagate ``dout`` (same shape as forward's output); returns the gradient w.r.t. x.

        ``forward`` must have been called first so that ``self.x`` and
        ``self.arg_max`` are populated.
        """
        # Match the (N, out_h, out_w, C) row order used in forward().
        dout = dout.transpose(0, 2, 3, 1)

        pool_size = self.pool_h * self.pool_w
        # One row per window; only the arg-max slot of each row gets the gradient.
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))

        # Merge (C, pool_size) back into the column layout col2im expects.
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)

        return dx