# ゼロから作るDeep Learning
## 7章 畳み込みニューラルネットワーク
### 7.2 畳み込み層
#### 7.2.2 畳み込み演算

In [2]:
import numpy as np

# 4x4 input feature map for the worked convolution example.
input_data = np.array([
    [1, 2, 3, 0],
    [0, 1, 2, 3],
    [3, 0, 1, 2],
    [2, 3, 0, 1],
])

# 3x3 filter that is slid over the input (stride 1, no padding).
kernel = np.array([
    [2, 0, 1],
    [0, 1, 2],
    [1, 0, 2],
])

# Each output element is the sum of the element-wise product between the
# kernel and one 3x3 window of the input. cRC is output row R, column C.
c11 = (input_data[0:3, 0:3] * kernel).sum()
c12 = (input_data[0:3, 1:4] * kernel).sum()
c21 = (input_data[1:4, 0:3] * kernel).sum()
c22 = (input_data[1:4, 1:4] * kernel).sum()

# Assemble the four sums into the 2x2 output map.
convoluted_data = np.array([[c11, c12], [c21, c22]])

print('----- 畳み込み演算の結果表示 -----')

# Each call prints a title, the matrix, and a trailing blank line.
print('入力データ:', input_data, '', sep='\n')
print('フィルター:', kernel, '', sep='\n')
print('結果:', convoluted_data, '', sep='\n')

----- 畳み込み演算の結果表示 -----
入力データ:
[[1 2 3 0]
 [0 1 2 3]
 [3 0 1 2]
 [2 3 0 1]]

フィルター:
[[2 0 1]
 [0 1 2]
 [1 0 2]]

結果:
[[15 16]
 [ 6 15]]



#### 7.3.1 プーリング層の特徴
プーリングは縦・横方向の空間を小さくする演算。  
プーリング層には下記の3つの特徴がある。  
1. 学習するパラメータが無い
2. チャンネル数は変化しない
3. 微小な位置変化に対してロバスト  
   この特徴のおかげで、入力が少しずれても最終的な出力層での認識結果が安定する

Maxプーリングが一番有名で、以降はプーリング層にはMaxプーリングを使う。

### 7.4 Convolution / Poolingレイヤの実装
#### 7.4.3 Convolutionレイヤの実装

In [4]:
# im2col関数の使い方
import os
import numpy as np
from common.util import im2col

# Batch of one 3-channel 7x7 input, unrolled with a 5x5 window.
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, 5, 5, stride=1, pad=0)

# Batch of ten inputs with the same channel count and spatial size.
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, 5, 5, stride=1, pad=0)

# Show the shape of each unrolled matrix, one per line.
print(col1.shape, col2.shape, sep='\n')

(9, 75)
(90, 75)


In [5]:
class Convolution(object):
    """Convolution layer whose forward pass is one matrix product.

    The input is unrolled by ``im2col`` into a 2-D matrix, multiplied by the
    flattened filters, and reshaped back to a 4-D array.
    """

    def __init__(self, W, b, stride=1, pad=0):
        """Store the filter weights, bias and sliding-window settings.

        Args:
            W: Filter weights; ``forward`` unpacks the shape as
                (FN, C, FH, FW).
            b: Bias added to the result of the matrix product.
            stride: Step of the sliding window, passed to ``im2col``.
            pad: Padding amount, passed to ``im2col``.
        """
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Apply the filters to ``x`` and return the output feature maps.

        Args:
            x: 4-D array whose shape is unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, FN, out_h, out_w).
        """
        FN, _, FH, FW = self.W.shape
        N, _, H, W = x.shape

        # The output height depends on the filter HEIGHT (FH). Using the
        # filter count (FN) here gives a wrong size whenever FN != FH and
        # makes the final reshape fail.
        out_h = (H + 2 * self.pad - FH) // self.stride + 1
        out_w = (W + 2 * self.pad - FW) // self.stride + 1

        # For the dot product and reshape below to work, im2col must return
        # one row per window position and one column per filter element.
        col = im2col(x, FH, FW, self.stride, self.pad)
        # Flatten each filter into a column: (C*FH*FW, FN).
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b

        # (N*out_h*out_w, FN) -> (N, out_h, out_w, FN) -> (N, FN, out_h, out_w)
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        return out

In [6]:
class Pooling(object):
    """Max-pooling layer implemented with ``im2col``."""

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        """Store the pooling window size and sliding-window settings.

        Args:
            pool_h: Height of the pooling window.
            pool_w: Width of the pooling window.
            stride: Step of the sliding window, passed to ``im2col``.
            pad: Padding amount, passed to ``im2col``.
        """
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Return the maximum of every pooling window of ``x``.

        Args:
            x: 4-D array whose shape is unpacked as (N, C, H, W).

        Returns:
            Array of shape (N, C, out_h, out_w).
        """
        N, C, H, W = x.shape
        # Padding is included in the output size because ``pad`` is also
        # handed to im2col; without it any pad > 0 breaks the reshape below.
        # With pad == 0 the result equals the unpadded formula.
        # NOTE(review): if im2col pads with zeros, border maxima are clamped
        # to >= 0 for negative inputs - confirm before using pad > 0.
        out_h = (H + 2 * self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2 * self.pad - self.pool_w) // self.stride + 1

        # (1) Unroll: one row per pooling window.
        # Assumes im2col orders each row channel by channel, so the reshape
        # isolates per-channel windows - TODO confirm against im2col.
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h * self.pool_w)

        # (2) Maximum of each window.
        out = np.max(col, axis=1)

        # (3) Reshape to (N, out_h, out_w, C), then move channels to axis 1.
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out

### 7.5 CNNの実装

In [11]:
from collections import OrderedDict
import numpy as np
from common.layers import (Convolution,
                           Pooling,
                           Relu,
                           Affine,
                           SoftmaxWithLoss)

class SimpleConvNet(object):
    """Small CNN: Conv -> ReLU -> Pool -> Affine -> ReLU -> Affine.

    ``SoftmaxWithLoss`` is kept separately as the final layer and is only
    used when computing the loss.
    """

    def __init__(self,
                 input_dim=(1, 28, 28),
                 conv_param={'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
                 hidden_size=100,
                 output_size=10,
                 weight_init_std=0.01):
        """Create the parameters and build the layer stack.

        Args:
            input_dim: Input shape as (channels, height, width). Only
                ``input_dim[1]`` is used as the spatial size, so a square
                input is assumed.
            conv_param: Dict with the keys 'filter_num', 'filter_size',
                'pad' and 'stride'. It is only read, never mutated, so the
                shared default dict is safe.
            hidden_size: Number of units in the hidden affine layer.
            output_size: Number of units in the output affine layer.
            weight_init_std: Scale applied to the random initial weights.
        """
        filter_num = conv_param['filter_num']
        filter_size = conv_param['filter_size']
        filter_pad = conv_param['pad']
        filter_stride = conv_param['stride']
        input_size = input_dim[1]

        conv_output_size = self._conv_output_size(
            input_size, filter_size, filter_pad, filter_stride)
        # The 2x2 / stride-2 pooling below halves each spatial side; floor
        # division keeps the count right for odd convolution sizes too.
        pool_side = conv_output_size // 2
        pool_output_size = filter_num * pool_side * pool_side

        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(
            filter_num, input_dim[0], filter_size, filter_size)
        self.params['b1'] = np.zeros(filter_num)
        self.params['W2'] = weight_init_std * np.random.randn(
            pool_output_size, hidden_size)
        self.params['b2'] = np.zeros(hidden_size)
        self.params['W3'] = weight_init_std * np.random.randn(
            hidden_size, output_size)
        self.params['b3'] = np.zeros(output_size)

        # The layer classes are referenced by their directly imported names;
        # no ``layers`` module is in scope in this cell.
        # Insertion order is the forward order; gradient() walks it backwards.
        self.layers = OrderedDict()
        self.layers['Conv1'] = Convolution(self.params['W1'],
                                           self.params['b1'],
                                           filter_stride,
                                           filter_pad)
        self.layers['Relu1'] = Relu()
        self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
        self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
        self.layers['Relu2'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])

        self.last_layer = SoftmaxWithLoss()

    @staticmethod
    def _conv_output_size(input_size, filter_size, pad, stride):
        """Return the side length of the convolution output.

        Computes ``(input_size - filter_size + 2 * pad) // stride + 1``.
        The stride divides the whole padded extent, not only the padding.
        """
        return (input_size - filter_size + 2 * pad) // stride + 1

    def predict(self, x):
        """Run ``x`` through every layer in order and return the result."""
        for layer in self.layers.values():
            x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss of the prediction for ``x`` against target ``t``."""
        y = self.predict(x)

        return self.last_layer.forward(y, t)

    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Args:
            x: Input batch.
            t: Target labels.

        Returns:
            Dict with keys 'W1', 'b1', 'W2', 'b2', 'W3', 'b3', read from the
            ``dW`` / ``db`` attributes of the matching layers (presumably
            set by each layer's ``backward``).
        """
        # Forward pass so every layer holds the state backward() needs.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then go through the stack
        # in reverse order.
        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Collect the gradients. W3/b3 belong to 'Affine2'; no 'Affine3'
        # layer exists.
        grads = {}
        grads['W1'] = self.layers['Conv1'].dW
        grads['b1'] = self.layers['Conv1'].db
        grads['W2'] = self.layers['Affine1'].dW
        grads['b2'] = self.layers['Affine1'].db
        grads['W3'] = self.layers['Affine2'].dW
        grads['b3'] = self.layers['Affine2'].db

        return grads