# 7.4 卷积层和池化层的实现

## 7.4.1 4维数据

In [1]:
import numpy as np
# Random mini-batch laid out as (batch size, channels, height, width)
x = np.random.rand(
    10,  # batch size
    1,   # channels
    28,  # height
    28,  # width
)
x.shape

(10, 1, 28, 28)

In [3]:
# Indexing the first axis picks one sample; the remaining three axes are kept
print(x[0].shape, x[1].shape, sep="\n")

(1, 28, 28)
(1, 28, 28)


## 7.4.3 卷积层的实现

实现im2col

In [6]:
# im2col: common/util.py

def im2col(input_data, filter_h, filter_w, stride=1, pad=0):
    """Unfold every filter-sized window of a 4-D batch into one row of a 2-D array.

    Parameters
    ----------
    input_data : 4-D array of shape (batch size, channels, height, width)
    filter_h : filter height
    filter_w : filter width
    stride : stride
    pad : padding applied to both sides of the height and width axes

    Returns
    -------
    col : 2-D array with one row per (sample, output row, output column)
    """
    batch, channels, height, width = input_data.shape
    rows_out = (height + 2 * pad - filter_h) // stride + 1
    cols_out = (width + 2 * pad - filter_w) // stride + 1

    # Only the two spatial axes are padded; batch and channel axes are untouched.
    spatial_pad = (pad, pad)
    padded = np.pad(input_data, [(0, 0), (0, 0), spatial_pad, spatial_pad], 'constant')
    patches = np.zeros((batch, channels, filter_h, filter_w, rows_out, cols_out))

    # For each offset inside the filter, copy the strided grid of pixels that
    # this offset touches across all output positions at once.
    for dy in range(filter_h):
        row_sel = slice(dy, dy + stride * rows_out, stride)
        for dx in range(filter_w):
            col_sel = slice(dx, dx + stride * cols_out, stride)
            patches[:, :, dy, dx, :, :] = padded[:, :, row_sel, col_sel]

    # Bring the output-position axes forward, then flatten each window
    # (channel, filter row, filter column) into a single row.
    windows_first = patches.transpose(0, 4, 5, 1, 2, 3)
    return windows_first.reshape(batch * rows_out * cols_out, -1)

im2col实例

In [8]:
import sys, os
sys.path.append(os.pardir)
from common.util import im2col

# Single sample: 3 channels, 7x7 input, 5x5 filter
x1 = np.random.rand(1, 3, 7, 7)
col1 = im2col(x1, filter_h=5, filter_w=5, stride=1, pad=0)
print(col1.shape)

# Mini-batch of 10 samples with the same geometry
x2 = np.random.rand(10, 3, 7, 7)
col2 = im2col(x2, filter_h=5, filter_w=5, stride=1, pad=0)
print(col2.shape)

(9, 75)
(90, 75)


利用im2col实现卷积层

- Convolution类
- 实现前向传播
- 反向传播参见common/layer.py

In [9]:
import numpy as np

class Convolution:
    """Convolution layer (forward pass only) computed with im2col and one matrix product."""

    def __init__(self, W, b, stride=1, pad=0):
        # W: filter weights; forward() unpacks its shape as (FN, C, FH, FW)
        self.W = W
        # b: bias added to the (rows, FN) product in forward()
        # (presumably shape (FN,) so it broadcasts per filter -- confirm with callers)
        self.b = b
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Apply the filters to a batch.

        Parameters
        ----------
        x : 4-D array of shape (N, C, H, W)

        Returns
        -------
        out : 4-D array of shape (N, FN, out_h, out_w)
        """
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        # Same floor-division formula that im2col uses for its output size.
        out_h = (H + 2*self.pad - FH) // self.stride + 1
        out_w = (W + 2*self.pad - FW) // self.stride + 1

        # This computation was previously enclosed in a string literal, so it
        # never ran and `out` was undefined below.
        col = im2col(x, FH, FW, self.stride, self.pad)
        # Flatten each filter into one column: (C*FH*FW, FN)
        col_W = self.W.reshape(FN, -1).T
        # The convolution is now a plain matrix product, as in a fully connected layer
        out = np.dot(col, col_W) + self.b

        # Rows are ordered (N, out_h, out_w); restore the (N, FN, out_h, out_w) layout
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)

        return out

## 7.4.4 池化层的实现
common/layer.py

In [12]:
# 前向传播
class Pooling:
    """Max pooling layer (forward pass only) computed with im2col."""

    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        # Height and width of the pooling window
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad

    def forward(self, x):
        """Take the maximum over each pooling window, independently per channel.

        Parameters
        ----------
        x : 4-D array of shape (N, C, H, W)

        Returns
        -------
        out : 4-D array of shape (N, C, out_h, out_w)

        Notes
        -----
        The im2col shown in this notebook pads with zeros, so with pad > 0 the
        padded zeros take part in the maximum.
        """
        N, C, H, W = x.shape
        # Include the padding so the size agrees with what im2col produces;
        # for pad == 0 this is the same value as before.
        out_h = (H + 2*self.pad - self.pool_h) // self.stride + 1
        out_w = (W + 2*self.pad - self.pool_w) // self.stride + 1

        # (1) Unfold: one row per (sample, output position, channel) window
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)

        # (2) Maximum of each window
        out = np.max(col, axis=1)

        # (3) Rows are ordered (N, out_h, out_w, C); move channels to axis 1
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)

        return out