使用纯Numpy实现神经网络之卷积层
===

In [1]:
import numpy as np
import math

# 1.初始化

- 输入数据的$shape=[N,W,H,C]$,$N=Batchsize, W=width, H=height, C=channels$
- 卷积核的尺寸$ksize$,个数output_channels, kernel $shape=[k,k,C,outputChannels]$（与代码中 `self.weights` 的维度顺序一致）
- 卷积的步长，基本默认为1.
- 卷积的方法，VALID or SAME，即是否通过padding保持输出图像与输入图像的大小不变


In [None]:
class Conv2D(object):
    """2-D convolution layer implemented with plain NumPy.

    Stores the layer hyper-parameters, randomly initialised weights and
    bias, and zero-filled buffers for the gradients.
    """

    def __init__(self, shape, output_channels, ksize=3, stride=1, method='VALID'):
        """Set up parameters and gradient buffers for the layer.

        Args:
            shape: Input shape as a 4-element sequence ``[N, W, H, C]``
                (batch size, width, height, channels).
            output_channels: Number of output feature-map channels
                (i.e. number of kernels).
            ksize: Side length of the square kernel.
            stride: Convolution stride.
            method: ``'VALID'`` (no padding) or ``'SAME'`` (output spatial
                size is the input size divided by the stride).

        Raises:
            ValueError: If ``method`` is neither ``'VALID'`` nor ``'SAME'``.
        """
        # Fail early with a clear message; otherwise self.eta would never
        # be created and the failure would surface as an AttributeError.
        if method not in ('VALID', 'SAME'):
            raise ValueError(
                "method must be 'VALID' or 'SAME', got %r" % (method,))

        self.input_shape = shape  # shape of the input batch
        self.output_channels = output_channels  # channels of the output feature map
        self.input_channels = shape[-1]  # channels of the input
        self.batchsize = shape[0]  # batch size of the input
        self.stride = stride
        self.ksize = ksize
        self.method = method

        # Dividing standard-normal samples by sqrt(fan_in / 2) gives them a
        # standard deviation of sqrt(2 / fan_in), with fan_in = k * k * C_in.
        # The float literal keeps the division exact on Python 2 as well.
        weights_scale = math.sqrt(ksize * ksize * self.input_channels / 2.0)
        self.weights = np.random.standard_normal(
            (ksize, ksize, self.input_channels, self.output_channels)
        ) / weights_scale  # kernel weights
        self.bias = np.random.standard_normal(self.output_channels) / weights_scale  # bias

        # The buffers below are used by the backward pass.
        # Floor division keeps the dimensions integral (np.zeros rejects
        # float dimensions), and each spatial axis uses its own input size.
        if method == 'VALID':
            out_w = (shape[1] - ksize) // self.stride + 1
            out_h = (shape[2] - ksize) // self.stride + 1
        else:  # 'SAME'
            out_w = shape[1] // self.stride
            out_h = shape[2] // self.stride
        self.eta = np.zeros((shape[0], out_w, out_h, self.output_channels))

        self.w_gradient = np.zeros(self.weights.shape)
        self.b_gradient = np.zeros(self.bias.shape)
        self.output_shape = self.eta.shape

将权重和偏置除以 weights_scale（即 $\sqrt{k \cdot k \cdot C / 2}$），相当于让初始值服从标准差为 $\sqrt{2/(k \cdot k \cdot C)}$ 的正态分布，可以更好地初始化，也就是MSRA方法

# 2.前向传播-forward