In [5]:
import numpy as np
%run ../common/functions.ipynb
%run ../common/gradient.ipynb
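# params: dictionary (instance variable) holding the network's parameters.
# params['W1'] is the layer-1 weight matrix, params['b1'] is the layer-1 bias.
# params['W2'] is the layer-2 weight matrix, params['b2'] is the layer-2 bias.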

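# grads: dictionary holding the gradients (returned by numerical_gradient() and gradient()).
# grads['W1'] is the gradient of the layer-1 weights, grads['b1'] of the layer-1 bias.
# grads['W2'] is the gradient of the layer-2 weights, grads['b2'] of the layer-2 bias.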

class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    The helpers ``sigmoid``, ``sigmoid_grad``, ``softmax``,
    ``cross_entropy_error`` and the module-level ``numerical_gradient`` are not
    defined in this class; they must already be present in the namespace
    (presumably loaded by the ``%run`` cells above -- confirm).

    The learnable parameters live in the ``params`` dictionary under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameter dictionary.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units (columns of ``W1``, rows of ``W2``).
            output_size: Number of outputs (columns of ``W2``).
            weight_init_std: Factor applied to the standard-normal samples
                used to initialise both weight matrices.
        """
        # Weights are scaled Gaussian noise; biases start at zero.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def _forward(self, x):
        """Run the forward pass and return ``(a1, z1, y)``.

        ``a1`` is the hidden pre-activation, ``z1`` the hidden activation and
        ``y`` the softmax output. Shared by ``predict`` and ``gradient`` so the
        model that is evaluated is always the model that is differentiated.
        """
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return a1, z1, y

    def predict(self, x):
        """Run inference on the input data ``x`` and return the network output.

        The output layer is softmax. It previously used sigmoid, which did not
        match the forward pass inside ``gradient`` and made the analytic and
        numerical gradients describe two different models.
        """
        _, _, y = self._forward(x)
        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between the prediction for ``x`` and ``t``.

        Args:
            x: Input data.
            t: Supervised (teacher) data.
        """
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along axis 1,
        so ``t`` has to be 2-D here (e.g. one-hot rows).
        """
        y = np.argmax(self.predict(x), axis=1)
        t = np.argmax(t, axis=1)
        return np.sum(y == t) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Compute the gradients of the loss with the ``numerical_gradient`` helper.

        Args:
            x: Input data.
            t: Supervised (teacher) data.

        Returns:
            Dictionary with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        # The argument W is deliberately ignored: the loss always reads
        # self.params. This only yields a gradient if the module-level
        # numerical_gradient() perturbs the array it is given in place
        # (assumed -- confirm against ../common/gradient.ipynb).
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """Compute the gradients by backpropagation (fast version of ``numerical_gradient``).

        Args:
            x: Input data; ``x.shape[0]`` is used as the batch size.
            t: Supervised data with the same shape as the network output,
                because it is subtracted from the output element-wise.

        Returns:
            Dictionary with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        W2 = self.params['W2']
        grads = {}

        batch_num = x.shape[0]

        # forward
        a1, z1, y = self._forward(x)

        # backward
        # (y - t) / batch_num is the softmax + cross-entropy gradient w.r.t. the
        # output pre-activation, assuming cross_entropy_error averages over the
        # batch (TODO confirm).
        da2 = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, da2)
        grads['b2'] = np.sum(da2, axis=0)

        # Propagate to the hidden activation, then through the sigmoid.
        dz1 = np.dot(da2, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads