In [1]:
# 需要用到的函数
import numpy as np


def softmax(x):
    """Return the softmax of *x*.

    When ``x.ndim == 2`` the softmax is taken independently over each row
    (axis 1). For any other number of dimensions the normalisation runs
    over every element of the array at once.

    The maximum is subtracted before exponentiating so that ``np.exp``
    does not overflow for large inputs; this does not change the result.
    """
    if x.ndim != 2:
        shifted = x - np.max(x)    # overflow guard
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Row-wise case: keepdims lets the per-row statistics broadcast back
    # against the (rows, columns) array.
    shifted = x - np.max(x, axis=1, keepdims=True)    # overflow guard
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)


def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions *y* and targets *t*.

    A 1-D *y* is treated as a batch holding a single sample. *t* may be
    one-hot (same number of elements as *y*) or an array of class indices;
    one-hot targets are converted to indices with ``argmax``.

    ``1e-7`` is added inside the logarithm so that a predicted probability
    of exactly zero does not produce ``-inf``.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # Same element count means t is one-hot: reduce it to label indices.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    rows = np.arange(n_samples)
    picked = y[rows, labels]    # probability assigned to the correct class
    return -np.sum(np.log(picked + 1e-7)) / n_samples


def mse(y, t):
    """Return the sum of squared differences between *y* and *t*, divided by ``len(y)``.

    Note that the divisor is the length of the first axis of *y*, not the
    total number of elements.
    """
    residual = y - t
    total = np.sum(residual ** 2)
    return total / len(y)

In [2]:
# 每个层的类
class Affine:
    """Fully connected layer computing ``x.dot(W) + b``.

    Attributes:
        W, b: the weight and bias objects passed to the constructor
            (stored by reference, not copied).
        x: input given to the most recent ``forward`` call.
        dW, db: gradients with respect to ``W`` and ``b``, filled in by
            ``backward``.
    """

    def __init__(self, w, b):
        self.W, self.b = w, b
        # Populated by forward(); backward() needs it to compute dW.
        self.x = None
        # Populated by backward().
        self.dW = self.db = None

    def forward(self, x):
        """Store *x* and return ``x.dot(W) + b``."""
        self.x = x
        return x.dot(self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient with respect to the input.

        *dout* is the upstream gradient. ``db`` sums *dout* over axis 0.
        """
        self.dW = self.x.T.dot(dout)
        self.db = np.sum(dout, axis=0)
        return dout.dot(self.W.T)


class Sigmoid:
    """Element-wise logistic sigmoid activation layer.

    Attributes:
        out: output of the most recent ``forward`` call; ``backward``
            needs it because the derivative is ``out * (1 - out)``.
    """

    def __init__(self, x=None):
        # ``x`` is unused. It is kept (now optional) only so that existing
        # callers passing an argument keep working, while ``Sigmoid()``
        # can be constructed like the other layers.
        self.out = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` and cache it for ``backward``."""
        out = 1 / (1 + np.exp(-x))
        self.out = out
        return out

    def backward(self, dout):
        """Return the upstream gradient *dout* scaled by the sigmoid derivative.

        Must be called after ``forward``.
        """
        # The original wrote ``self.out(1-self.out)``, which tries to call
        # the cached array and raises TypeError; a product was intended.
        return dout * self.out * (1 - self.out)


class ReLU:
    """Rectified linear unit activation layer.

    Attributes:
        mask: boolean array marking the positions where the most recent
            ``forward`` input was ``<= 0``; ``None`` before the first call.
        out: never populated by this class; retained only because the
            original constructor defined it.
    """

    def __init__(self):
        self.out = None
        self.mask = None

    def forward(self, x):
        """Return a copy of *x* with every non-positive entry set to 0."""
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return *dout* with the gradient blocked where the input was ``<= 0``.

        Must be called after ``forward``. The result is a new array: the
        caller's *dout* is left untouched, whereas the original version
        zeroed entries of the caller's array in place.
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx


class SoftMaxWithLoss:
    """Softmax activation followed by cross-entropy loss.

    Attributes:
        loss: value returned by the most recent ``forward`` call.
        y: softmax output of the most recent ``forward`` call.
        t: targets given to the most recent ``forward`` call.
    """

    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        """Return the loss (not the softmax output) for scores *x* and targets *t*.

        *t* may be one-hot or an array of class indices; both forms are
        handled by ``cross_entropy_error``.
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Must be called after ``forward``. The gradient is
        ``(y - onehot(t)) / batch_size`` scaled by the upstream gradient
        *dout*. Targets are accepted in the same two forms as ``forward``:
        one-hot, or class indices. A 1-D ``y`` is treated as a batch of
        one sample, matching ``cross_entropy_error``.
        """
        # View a single sample as a one-row batch so the code below is uniform.
        y = self.y if self.y.ndim > 1 else self.y.reshape(1, -1)
        t = np.asarray(self.t)
        batch_size = y.shape[0]

        if t.size == y.size:
            # One-hot targets.
            dx = (y - t.reshape(y.shape)) / batch_size
        else:
            # Index targets: subtract 1 at each sample's correct class,
            # which equals subtracting the one-hot encoding.
            dx = y.copy()
            dx[np.arange(batch_size), t.reshape(-1)] -= 1
            dx = dx / batch_size

        return dx.reshape(self.y.shape) * dout


class RegressionLoss:
    """Squared-error loss layer for regression outputs.

    The loss is ``sum((y - t) ** 2) / len(y)`` as computed by ``mse``.

    Attributes:
        loss: value returned by the most recent ``forward`` call.
        y: predictions given to the most recent ``forward`` call.
        t: targets given to the most recent ``forward`` call.
    """

    def __init__(self):
        self.loss = None
        self.y = None
        self.t = None

    def forward(self, x, t):
        """Store predictions *x* and targets *t* and return the loss."""
        self.t = t
        self.y = x
        self.loss = mse(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the predictions.

        Must be called after ``forward``. Differentiating
        ``sum((y - t) ** 2) / len(y)`` with respect to ``y`` gives
        ``2 * (y - t) / len(y)``, which is then scaled by the upstream
        gradient *dout*. The earlier expression
        ``-(t - y) * y * (1 - y) * y`` was not the derivative of this loss.
        """
        # Same divisor as mse() so the gradient matches the forward value.
        batch_size = len(self.y)
        return dout * 2.0 * (self.y - self.t) / batch_size
    

In [None]:
from collections import OrderedDict


class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a ``RegressionLoss`` head.

    Attributes:
        params: dict holding the arrays ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        layers: ``OrderedDict`` of the layers, in forward order.
        lastLayer: loss layer applied to the network output.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are Gaussian noise scaled by weight_init_std; biases start
        # at zero. W1 is drawn before W2.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # The layers receive the very same array objects stored in params.
        self.layers = OrderedDict([
            ('Affine1', Affine(w1, b1)),
            ('ReLU', ReLU()),
            ('Affine2', Affine(w2, b2)),
        ])
        self.lastLayer = RegressionLoss()

    def predict(self, x):
        """Run the forward pass through every layer except the loss layer."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the network output for input *x* against targets *t*."""
        # The loss layer takes the output of the preceding layers as its input.
        return self.lastLayer.forward(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of samples whose arg-max output equals the label.

        *t* may hold label indices (1-D) or per-sample rows, in which case
        the arg-max over axis 1 is taken as the label.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def gradient(self, x, t):
        """Return the gradients of the loss for ``W1``, ``b1``, ``W2``, ``b2``.

        The result is a dict keyed like ``params``.
        """
        # The forward pass cannot be skipped: it caches the values each
        # layer needs for its backward step.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the layers in reverse.
        upstream = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            upstream = layer.backward(upstream)

        first = self.layers['Affine1']
        second = self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }
