In [95]:
import numpy as np
import matplotlib.pyplot as plt
import os,sys
import os.path

In [96]:
import sys, os
sys.path.append(os.pardir)  # 为了导入父目录的文件而进行的设定
import numpy as np
import pickle
import numpy as np
import os.path
import gzip
import urllib.request

dataset_path = os.path.dirname(os.path.abspath('exercise.ipynb'))
save_file = os.path.join(dataset_path,'mnist.pkl')

url_base = 'http://yann.lecun.com/exdb/mnist/'
key_file = {
    'train_img':'train-images-idx3-ubyte.gz',
    'train_label':'train-labels-idx1-ubyte.gz',
    'test_img':'t10k-images-idx3-ubyte.gz',
    'test_label':'t10k-labels-idx1-ubyte.gz'
}

train_num = 60000
test_num = 10000
img_size = 784
img_dim = (1,28,28)


def downloadfile(file_name):
    file_path = os.path.join(dataset_path,file_name)

    if os.path.exists(file_path):
        return
    print('Downloading '+file_name)
    urllib.request.urlretrieve(url_base+file_name,file_path)
    print('Done')

def downloadmnist():
    for v in key_file.values():
        downloadfile(v)

def load_img(file_name):
    file_path = os.path.join(dataset_path,file_name)
    print('converting '+file_name+" to numpy array")
    with gzip.open(file_path,"rb") as f:
        data = np.frombuffer(f.read(),np.uint8,offset=16)
    
    data = data.reshape(-1,img_size)
    print('done')

    return data

def load_label(file_name):
    file_path = os.path.join(dataset_path,file_name)
    print('converting '+file_name+'to numpy array')
    with gzip.open(file_path,'rb') as f:
        labels = np.frombuffer(f.read(),np.uint8,offset=8)
    print('Done')

    return labels

def convert_numpy():
    dataset = {}
    dataset['train_img'] = load_img(key_file['train_img'])
    dataset['train_label'] = load_label(key_file['train_label'])
    dataset['test_img'] = load_img(key_file['test_img'])
    dataset['test_label'] = load_label(key_file['test_label'])

    return dataset

def init_mnist():
    downloadmnist()
    dataset = convert_numpy()
    print('creating pickle file')
    with open(save_file,'wb') as f:
        pickle.dump(dataset,f,-1)
    print('done')

def change_one_hot(x):
    t = np.zeros((x.size,10))
    for idx,row in enumerate(t):
        row[x[idx]] = 1
    return t


def load_mnist(normalize=True, flatten=True, one_hot_label=False):
    """读入MNIST数据集
    
    Parameters
    ----------
    normalize : 将图像的像素值正规化为0.0~1.0
    one_hot_label : 
        one_hot_label为True的情况下,标签作为one-hot数组返回
        one-hot数组是指[0,0,1,0,0,0,0,0,0,0]这样的数组
    flatten : 是否将图像展开为一维数组
    
    Returns
    -------
    (训练图像, 训练标签), (测试图像, 测试标签)
    """
    if not os.path.exists(save_file):
        init_mnist()
        
    with open(save_file, 'rb') as f:
        dataset = pickle.load(f)
    
    if normalize:
        for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].astype(np.float32)
            dataset[key] /= 255.0
            
    if one_hot_label:
        for key in ('train_label', 'test_label'):
            dataset[key] = change_one_hot(dataset[key])
    
    if not flatten:
         for key in ('train_img', 'test_img'):
            dataset[key] = dataset[key].reshape(-1, 1, 28, 28)

    
    return  dataset['train_img'], dataset['train_label'], dataset['test_img'], dataset['test_label']

乘法层（MulLayer）

In [97]:
class MulLayer:

    #初始化、前向传播、反向传播
    
    def __init__(self) -> None:
        self.x = None
        self.y = None

    def forward(self,x,y):
        self.x = x
        self.y = y
        out = x*y
        
        return out
    
    #dout为上游传来的导数
    def backward(self,dout):
        dx = dout*self.y
        dy = dout*self.x
        
        return dx,dy

In [98]:
apple = 100
apple_num = 2
tax = 1.1

mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

apple_price = mul_apple_layer.forward(apple,apple_num)
price = mul_tax_layer.forward(apple_price,tax)

price

220.00000000000003

In [99]:
dprice = 1
dapple_price,dtax = mul_tax_layer.backward(dprice)
dapple,dapple_num = mul_apple_layer.backward(dapple_price)


dapple,dapple_num,dtax

(2.2, 110.00000000000001, 200)

加法层（AddLayer）

In [100]:
class AddLayer:
    def __init__(self) -> None:
        pass

    def forward(self,x,y):
        out = x+y
        return out
    
    def backward(self,dout):
        dx = dout * 1
        dy = dout * 1

        return dx,dy
        

In [101]:
apple = 100
apple_num = 2
orange = 150
orange_num = 3
tax = 1.1

mul_apple_layer = MulLayer()
mul_orange_layer = MulLayer()
add_apple_orange_layer = AddLayer()
mul_sumtax_layer = MulLayer()


In [102]:
apple_price = mul_apple_layer.forward(apple,apple_num)
orange_price = mul_orange_layer.forward(orange,orange_num)
all_price = add_apple_orange_layer.forward(apple_price,orange_price)
price = mul_tax_layer.forward(all_price,tax)

dprice = 1
dall_price,dtax = mul_tax_layer.backward(dprice)
dapple_price,dorange_price = add_apple_orange_layer.backward(dall_price)
dorange,dorange_num = mul_orange_layer.backward(dorange_price)
dapple,dapple_num = mul_apple_layer.backward(dapple_price)

price,dapple,dapple_num,dorange,dorange_num,dtax


(715.0000000000001, 2.2, 110.00000000000001, 3.3000000000000003, 165.0, 650)

ReLU层

In [103]:
class Relu:
    def __init__(self) -> None:
        self.mask = None

    #传入的x不是单个值 简单比大小就可以的 所以需要mask进行处理
    def forward(self,x):
        self.mask = (x<=0)
        out = x.copy()
        out[self.mask] = 0

    #mask是由True/False构成的Numpy数组，会把正向传播时
    #输入x中<=0的地方保存为True，其他地位False
    #mask和x尺寸一样
        return out
    
    def backward(self,dout):
        dout[self.mask] = 0
        dx = dout

        return dx

In [104]:
x = np.array([[1.0,-0.5],[-2.0,3.0]])
mask = (x<=0)
x,mask,x[mask]

(array([[ 1. , -0.5],
        [-2. ,  3. ]]),
 array([[False,  True],
        [ True, False]]),
 array([-0.5, -2. ]))

Sigmoid层

In [105]:
class Sigmoid:
    def __init__(self) -> None:
        self.out = None

    def forward(self,x):
        out = 1/(1+np.exp(-x))
        return out
    
    def backward(self,dout):
        dx = dout * self.out * (1-self.out)
        return dx

Affine层\
神经网络的正向传播中进行的矩阵乘积运算在几何领域被称为仿射变换 因此称为Affine层

In [106]:
x_dot_w = np.array([[0,0,0],[10,10,10]])
b = np.array([1,2,3])
x_dot_w,x_dot_w+b
                   

(array([[ 0,  0,  0],
        [10, 10, 10]]),
 array([[ 1,  2,  3],
        [11, 12, 13]]))

矩阵的导数 和矩阵的尺寸一样 所以偏置b的导数 是把dy第0轴进行求和

In [107]:
class Affine:
    def __init__(self,w,b) -> None:
        self.w = w
        self.b = b
        self.x = None
        self.dw = None
        self.db = None

    def forward(self,x):
        self.x = x
        out = np.dot(x,self.w)+self.b

    def backward(self,dout):
        dx = np.dot(dout,self.w.T)
        self.dw = np.dot(self.x.T,dout)
        self.db = np.sum(dout,axis=0)

        return dx

In [108]:
class Affine:
    def __init__(self, w, b):
        self.w =w
        self.b = b
        self.x = None
        self.original_x_shape = None
        # 权重和偏置参数的导数
        self.dw = None
        self.db = None

    def forward(self, x):
        # 对应张量
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x

        out = np.dot(self.x, self.w) + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.w.T)
        self.dw = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        
        dx = dx.reshape(*self.original_x_shape)  # 还原输入数据的形状（对应张量）
        return dx


In [109]:
def softmax(x):
    if x.ndim == 2:
        x = x.T
        max_x = np.max(x,axis=0)
        x = x-max_x
        y = np.exp(x)/np.sum(np.exp(x),axis=0)

        return y.T
    
    max_x = np.max(x)
    x = x-max_x
    y = np.exp(x)/np.sum(np.exp(x))

def cross_entropy_error(y,t):
    delta = 1e-4
    if y.ndim == 1:
        y = y.reshape(1,y.size)
        t = t.reshape(1,t.size)

    batch_size = y.shape[0]

    if y.size == t.size:
        t = np.argmax(t,axis=1)#t = t.argmax(axis = 1)

    return -np.sum(np.log(y[np.arange(batch_size),t]+delta))/batch_size

In [110]:
class SoftmaxWithLoss:
    def __init__(self) -> None:
        self.loss = None
        self.y = None
        self.t = None

    def forward(self,x,t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y,self.t)

        return self.loss
    
    def backward(self,dout=1):
        batch_size = self.t.shape[0]
        dx = (self.y - self.t)/batch_size
        return dx

In [111]:
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None # softmax的输出
        self.t = None # 监督数据

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size: # 监督数据是one-hot-vector的情况
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        
        return dx

In [112]:
from collections import OrderedDict

In [113]:
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)
    
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)
        
        x[idx] = tmp_val - h 
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        
        x[idx] = tmp_val # 还原值
        it.iternext()   
        
    return grad

In [114]:
class TwoLayerNet:
    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.1) -> None:
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size,hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size,output_size)
        self.params['b2'] = np.zeros(output_size)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'],self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['w2'],self.params['b2'])
        
        self.lastlayer = SoftmaxWithLoss()


    def predict(self,x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x
    
    def loss(self,x,t):
        y = self.predict(x)
        return self.lastlayer.forward(y,t)
    
    def accuracy(self,x,t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        if t.ndim!=1:t = np.argmax(t,axis=1)
        accuracy = np.sum(y==t)/float(x.shape[0])

        return accuracy
    

    def num_gradient(self,x,t):
        loss_w = lambda w: self.loss(x,t)

        grads = {}
        grads['w1'] = numerical_gradient(loss_w,self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w,self.params['b1'])
        grads['w2'] = numerical_gradient(loss_w,self.params['w2'])
        grads['b2'] = numerical_gradient(loss_w,self.params['b2'])

        return grads
    
    def gradinet(self,x,t):
        self.loss(x,t)

        dout = 1
        dout = self.lastlayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        grads = {}
        grads['w1'] = self.layers['Affine1'].dw
        grads['b1'] = self.layers['Affine1'].db
        grads['w2'] = self.layers['Affine2'].dw
        grads['b2'] = self.layers['Affine2'].db 
        
        return grads

In [119]:
x_train,t_train,x_test,t_test = load_mnist(normalize=True,flatten=True,one_hot_label=True)
x_train.shape

(60000, 784)

In [116]:
network = TwoLayerNet(input_size=784,hidden_size=50,output_size=10)
x_batch = x_train[:3]
t_batch = t_train[:3]

In [117]:
grad_numerical = network.num_gradient(x_batch,t_batch)
grad_backprop = network.gradinet(x_batch,t_batch)

for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key]-grad_numerical[key]))
    print(key + ' : '+str(diff))

w1 : 4.069151472354677e-06
b1 : 2.4143394849956665e-05
w2 : 6.171952756299422e-05
b2 : 0.0001385209373427773
