In [8]:
import os
import numpy as np
from function import *
import scipy.io as scio
import time
import PIL.Image as PIL
from collections import OrderedDict
from layersAll import *
from optimizer import *


In [9]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size,hidden1_size, output_size, weight_init_std = 0.1):
        '''
        初始化权重
        randn生成正太分布的值
        '''
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, hidden1_size)
        self.params['b2'] = np.zeros(hidden1_size)
        self.params['w3'] = weight_init_std * np.random.randn(hidden1_size,output_size)
        self.params['b3'] = np.zeros(output_size)

        #生成层
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'],self.params['b1'])
        self.layers['sigmoid'] = Sigmoid()
        self.layers['Affine2'] = Affine(self.params['w2'],self.params['b2'])
        self.layers['sigmoid1'] = Sigmoid()
        self.layers['Affine3'] = Affine(self.params['w3'],self.params['b3'])
        self.last_layer = SoftmaxWithLoss()

    def predict1(self, x):
        '''不使用层'''
        w1,w2 = self.params['w1'], self.params['w2']
        b1,b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        y = softmax(a2)
        return y

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss_copy(self, x, t):
        '''原loss'''
        '''
        :param x: 输入数据
        :param t: 监督数据
        :return: 交叉熵损失
        '''
        y = self.predict1(x)
        return cross_entropy_error(y, t)

    def loss(self, x, t):
        '''
        :param x: 输入数据
        :param t: 监督数据
        :return: 交叉熵损失
        '''
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        '''计算正确率函数'''
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(t==y)/float(x.shape[0])
        return accuracy

    def numerical_gradient_tag(self,x,t):
        '''计算权重梯度的函数'''
        loss_w = lambda w:self.loss_copy(x, t)
        grads = {}
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        grads['w2'] = numerical_gradient(loss_w, self.params['w2'])
        grads['b2'] = numerical_gradient(loss_w, self.params['b2'])

        return grads

    def gradinet(self, x, t):
        # forward
        self.loss(x,t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        # print(dout,dout.shape)
        layers = list(self.layers.values())
        layers.reverse()
        # print(layers)
        for layer in layers:
            dout = layer.backward(dout)
        # 设置
        grad = {}
        grad['w1'] = self.layers['Affine1'].dw
        grad['b1'] = self.layers['Affine1'].db
        grad['w2'] = self.layers['Affine2'].dw
        grad['b2'] = self.layers['Affine2'].db
        grad['w3'] = self.layers['Affine3'].dw
        grad['b3'] = self.layers['Affine3'].db
        return  grad

In [10]:
net = TwoLayerNet(input_size=784, hidden_size = 80, hidden1_size=50, output_size= 10)
(net.params['w1'].shape,net.params['b1'].shape,net.params['w2'].shape,net.params['b2'].shape,
 net.params['w3'].shape,net.params['b3'].shape)

((784, 80), (80,), (80, 50), (50,), (50, 10), (10,))

In [11]:
x_train,t_train,x_test,t_test = getdata()
x_train.shape, t_train.shape, x_test.shape, t_test.shape

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

In [12]:
#超参数设置
num_epoch = 20000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size/batch_size,1)
optimizer = SGD()
# optimizer_M = Momentum()

In [13]:
# image = PIL.fromarray(x_train[99].reshape(28,28)*255)
# image.show(),t_train[99]

temp_x = x_train[:5]
temp_t = t_train[:5]
# grad_numercial = net.numerical_gradient(temp_x, temp_t)
# grad_backprop = net.gradinet(temp_x,temp_t)
# y = net.predict1(temp_x)
# y1 = net.predict(temp_x)
# y1=softmax(y1)
# y[0],sum(y[0]),y1[0],sum(y1[0])

grad_numercial = net.numerical_gradient_tag(temp_x, temp_t)
grad_backprop = net.gradinet(temp_x,temp_t)

grad_backprop['w2'][0],grad_numercial['w2'][0]

# for key in grad_numercial.keys():
#     diff =np.average(np.abs(grad_backprop[key]-grad_numercial[key]))
#     print(key + ":"+str(diff))

In [14]:
batch_mask = 0
for i in range(num_epoch):
    #'''获取mini_batch'''
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # #计算梯度
    # grad = net.numerical_gradient(x_batch,t_batch)
    grad = net.gradinet(x_batch,t_batch)
    #更新参数
    # for key in ('w1','b1','w2','b2'):
    #     net.params[key] -= learning_rate * grad[key]
    #更新参数-1
    param = net.params
    optimizer.update(param,grad)
    # optimizer_M.update(param,grad)


    #记录学习过程
    loss = net.loss(x_batch, t_batch)
    print(loss)
    train_loss_list.append(loss)
    #计算每个epoch的识别精度
    if i%batch_size==0:
        train_acc = net.accuracy(x_train,t_train)
        train_acc_list.append(train_acc)
        test_acc = net.accuracy(x_test,t_test)
        test_acc_list.append(test_acc)
        print("train_acc, test_acc |" + str(train_acc)+"," +str(test_acc))

702.9308288056066
train_acc, test_acc |0.09751666666666667,0.0974
333.61068555370144
278.52783308447806
275.38850026911166
254.91740402079705
231.96100819412266
231.60664580260706
239.434328621663
233.22830344930304
227.01413342697373
229.0268098052983
222.47866459244102
225.34907212998263
228.7551393612188
231.17669490885095
228.70819795749884
241.00318699716325
236.37957417461448
234.25522268805156
230.94449849939167
246.6420754478196
241.82104586952104
233.41166682108752
227.62963196692067
227.9134491368167
229.5148815291944
221.3777171601506
242.35274840798303
229.79860293962167
229.1953851258977
229.7935677269013
229.48171396432124
229.55386854016334
226.26659376054715
234.409243922759
226.86992805818358
229.77121143878864
226.89704427015704
231.93275247580988
230.8026421128834
230.6908772471905
232.03964720394893
234.3398555925994
229.24052074080873
230.55337996820663
232.57509525103222
225.89650042580303
228.0141337593103
225.19453037086888
240.8698741795876
244.1024266760675
23

KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
len(train_loss_list)
plt.plot(range(len(train_loss_list)),train_loss_list)

In [None]:
plt.plot(range(len(train_acc_list)),train_acc_list)

In [None]:
plt.plot(range(len(test_acc_list)),test_acc_list)