In [105]:
import os
import numpy as np
from function import *
import scipy.io as scio
import time
import PIL.Image as PIL
from collections import OrderedDict
from layersAll import *

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [106]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.1):
        '''
        初始化权重
        randn生成正太分布的值
        '''
        self.params = {}
        self.params['w1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['w2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

        #生成层
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['w1'],self.params['b1'])
        self.layers['sigmoid'] = Sigmoid()
        self.layers['Affine2'] = Affine(self.params['w2'],self.params['b2'])
        self.last_layer = SoftmaxWithLoss()

    def predict1(self, x):
        '''不使用层'''
        w1,w2 = self.params['w1'], self.params['w2']
        b1,b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, w1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, w2) + b2
        y = softmax(a2)
        return y

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss_copy(self, x, t):
        '''
        :param x: 输入数据
        :param t: 监督数据
        :return: 交叉熵损失
        '''
        y = self.predict1(x)
        return cross_entropy_error(y, t)

    def loss(self, x, t):
        '''
        :param x: 输入数据
        :param t: 监督数据
        :return: 交叉熵损失
        '''
        y = self.predict(x)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t):
        '''计算正确率函数'''
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(t==y)/float(x.shape[0])
        return accuracy

    def numerical_gradient_tag(self,x,t):
        '''计算权重梯度的函数'''
        loss_w = lambda w:self.loss_copy(x, t)
        grads = {}
        grads['w1'] = numerical_gradient(loss_w, self.params['w1'])
        grads['b1'] = numerical_gradient(loss_w, self.params['b1'])
        grads['w2'] = numerical_gradient(loss_w, self.params['w2'])
        grads['b2'] = numerical_gradient(loss_w, self.params['b2'])

        return grads

    def gradinet(self, x, t):
        # forward
        self.loss(x,t)
        # backward
        dout = 1
        dout = self.last_layer.backward(dout)
        # print(dout,dout.shape)
        layers = list(self.layers.values())
        layers.reverse()
        # print(layers)
        for layer in layers:
            dout = layer.backward(dout)
        # 设置
        grad = {}
        grad['w1'] = self.layers['Affine1'].dw
        grad['b1'] = self.layers['Affine1'].db
        grad['w2'] = self.layers['Affine2'].dw
        grad['b2'] = self.layers['Affine2'].db
        return  grad

In [107]:
net = TwoLayerNet(input_size=784, hidden_size=100, output_size= 10)
net.params['w1'].shape,net.params['b1'].shape,net.params['w2'].shape,net.params['b2'].shape

((784, 100), (100,), (100, 10), (10,))

In [108]:
x_train,t_train,x_test,t_test = getdata()
x_train.shape, t_train.shape, x_test.shape, t_test.shape

((60000, 784), (60000, 10), (10000, 784), (10000, 10))

In [109]:
#超参数设置
num_epoch = 20000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.15
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size/batch_size,1)

In [110]:
# image = PIL.fromarray(x_train[99].reshape(28,28)*255)
# image.show(),t_train[99]

In [None]:
temp_x = x_train[:5]
temp_t = t_train[:5]
#### grad_numercial = net.numerical_gradient(temp_x, temp_t)
#### grad_backprop = net.gradinet(temp_x,temp_t)
#### y = net.predict1(temp_x)
#### y1 = net.predict(temp_x)
#### y1=softmax(y1)
#### y[0],sum(y[0]),y1[0],sum(y1[0])


In [None]:
# grad_numercial = net.numerical_gradient_tag(temp_x, temp_t)
# grad_backprop = net.gradinet(temp_x,temp_t)

In [None]:
# grad_backprop['w2'][0],grad_numercial['w2'][0]

# for key in grad_numercial.keys():
#     diff =np.average(np.abs(grad_backprop[key]-grad_numercial[key]))
#     print(key + ":"+str(diff))


In [111]:
batch_mask = 0
for i in range(num_epoch):
    #'''获取mini_batch'''
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # #计算梯度
    # grad = net.numerical_gradient(x_batch,t_batch)
    grad = net.gradinet(x_batch,t_batch)
    #更新参数
    for key in ('w1','b1','w2','b2'):
        net.params[key] -= learning_rate * grad[key]

    #记录学习过程
    loss = net.loss(x_batch, t_batch)
    print(loss)
    train_loss_list.append(loss)
    #计算每个epoch的识别精度
    if i%batch_size==0:
        train_acc = net.accuracy(x_train,t_train)
        train_acc_list.append(train_acc)
        test_acc = net.accuracy(x_test,t_test)
        test_acc_list.append(test_acc)
        print("train_acc, test_acc |" + str(train_acc)+"," +str(test_acc))

1501.260185385067
train_acc, test_acc |0.11236666666666667,0.1135
630.8295983209833
639.7727461016661
642.6260675042134
500.6054366396226
462.96656519077595
436.23937059806735
377.67533176778045
330.6068435705108
323.7524885427169
354.1803882765062
328.38228242905205
318.98436714742155
318.19194903676953
353.822475172065
437.35874780799026
372.67981185470524
384.68548151198195
334.48193758300886
330.6994712695993
289.52807278069236
313.11889842446385
513.9668313693431
444.4726727075929
430.4574058408622
391.53261602270356
362.2506773417435
355.865981381734
321.3240886797977
355.83584026566587
331.1141299395256
315.15382673145893
315.82970627287375
329.8104498769442
291.75822610255204
278.92038129134346
260.835159514855
255.10272375346435
283.8334934875036
320.62930830978826
301.40164459062004
293.50830134298974
301.39217874123665
299.257349440151
344.66806681412913
333.10834348867894
314.5044915574151
278.9716641062804
260.2705526842188
270.67711772311804
273.1670736535026
272.96085030