In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
# 加载sklearn自带的mnist数据
digits = datasets.load_digits()
# 数据集包含1797个手写体数字的图片，图片大小为8*8
# 数字大小0～10，也就是说有这是个10分类问题
images = digits.images
targets = digits.target
print(("dataset shape is: "), images.shape)
# 将数据分为训练数据和测试数据（20%）
X_train,X_test,y_train,y_test = train_test_split(images , targets , test_size=0.2 , random_state=0)
num_training = 1137
num_validation = 300
num_test = y_test.shape[0]
# 将训练集再分为训练集和验证集
mask = list(range(num_training, num_training + num_validation))
X_val = X_train[mask]
y_val = y_train[mask]
mask = list(range(num_training))
X_train = X_train[mask]
y_train = y_train[mask]
mask = list(range(num_test))
X_test = X_test[mask]
y_test = y_test[mask]
print("the number of train: ", num_training)
print("the number of test: ", num_test)
print("the number of validation: ", num_validation)
# 将每个数字8*8的像素矩阵转化为64*1的向量
X_train = X_train.reshape(num_training, -1)
X_val = X_val.reshape(num_validation, -1)
X_test = X_test.reshape(num_test, -1)
print("training data shape: ", X_train.shape)
print("validation data shape: ", X_val.shape)
print("test data shape: ", X_test.shape)


dataset shape is:  (1797, 8, 8)
the number of train:  1137
the number of test:  360
the number of validation:  300
training data shape:  (1137, 64)
validation data shape:  (300, 64)
test data shape:  (360, 64)


In [2]:
# 定义神经网络的参数
# 定义超参
input_size = 64
hidden_size = 30
num_classes = 10
# 为了之后使用的方便，我将参数初始化，计算loss，训练，预测的过程都定义在一个名为network的类中 
import numpy as np
import matplotlib.pyplot as plt
from PycharmProject.cnn import network #从自定义的类中引出


# 训练30个隐藏节点的网络

In [3]:
net = network(input_size, hidden_size, num_classes)

stats = net.train(X_train, y_train, X_val, y_val,
            num_iters=5000, batch_size=200,
            learning_rate=0.01, learning_rate_decay=0.95,
            reg=0.25, verbose=True)
train_acc = (net.predict(X_train) == y_train).mean()
print('Train accuracy: ', train_acc)

val_acc = (net.predict(X_val) == y_val).mean()
print('Validation accuracy: ', val_acc)

test_acc = (net.predict(X_test) == y_test).mean()
print('test accuracy: ', test_acc)


iteration 0 / 5000: loss 2.302587
iteration 100 / 5000: loss 2.301861
iteration 200 / 5000: loss 2.206225
iteration 300 / 5000: loss 1.296984
iteration 400 / 5000: loss 0.941002
iteration 500 / 5000: loss 0.785880
iteration 600 / 5000: loss 0.827294
iteration 700 / 5000: loss 0.774106
iteration 800 / 5000: loss 0.815422
iteration 900 / 5000: loss 0.768211
iteration 1000 / 5000: loss 0.787185
iteration 1100 / 5000: loss 0.782876
iteration 1200 / 5000: loss 0.791674
iteration 1300 / 5000: loss 0.742254
iteration 1400 / 5000: loss 0.780427
iteration 1500 / 5000: loss 0.780346
iteration 1600 / 5000: loss 0.702118
iteration 1700 / 5000: loss 0.714850
iteration 1800 / 5000: loss 0.743797
iteration 1900 / 5000: loss 0.811183
iteration 2000 / 5000: loss 0.738605
iteration 2100 / 5000: loss 0.756463
iteration 2200 / 5000: loss 0.718344
iteration 2300 / 5000: loss 0.729844
iteration 2400 / 5000: loss 0.759118
iteration 2500 / 5000: loss 0.725241
iteration 2600 / 5000: loss 0.767802
iteration 270

# 设计一个动态增加隐藏层节点的过程，从1开始，一直到训练准确率达到一个值为止。相当于每次训练的网络不一样，来找合适的超参数hiddensize。

# 设计一个可以扩充隐藏层节点的网络，从1个节点开始，一个循环结束，如果达不到准确率，增加一个节点然后继续训练

# 定义一个函数来重复100次，取平均训练时间，addnode1是扩充模式

In [4]:
def addnode1(initnode):
    #扩充节点模式，之前训练的参数保留
    dy_hidden_size  = initnode 
    init_train_acc = 0
    init_test_acc = 0
    net1 = network(input_size, dy_hidden_size, num_classes)
    for i in range(1,200):
        stats = net1.train(X_train, y_train, X_val, y_val,
                    num_iters=5000, batch_size=200,
                    learning_rate=0.01, learning_rate_decay=0.95,
                    reg=0.25, verbose=False)

        train_acc = (net1.predict(X_train) == y_train).mean()
#         print('Train accuracy: ', train_acc)
        test_acc = (net1.predict(X_test) == y_test).mean()
#         print('test accuracy: ', test_acc)
        if train_acc >0.96 and train_acc > test_acc:
            print('Terminal hidden nodes number: ', dy_hidden_size)
            break

        #增加一个隐藏层节点，就是给权重矩阵扩充一列
        dy_hidden_size = dy_hidden_size + 1

        #给输入层的权重矩阵增加一列
        c = np.random.randn(64,1)
        net1.params_['W1'] = np.hstack((net1.params_['W1'],c))
        #给输入层的偏置增加一个0
        net1.params_['b1'] = np.append(net1.params_['b1'], np.zeros(1))
        #给输入层的权重矩阵增加一行
        d = np.random.randn(1,10)
        net1.params_['W2'] = np.vstack((net1.params_['W2'],d))
    return dy_hidden_size

In [5]:
%%time
import datetime
time1 = []
node1 = []
for i in range(10):
    begin = datetime.datetime.now()
    ternode = addnode1(initnode=1)
    end = datetime.datetime.now()
    se = end - begin
    time1  = np.append(time1,se.seconds)
    node1 = np.append(node1,ternode)
print('avarge time: ', time1.mean())
print('avarge nodes: ', node1.mean())

Terminal hidden nodes number:  10
Terminal hidden nodes number:  12
Terminal hidden nodes number:  11
Terminal hidden nodes number:  10
Terminal hidden nodes number:  9
Terminal hidden nodes number:  9
Terminal hidden nodes number:  15
Terminal hidden nodes number:  10
Terminal hidden nodes number:  14
Terminal hidden nodes number:  14
avarge time:  15.4
avarge nodes:  11.4
Wall time: 2min 39s


# addnode2是加节点模式

In [7]:
def addnode2(initnode):
    dy_hidden_size  = 1 
    init_train_acc = 0
    init_test_acc = 0
    for dy_hidden_size in range(1,200):
        net2 = network(input_size, dy_hidden_size, num_classes)
        stats = net2.train(X_train, y_train, X_val, y_val,
                    num_iters=5000, batch_size=200,
                    learning_rate=0.01, learning_rate_decay=0.95,
                    reg=0.25, verbose=False)

        train_acc = (net2.predict(X_train) == y_train).mean()
#         print('Train accuracy: ', train_acc)
        test_acc = (net2.predict(X_test) == y_test).mean()
#         print('test accuracy: ', test_acc)
        if test_acc >0.95 and train_acc > test_acc:
            print('Terminal hidden nodes number: ', dy_hidden_size)
            break

        dy_hidden_size = dy_hidden_size + 1

       
    return dy_hidden_size

In [8]:
%%time
import datetime
time2 = []
node2 = []
for i in range(10):
    begin = datetime.datetime.now()
    ternode = addnode2(initnode=2)
    end = datetime.datetime.now()
    se = end - begin
    time2  = np.append(time2,se.seconds)
    node2 = np.append(node2,ternode)
print('avarge time: ', time2.mean())
print('avarge nodes: ', node2.mean())

Terminal hidden nodes number:  10
Terminal hidden nodes number:  7
Terminal hidden nodes number:  10
Terminal hidden nodes number:  10
Terminal hidden nodes number:  11
Terminal hidden nodes number:  12
Terminal hidden nodes number:  11
Terminal hidden nodes number:  11
Terminal hidden nodes number:  10
Terminal hidden nodes number:  8
avarge time:  13.1
avarge nodes:  10.0
Wall time: 2min 17s


# 定义一个函数，扩充节点的时候，新节点的权重为0，这样保持输出期望一致

In [7]:
def addnode3(initnode):
    #扩充节点模式，之前训练的参数保留
    dy_hidden_size  = initnode 
    init_train_acc = 0
    init_test_acc = 0
    net3 = network(input_size, dy_hidden_size, num_classes)
    for i in range(1,200):
        stats = net3.train(X_train, y_train, X_val, y_val,
                    num_iters=5000, batch_size=200,
                    learning_rate=0.01, learning_rate_decay=0.95,
                    reg=0.25, verbose=False)

        train_acc = (net3.predict(X_train) == y_train).mean()
#         print('Train accuracy: ', train_acc)
        test_acc = (net3.predict(X_test) == y_test).mean()
#         print('test accuracy: ', test_acc)
        if test_acc >0.95 and train_acc > test_acc:
            print('Terminal hidden nodes number: ', dy_hidden_size)
            break

        #增加一个隐藏层节点，就是给权重矩阵扩充一列
        dy_hidden_size = dy_hidden_size + 1

        #给输入层的权重矩阵增加一列,权重为已经训练过的节点的权重
        c = net3.params_['W1'][:,0].reshape(64,1)
        net3.params_['W1'] = np.hstack((net3.params_['W1'],c))
        #给输入层的偏置增加一个0
        net3.params_['b1'] = np.append(net3.params_['b1'], np.zeros(1))
        #给输入层的权重矩阵增加一行,权重为0
        d = net3.params_['W2'][0,:].reshape(1,10)
        net3.params_['W2'] = np.vstack((net3.params_['W2'],d))
    return dy_hidden_size

In [None]:
%%time
import datetime
time3 = []
node3 = []
for i in range(10):
    begin = datetime.datetime.now()
    ternode = addnode3(initnode=2)
    end = datetime.datetime.now()
    se = end - begin
    time3  = np.append(time3,se.seconds)
    node3 = np.append(node3,ternode)
print('avarge time: ', time3.mean())
print('avarge nodes: ', node3.mean())

Terminal hidden nodes number:  15


## 做一个函数addnode4，每次扩张节点，把前面的权重都重新分配一下，新增的节点权重是前面所有的加起来求平均，原来的权重都要乘以n-1/n 这样期望不变

In [6]:
def addnode4(initnode):
    #扩充节点模式，之前训练的参数保留
    dy_hidden_size  = initnode 
    init_train_acc = 0
    init_test_acc = 0
    net4 = network(input_size, dy_hidden_size, num_classes)
    for i in range(1,200):
        stats = net4.train(X_train, y_train, X_val, y_val,
                    num_iters=5000, batch_size=200,
                    learning_rate=0.01, learning_rate_decay=0.95,
                    reg=0.25, verbose=False)

        train_acc = (net4.predict(X_train) == y_train).mean()
#         print('Train accuracy: ', train_acc)
        test_acc = (net4.predict(X_test) == y_test).mean()
#         print('test accuracy: ', test_acc)
        if train_acc >0.96 and train_acc > test_acc:
            print('Terminal hidden nodes number: ', dy_hidden_size)
            break

        #增加一个隐藏层节点，就是给权重矩阵扩充一列
        p = dy_hidden_size/dy_hidden_size + 1
        dy_hidden_size = dy_hidden_size + 1
        
        #给输入层的权重矩阵增加一列,权重为已经训练过的节点的权重的平均值（按行）,再把前面的做一个拉伸，乘以n-1/n
        new = net4.params_['W1'].sum(axis=1)/dy_hidden_size
        new = new.reshape(64,1)
        old = net4.params_['W1']*p
        net4.params_['W1'] = np.hstack((old,new))
#         print("new=",new)
        #给输入层的偏置增加一个0
        net4.params_['b1'] = np.append(net4.params_['b1'], np.zeros(1))
        #给输入层的权重矩阵增加一行,权重为已经训练过的节点的权重的平均值（按列）,再把前面的做一个拉伸，乘以n-1/n
        new1 = net4.params_['W2'].sum(axis=0)/dy_hidden_size
        new1 = new1.reshape(1,10)
        old1 = net4.params_['W2']*p
#         print("new1=",new1)
        net4.params_['W2'] = np.vstack((old1,new1))
    return dy_hidden_size

In [7]:
%%time
import datetime
time4 = []
node4 = []
for i in range(10):
    begin = datetime.datetime.now()
    ternode = addnode4(initnode=2)
    end = datetime.datetime.now()
    se = end - begin
    time4  = np.append(time4,se.seconds)
    node4 = np.append(node4,ternode)
print('avarge time: ', time4.mean())
print('avarge nodes: ', node4.mean())

Terminal hidden nodes number:  7
Terminal hidden nodes number:  7
Terminal hidden nodes number:  8
Terminal hidden nodes number:  9
Terminal hidden nodes number:  7
Terminal hidden nodes number:  8
Terminal hidden nodes number:  8
Terminal hidden nodes number:  7
Terminal hidden nodes number:  9
Terminal hidden nodes number:  7
avarge time:  8.4
avarge nodes:  7.7
Wall time: 1min 29s


# 