Copyright 2017 The XMU Authors. All Rights Reserved.
邱明、曾捷航、林金鹏、景丽婷

In [1]:

#如果要在Python 2.7的代码中直接使用Python 3.x的语法，可以通过__future__引入对应的模块
from __future__ import absolute_import
from __future__ import division
import argparse

#只引入tensorflow.examples.tutorials.mnist包里的input_data模块
from tensorflow.examples.tutorials.mnist import input_data

#给tensorflow包一个别称tf
import tensorflow as tf


# sigmoid的导函数  
$ \sigma^\prime (x) = sigmoid(x)(1-sigmoid(x))$

In [2]:
def sigmaprime(x):
    """Return the derivative of the logistic sigmoid evaluated at ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)).

    Args:
        x: A floating-point tensor (or a value convertible to one).

    Returns:
        A tensor holding the element-wise sigmoid derivative of ``x``.
    """
    # Build the sigmoid op once and reuse it instead of adding two identical
    # sigmoid nodes to the graph.
    s = tf.sigmoid(x)
    # The Python literal 1.0 is converted to the dtype of `s` by the tensor
    # operator overload, so inputs other than float32 work as well.
    return s * (1.0 - s)

# 对一个Tensor进行多种数据汇总
计算平均值、标准差、最大值和最小值，并输出直方图

In [3]:
def variable_summaries(var, name):
    """Attach a set of TensorBoard summaries to a tensor.

    Registers scalar summaries for the mean, standard deviation, maximum and
    minimum of ``var`` plus a histogram of its values, all grouped under the
    name scope ``'summary_' + name``.

    Args:
        var: The tensor (or variable) to summarise.
        name: Suffix used to build the enclosing name scope.
    """
    scope_name = 'summary_' + name
    with tf.name_scope(scope_name):
        avg = tf.reduce_mean(var)
        tf.summary.scalar('mean', avg)  # mean value

        with tf.name_scope('stddev'):
            variance = tf.reduce_mean(tf.square(var - avg))
            std = tf.sqrt(variance)
        tf.summary.scalar('stddev', std)  # standard deviation

        # Largest and smallest element, in that order.
        for tag, reducer in (('max', tf.reduce_max), ('min', tf.reduce_min)):
            tf.summary.scalar(tag, reducer(var))

        tf.summary.histogram('histogram', var)  # distribution of the values

# 参数定义

In [4]:
# Network structure and training hyper-parameters
ETA = 0.01   # learning rate
EPOCHS = 2000  # number of training iterations (one mini-batch is drawn per iteration)
BATCH_SIZE = 1000    # number of samples per mini-batch
TEST_EPOCHS = 10  # the test set is evaluated every TEST_EPOCHS iterations

INPUT_NODE = 784 # number of input nodes
OUTPUT_NODE = 10    # number of output nodes

# Change the value below to change the number of hidden neurons
HIDDENLAYER_NODE = 30    # number of hidden-layer nodes
LOG_PATH='log/mnist_bp_2Layer/'  # directory under which the summary logs are written

# 从网络上下载数据, 导入程序，放在mnist变量中

In [5]:
# Read the MNIST data sets from ./data/ into `mnist`, with one-hot encoded labels.
mnist = input_data.read_data_sets("./data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


name="x-input"是给变量起个名字，便于以后用名字表示这个placeholder

In [6]:
# Input images: any number of rows, INPUT_NODE values per row
x = tf.placeholder(tf.float32, [None, INPUT_NODE], name="x-input")
# Labels of the input images: any number of rows, OUTPUT_NODE values per row
y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name="y-input")

# z与权重W和偏置项b的关系   
$$ z_k^{l+1}=\sum_{j} W_{kj}^{l+1}x_j^l+b_k^{l+1} $$

# 激活函数   
$$sigmoid(z) = \frac{1}{1+e^{-z}} $$

In [7]:
# Layer 1 (input -> hidden).
# The weights start from small random values. With all-zero weights every
# hidden unit produces the same output and receives the same update, so the
# hidden units would remain identical to each other for the whole training
# run and the layer would behave like a single neuron.
W_1 = tf.Variable(tf.truncated_normal([INPUT_NODE, HIDDENLAYER_NODE], stddev=0.1))
b_1 = tf.Variable(tf.zeros([HIDDENLAYER_NODE]))
z_1 = tf.matmul(x, W_1) + b_1  # pre-activation of the hidden layer
y_1 = tf.sigmoid(z_1)          # activation of the hidden layer


# Layer 2 (hidden -> output), initialised the same way as layer 1.
W_2 = tf.Variable(tf.truncated_normal([HIDDENLAYER_NODE, OUTPUT_NODE], stddev=0.1))
b_2 = tf.Variable(tf.zeros([OUTPUT_NODE]))
z_2 = tf.matmul(y_1, W_2) + b_2  # pre-activation of the output layer
y_2 = tf.sigmoid(z_2)            # network output

# 损失函数
$$C = \frac{1}{2}(y_2 - y\_)^2$$
$$\frac{\partial C} {\partial z} = (y_2 - y\_) \sigma^\prime(z)$$

In [8]:
# Despite its name this is not the cost value itself: it is the derivative of
# C = 1/2 * (y_2 - y_)^2 with respect to the network output y_2.
quadratic_cost = tf.subtract(y_2,y_)

# 反向传播 
## 第二层的修改值
$$\delta_j^2 = \frac{\partial C}{\partial y_j^2}\sigma^\prime (z_j^2) $$     
$$\frac{\partial C}{\partial W_{kj}^2} = y_k^1\delta_j^2 $$
$$\frac{\partial C}{\partial b_j^2} = \delta_j^2$$
$$W^2 = W^2 - \eta\frac{\partial C}{\partial z^2} y^1$$
$$b^2 = b^2 - \eta\frac{\partial C}{\partial z^2} $$

In [9]:
# Error signal of the output layer: dC/dy_2 scaled by the sigmoid derivative at z_2.
d_z2 = quadratic_cost * sigmaprime(z_2)
# The bias gradient is the error signal itself (still one row per sample).
d_b2 = d_z2
# Weight gradient: y_1^T . d_z2, which sums the per-sample products over the batch.
d_w2 = tf.matmul(y_1, d_z2, transpose_a=True)

## 第一层的修改值
$$\delta_j^1 = \sigma^\prime (z_j^1)\sum_k{W_{jk}^2\delta_k^2} $$     
$$\frac{\partial C}{\partial W_{kj}^1} = x_k\delta_j^1 $$
$$\frac{\partial C}{\partial b_j^1} = \delta_j^1$$
$$W^1 = W^1 - {\eta}x\delta_j^1$$
$$b^1 = b^1 - \eta\delta_j^1 $$

In [10]:
# Error signal of the hidden layer: propagate d_z2 back through W_2 and scale
# it by the sigmoid derivative at z_1.
d_z1 = tf.multiply(tf.matmul(d_z2,tf.transpose(W_2)), sigmaprime(z_1))
d_b1 = d_z1
# Weight gradient of layer 1: x^T . d_z1 (summed over the batch).
d_w1 = tf.matmul(tf.transpose(x),d_z1)

# NOTE(review): the weight gradients are summed over the batch (matmul) while
# the bias gradients are averaged (reduce_mean), so weights and biases are
# effectively updated with different learning-rate scales — confirm intended.
grad_b1 = tf.reduce_mean(d_b1, axis=[0])
grad_b2 = tf.reduce_mean(d_b2, axis=[0])

# The four assignments below are executed by a single sess.run call, and
# TensorFlow does not order independent ops against each other. Require every
# gradient to be fully computed before any variable is overwritten, so that
# e.g. d_z1 can never be computed from an already-updated W_2.
with tf.control_dependencies([d_w1, grad_b1, d_w2, grad_b2]):
    step = [
        tf.assign(W_1, tf.subtract(W_1, tf.multiply(ETA, d_w1))),
        tf.assign(b_1, tf.subtract(b_1, tf.multiply(ETA, grad_b1))),
        tf.assign(W_2, tf.subtract(W_2, tf.multiply(ETA, d_w2))),
        tf.assign(b_2, tf.subtract(b_2, tf.multiply(ETA, grad_b2))),
    ]

# 使用测试数据集检验模型的正确率
## tf.argmax返回第一维最大值的索引,由于数据格式为类似[0,1,0,0,0,0,0,0,0,0],所以索引即为识别出的阿拉伯数字
## tf.equal判断张量中对应下标的值是否相等,相等返回true,不相等返回false

In [11]:
# Index of the largest value along axis 1 for the network output and for the label.
predicted_digit = tf.argmax(y_2, 1)
true_digit = tf.argmax(y_, 1)
# Boolean vector: True where the prediction matches the label.
accuracy_mat = tf.equal(predicted_digit, true_digit)
# Fraction of matches (booleans cast to 0.0 / 1.0 and averaged).
accuracy_result = tf.reduce_mean(tf.cast(accuracy_mat, tf.float32))

### 使用Summary把数据写入事件文件

In [12]:
# Weight and bias statistics, grouped per layer.
for layer_scope, layer_vars in (
        ('layer1', ((W_1, 'W_1'), (b_1, 'b_1'))),
        ('layer2', ((W_2, 'W_2'), (b_2, 'b_2')))):
    with tf.name_scope(layer_scope):
        for tensor, label in layer_vars:
            variable_summaries(tensor, label)

with tf.name_scope('Accuracy'):
    tf.summary.scalar('accuracy_rate', accuracy_result)

# Merge every summary registered so far into a single op.
merged = tf.summary.merge_all()

# Separate writers for training and test summaries; the directory name encodes
# the hyper-parameters of this run.
run_tag = '_'.join(str(v) for v in (HIDDENLAYER_NODE, BATCH_SIZE, EPOCHS, ETA))
train_writer = tf.summary.FileWriter(LOG_PATH + '/train_' + run_tag,
                                     graph=tf.get_default_graph())
test_writer = tf.summary.FileWriter(LOG_PATH + '/test_' + run_tag,
                                    graph=tf.get_default_graph())

# 运行模型

In [13]:
# Run the training loop. The summary writers are closed in `finally` so their
# event files are flushed and released even if training is interrupted or fails.
try:
    with tf.Session() as sess:
        # Initialise every variable defined above.
        tf.global_variables_initializer().run()

        # EPOCHS iterations, each on one mini-batch of BATCH_SIZE training samples.
        for i in range(EPOCHS):
            batch_xs, batch_ys = mnist.train.next_batch(BATCH_SIZE)
            summary, _ = sess.run([merged, step],
                                  feed_dict={x: batch_xs, y_: batch_ys})
            # Record the training summaries for this iteration.
            train_writer.add_summary(summary, i)

            # Every TEST_EPOCHS iterations, record summaries on the full test set.
            if i % TEST_EPOCHS == 0:
                summary = sess.run(merged,
                                   feed_dict={x: mnist.test.images,
                                              y_: mnist.test.labels})
                test_writer.add_summary(summary, i)
finally:
    train_writer.close()
    test_writer.close()