In [35]:
from tensorflow.examples.tutorials.mnist import input_data

In [3]:
mnist = input_data.read_data_sets('.', one_hot=True)

Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Extracting ./train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Extracting ./train-labels-idx1-ubyte.gz
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting ./t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting ./t10k-labels-idx1-ubyte.gz


In [9]:
mnist.train.num_examples

55000

In [10]:
mnist.test.num_examples

10000

In [11]:
mnist.validation.num_examples

5000

In [12]:
import tensorflow as tf

In [60]:
INPUT_NODE = 784                # 输入层节点数。对于MNIST相当于图片的像素
OUTPUT_NODE = 10                # 输出层节点数。等于类别的数目

LAYER1_NODE = 500               # 隐藏层节点数。
BATCH_SIZE = 100                # 批处理数。这个值越大，训练越接近梯度下降；越小，越接近随机梯度下降
LEARNING_RATE_RASE = 0.8        # 基础的学习率
LEARNING_RATE_DECAY = 0.99      # 学习率的衰减率
REGULARIZATION_RATE = 0.0001    # 描述模型复杂度的正则化项在损失函数中的系数
TRAINING_STEPS = 1500           # 训练轮数
MOVING_AVERAGE_DECAY = 0.99     # 滑动平均衰减率

一个辅助函数，给定神经网络的输入和所有参数，计算神经网络的前向传播结果。
这里定义一个使用ReLU激活函数的三层全连接神经网络。通过加入隐藏层实现了多层网络结构，通过ReLU激活函数实现了去线性化。
在这个函数中也支持传入用于计算参数平均值的类。

In [28]:
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class == None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)

In [67]:
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')
    
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    
    # 计算当前参数下的神经网络前向传播的结果。不使用滑动平均值
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    # 定义存储训练轮数的变量。不需要计算滑动平均值，所以定为不可训练的变量（trainable=False）。
    global_step = tf.Variable(0, trainable=False)
    # 给定滑动平均衰减率和训练轮数的变脸，初始化滑动平均类。
    # 给定训练轮数的变量可以加快训练早期变量的更新速度
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # 在所有代表神经网络参数的变量上使用滑动平均。其他辅助变量，如global_step不需要
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    # 计算使用了滑动平均之后的前向传播结果。
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    # 计算交叉熵作为刻画预测值和真实值之间差距的损失函数。
    # 当分类问题只有一个正确答案的时候，可以使用sparse_softmax_cross_entropy_with_logits来加速交叉熵计算
    cross_entroy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
    # 计算在当前batch中所有样例的交叉熵平均值
    cross_entroy_mean = tf.reduce_mean(cross_entroy)
    
    # 计算L2正则化损失函数
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    # 计算模型的正则化损失函数。一般不使用偏置项
    regularization = regularizer(weights1) + regularizer(weights2)
    # 总损失等于交叉熵损失和正则化损失的和
    loss = cross_entroy_mean + regularization
    
    # 设置指数衰减的学习率
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_RASE, global_step,
                                               mnist.train.num_examples, LEARNING_RATE_DECAY)
    # 使用GradientDescentOptimizer优化算法来优化损失函数
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
    # 训练神经网络模型时，每过一遍数据既需要通过反向传播来更新神经网络中的参数
    # 又要更新每一个参数的滑动平均值。
    with tf.control_dependencies([train_step, variable_averages_op]):
        train_op = tf.no_op(name='train')
    
    # 检验使用了滑动平均模型的神经网络前向传播结果是否正确
    correct_prediction = tf.equal(tf.argmax(average_y, 1), tf.argmax(y_, 1))
    # 转换boolean为实数型，然后计算平均值。
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # 初始化会话并开始训练过程
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        # 准备验证数据。
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}
        
        # 迭代训练神经网络
        for i in range(TRAINING_STEPS):
            if i % 100 == 0:
                # 计算滑动平均模型在验证数据上的结果。因为MNIST数据集比较小，所以一次可以处理所有验证数据。
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g" % (i, validate_acc))
            # 产生一轮使用的batch的训练数据，并运行训练过程
            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})
        # 在训练结束后，测试数居上检测神经网络模型的最终正确率
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc))
        

In [18]:
def main(argv=None):
    train(mnist)

In [45]:
tf.app.run()

Instructions for updating:
Use `tf.global_variables_initializer` instead.
After 0 training step(s), validation accuracy using average model is 0.1128
After 100 training step(s), validation accuracy using average model is 0.9352
After 200 training step(s), validation accuracy using average model is 0.9564
After 300 training step(s), validation accuracy using average model is 0.965
After 400 training step(s), validation accuracy using average model is 0.9692
After 500 training step(s), validation accuracy using average model is 0.9728
After 600 training step(s), validation accuracy using average model is 0.9734
After 700 training step(s), validation accuracy using average model is 0.975
After 800 training step(s), validation accuracy using average model is 0.9766
After 900 training step(s), validation accuracy using average model is 0.9778
After 1000 training step(s), validation accuracy using average model is 0.979
After 1100 training step(s), validation accuracy using average model is 

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [68]:
tf.app.run()

Instructions for updating:
Use `tf.global_variables_initializer` instead.
After 0 training step(s), validation accuracy using average model is 0.102
After 100 training step(s), validation accuracy using average model is 0.9338
After 200 training step(s), validation accuracy using average model is 0.9524
After 300 training step(s), validation accuracy using average model is 0.9602
After 400 training step(s), validation accuracy using average model is 0.9654
After 500 training step(s), validation accuracy using average model is 0.9674
After 600 training step(s), validation accuracy using average model is 0.9686
After 700 training step(s), validation accuracy using average model is 0.974
After 800 training step(s), validation accuracy using average model is 0.976
After 900 training step(s), validation accuracy using average model is 0.977
After 1000 training step(s), validation accuracy using average model is 0.9772
After 1100 training step(s), validation accuracy using average model is 0

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
