In [31]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data 

#### 1. 设置输入和输出节点的个数，配置神经网络的参数。

In [32]:
# --- Network shape ---
# Input nodes: one per pixel of a 28x28 grayscale handwritten-digit image.
INPUT_NODE = 28 * 28
# Output nodes: one per digit class, 0 through 9.
OUTPUT_NODE = 10
# Number of nodes in the hidden layer (this network has a single hidden layer).
LAYER1_NODE = 500

# Number of samples packed into each training batch.
BATCH_SIZE = 100

# --- Model / optimisation hyper-parameters ---
# Base (initial) learning rate.
LEARNING_RATE_BASE = 0.8
# Decay factor applied to the learning rate.
LEARNING_RATE_DECAY = 0.99
# Weight of the regularisation term in the loss function.
# (The misspelled name is kept because other cells refer to it.)
REGULARAZTION_RATE = 1e-4
# Number of training steps.
TRAINING_STEPS = 6000
# Decay rate passed to the exponential moving average of the parameters.
MOVING_AVERAGE_DECAY = 0.99

#### 2. 定义辅助函数来计算前向传播结果，使用 ReLU 作为激活函数。

In [33]:
#三层全连接网络
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of a network with one ReLU hidden layer.

    Args:
        input_tensor: Input batch; it is matrix-multiplied by ``weights1``.
        avg_class: ``None`` to use the parameters exactly as passed in, or an
            object exposing ``average(variable)`` (``train`` passes a
            ``tf.train.ExponentialMovingAverage``) to use the averaged value
            of every parameter instead.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer result ``hidden @ weights2 + biases2``. No softmax is
        applied here.
    """
    # Identity test rather than `== None`: equality would be routed through a
    # user-defined __eq__ on avg_class and could pick the wrong branch.
    if avg_class is not None:
        # Swap every parameter for its averaged counterpart, in the same
        # order the parameters are declared.
        weights1, biases1, weights2, biases2 = [
            avg_class.average(param)
            for param in (weights1, biases1, weights2, biases2)
        ]

    hidden = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(hidden, weights2) + biases2

#### 3. 定义训练过程。

In [34]:
def train(mnist):
    """Build the network, train it on MNIST and print accuracy figures.

    The graph consists of one hidden layer and an output layer built by
    ``inference``. The loss is the mean softmax cross entropy plus L2
    regularisation of both weight matrices, minimised with gradient descent
    using a staircase exponentially decaying learning rate. An exponential
    moving average of the parameters is maintained alongside training and is
    the model used for every reported accuracy.

    Validation accuracy is printed every 1000 steps, test accuracy once after
    ``TRAINING_STEPS`` steps, and the graph is finally written to the ``log``
    directory.

    Args:
        mnist: Dataset object providing ``train``, ``validation`` and ``test``
            splits (``images``, ``labels``, ``train.num_examples`` and
            ``train.next_batch``), with one-hot labels.
    """
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_NODE], name='y-input')

    # Hidden-layer parameters.
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    # Output-layer parameters.
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    model_variables = [weights1, biases1, weights2, biases2]

    # Forward pass with the raw (non-averaged) parameters; used for the loss.
    y = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter (not trainable) and the moving-average helper.
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    # Average only this model's parameters. tf.trainable_variables() returns
    # every trainable variable in the default graph, which also contains
    # leftovers from earlier calls when the notebook cell is re-run.
    variables_averages_op = variable_averages.apply(model_variables)
    # Forward pass with the averaged parameters; used for accuracy.
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Class indices recovered from the one-hot labels; shared by the loss and
    # the accuracy computation.
    labels = tf.argmax(y_, 1)

    # Softmax + cross entropy, averaged over the batch.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=labels)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # Loss = cross entropy + L2 regularisation of the weights (biases are not
    # regularised) to limit over-fitting.
    regularizer = tf.contrib.layers.l2_regularizer(REGULARAZTION_RATE)
    regularaztion = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularaztion

    # Exponentially decaying learning rate; with staircase=True it drops once
    # every `num_examples / BATCH_SIZE` steps.
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        mnist.train.num_examples / BATCH_SIZE,
        LEARNING_RATE_DECAY,
        staircase=True)

    # Minimise the loss; minimize() also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    # One op that both updates the parameters and refreshes their averages.
    train_op = tf.group(train_step, variables_averages_op)

    # Accuracy of the averaged model: fraction of predictions matching labels.
    correct_prediction = tf.equal(tf.argmax(average_y, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Start the session and run the training loop.
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        validate_feed = {x: mnist.validation.images, y_: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y_: mnist.test.labels}

        for i in range(TRAINING_STEPS):
            # Report validation accuracy every 1000 steps (including step 0).
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g " % (i, validate_acc))

            xs, ys = mnist.train.next_batch(BATCH_SIZE)
            sess.run(train_op, feed_dict={x: xs, y_: ys})

        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print(("After %d training step(s), test accuracy using average model is %g" % (TRAINING_STEPS, test_acc)))

        # Write the graph to the "log" directory, then close the writer so the
        # event file is flushed and its handle released.
        writer = tf.summary.FileWriter("log", tf.get_default_graph())
        writer.close()

#### 4. 这里设定模型训练次数为6000次，读取数据，运行主程序。

In [35]:
%%time
def main(argv=None):
    """Read the MNIST data from ``data/`` with one-hot labels and train on it.

    Args:
        argv: Accepted but not used.
    """
    train(input_data.read_data_sets("data/", one_hot=True))


if __name__ == '__main__':
    main()
   

Extracting data/train-images-idx3-ubyte.gz
Extracting data/train-labels-idx1-ubyte.gz
Extracting data/t10k-images-idx3-ubyte.gz
Extracting data/t10k-labels-idx1-ubyte.gz
After 0 training step(s), validation accuracy using average model is 0.121 
After 1000 training step(s), validation accuracy using average model is 0.9792 
After 2000 training step(s), validation accuracy using average model is 0.9838 
After 3000 training step(s), validation accuracy using average model is 0.9846 
After 4000 training step(s), validation accuracy using average model is 0.9846 
After 5000 training step(s), validation accuracy using average model is 0.9848 
After 6000 training step(s), test accuracy using average model is 0.9834
Wall time: 1min 26s
