In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow import keras
from sklearn.model_selection import train_test_split
import numpy as np
from fastprogress import progress_bar

In [8]:
INPUT_NODE = 784
OUTPUT_NODE = 10
LAYER1_NODE = 500
BATCH_SIZE = 2000

LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 3000
MOVING_AVERAGE_DECAY = 0.99

In [3]:
#实现了前向传播算法，同时支持滑动平均算法
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    if avg_class == None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2
    else:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
        return tf.matmul(layer1, avg_class.average(weights2) + avg_class.average(biases2))

In [7]:
def train(mnist):
    x = tf.placeholder(tf.float32, [None, INPUT_NODE], name='x-input')
    y_ = tf.placeholder(tf.int64, name='y-input')
    
    weights1 = tf.Variable(tf.truncated_normal([INPUT_NODE, LAYER1_NODE], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, shape=[LAYER1_NODE]))
    
    weights2 = tf.Variable(tf.truncated_normal([LAYER1_NODE, OUTPUT_NODE], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, shape=[OUTPUT_NODE]))
    
    y = inference(x, None, weights1, biases1, weights2, biases2)
    
    global_step = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())
    
    average_y = inference(x, variable_averages, weights1, biases1, weights2, biases2)
    
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    regularization = regularizer(weights1) + regularizer(weights2)
    loss = cross_entropy_mean + regularization
    
    (x_train, y_train), (x_test, y_test) = mnist
    x_train = np.array([x.flatten() for x in x_train])
    x_test = np.array([x.flatten() for x in x_test])
    x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.1, random_state=42)
    
#     print('x_train shape: {}, x_test shape: {}'.format(x_train.shape, x_test.shape) )
#     print('y_train shape: {}, y_test shape: {}'.format(y_train.shape, y_test.shape))
#     print('x_val shape: {}, y_val shape: {}'.format(x_val.shape, y_val.shape))

    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE,
        global_step,
        x_train.shape[0] / BATCH_SIZE,
        LEARNING_RATE_DECAY
    )
    
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)

    with tf.control_dependencies([train_step, variable_averages_op]):
        # no_op(...): Does nothing. Only useful as a placeholder for control edges.
        train_op = tf.no_op(name='train')
    # 验证使用滑动平均模型的神经网络传播结果是否正确
    # tf.argmax() 函数的第二个参数“1” 表示选取最大值操作仅在第一个维度中进行。
    correct_prediction = tf.equal(tf.argmax(average_y, 1), y_)
    correct = tf.reduce_sum(tf.cast(correct_prediction, tf.int32))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    batch_num = x_train.shape[0] // BATCH_SIZE
    
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        tf.initialize_all_variables().run()
        validate_feed = {
            x: x_val,
            y_: y_val
        }
    
        test_feed = {x: x_test, y_: y_test}
    
        for i in progress_bar(range(TRAINING_STEPS)):
            if i % 50 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                val_correct = sess.run(correct, feed_dict=validate_feed)
                print("After %d training step(s), validation accuracy using average model is %g correct is %g" % (i, validate_acc, val_correct))
            
            for j in range(batch_num):
                start = j * BATCH_SIZE
                end = (j + 1) * BATCH_SIZE
                xs, ys = x_train[start: end], y_train[start: end]
                sess.run([train_op, global_step], feed_dict={x: xs, y_: ys})
            
        test_acc = sess.run(accuracy, feed_dict={x: x_test, y_: y_test})
        print("After %d training step(s), test accuracy using average model is %g " % (TRAINING_STEPS, test_acc))

In [5]:
mnist = keras.datasets.mnist.load_data('/home/ycli/code/tensorflow_notes/data/mnist.npz')

In [9]:
train(mnist)

HBox(children=(IntProgress(value=0, max=3000), HTML(value='')))

HBox(children=(IntProgress(value=0, max=3000), HTML(value='0.00% [0/3000 00:00<00:00]')))

After 0 training step(s), validation accuracy using average model is 0.114667 correct is 688
After 50 training step(s), validation accuracy using average model is 0.104 correct is 624
After 100 training step(s), validation accuracy using average model is 0.104 correct is 624
After 150 training step(s), validation accuracy using average model is 0.104 correct is 624
After 200 training step(s), validation accuracy using average model is 0.104 correct is 624
After 250 training step(s), validation accuracy using average model is 0.104 correct is 624
After 300 training step(s), validation accuracy using average model is 0.104 correct is 624
After 350 training step(s), validation accuracy using average model is 0.104 correct is 624
After 400 training step(s), validation accuracy using average model is 0.104 correct is 624
After 450 training step(s), validation accuracy using average model is 0.104 correct is 624
After 500 training step(s), validation accuracy using average model is 0.104 cor