In [661]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import math
import numpy as np

In [662]:
mnist = input_data.read_data_sets("./MNIST_data_set/", one_hot=True)

Extracting ./MNIST_data_set/train-images-idx3-ubyte.gz
Extracting ./MNIST_data_set/train-labels-idx1-ubyte.gz
Extracting ./MNIST_data_set/t10k-images-idx3-ubyte.gz
Extracting ./MNIST_data_set/t10k-labels-idx1-ubyte.gz


In [663]:
print("The shape of train set x is:", mnist.train.images.shape)
print("The shape of train set y is:", mnist.train.labels.shape)
print("The shape of validation set x is:", mnist.validation.images.shape)
print("The shape of validation set y is:", mnist.validation.labels.shape)
print("The shape of test set x is:", mnist.test.images.shape)
print("The shape of test set y is:", mnist.test.labels.shape)

The shape of train set x is: (55000, 784)
The shape of train set y is: (55000, 10)
The shape of validation set x is: (5000, 784)
The shape of validation set y is: (5000, 10)
The shape of test set x is: (10000, 784)
The shape of test set y is: (10000, 10)


In [664]:
# 输入数据函数
def input_data(Input_node, Output_node):
    x = tf.placeholder(tf.float32, shape=[None,Input_node], name='input_x')
    y = tf.placeholder(tf.float32, shape=[None,Output_node], name='output_y')
    return x,y

In [665]:
# 参数初始化
def parameter_initialize(Input_node, Layer1_node, Output_node):
    W1 = tf.Variable(tf.truncated_normal([Input_node, Layer1_node], stddev=0.1))
    b1 = tf.Variable(tf.constant(0.1, shape=[Layer1_node]))
    W2 = tf.Variable(tf.truncated_normal([Layer1_node, Output_node], stddev=0.1))
    b2 = tf.Variable(tf.constant(0.1, shape=[Output_node]))
    parameter = {"W1":W1,"b1":b1,
                 "W2":W2,"b2":b2}
    return parameter

In [666]:
# 前向过程
def inference(input_tensor, parameter):
    W1 = parameter["W1"]
    b1 = parameter["b1"]
    W2 = parameter["W2"]
    b2 = parameter["b2"]
    layer1 = tf.nn.relu(tf.matmul(input_tensor, W1) + b1)
    return tf.nn.relu(tf.matmul(layer1, W2) + b2)

In [667]:
# 定义损失函数 
def loss(y_pred, y, parameter):
    W1 = parameter["W1"]
    W2 = parameter["W2"]
    cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y_pred, labels=tf.argmax(y,1)))
    # 定义L2正则项
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularition = regularizer(W1) + regularizer(W2)
    # 交叉熵与L2正则项相加，作为损失函数
    cost = cross_entropy + regularition
    return cost

In [668]:
def train(Learning_rate_base, Learning_rate_decay, mini_batch_size, cost):
    ## 学习率指数衰减函数
    ## tf.train.exponential_decay(learning_rate_base,  global_step,  decay_steps,  learning_rate_decay,  staircase=False or True,  name(optional))
    ## decayed_learning_rate = learning_rate_base *learning_rate_decay^ (global_step/decay_steps)
    ## 如果staircase=True,global_step/decay_steps为整除；如果staircase=True,global_step/decay_steps为正常除
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(Learning_rate_base, global_step, mnist.train.num_examples / mini_batch_size, learning_rate_decay, staircase=True)
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost,global_step=global_step)
    return train_step

In [669]:
# 定义评估函数，此程序以准确度作为评估指标
def evaluate(y, y_pred):
    correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_pred,1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy

In [670]:
# 创建mini_batch函数，将训练集划分成若干个mini_batch_size大小的子集

def random_mini_batches(x, y, mini_batch_size):
    """
    Creates a list of random minibatches from (X, Y)

    Arguments:
    X -- input data, of shape (input size, number of examples)
    Y -- true "label" vector (1 for blue dot / 0 for red dot), of shape (1, number of examples)
    mini_batch_size -- size of the mini-batches, integer

    Returns:
    mini_batches -- list of synchronous (mini_batch_X, mini_batch_Y)
    """
    m = x.shape[0]                  # number of training examples
    mini_batches = []

    # Step 1: Shuffle (X, Y)
    permutation = list(np.random.permutation(m))
    shuffled_x = x[permutation, :]
    shuffled_y = y[permutation, :]

    # Step 2: Partition (shuffled_x, shuffled_y). Minus the end case.
    num_complete_minibatches = math.floor(m/mini_batch_size) # number of mini batches of size mini_batch_size in your partitionning
    for k in range(0, num_complete_minibatches):
        ### START CODE HERE ### (approx. 2 lines)
        mini_batch_x = shuffled_x[k*mini_batch_size : (k+1)*mini_batch_size, :]
        mini_batch_y = shuffled_y[k*mini_batch_size : (k+1)*mini_batch_size, :]
        ### END CODE HERE ###
        mini_batch = (mini_batch_x, mini_batch_y)
        mini_batches.append(mini_batch)

    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        ### START CODE HERE ### (approx. 2 lines)
        mini_batch_x = shuffled_x[num_complete_minibatches*mini_batch_size : m, :]
        mini_batch_y = shuffled_y[num_complete_minibatches*mini_batch_size : m, :]
        ### END CODE HERE ###
        mini_batch = (mini_batch_x, mini_batch_y)
        mini_batches.append(mini_batch)
    return mini_batches

In [671]:
def model(Input_node, Layer1_model, Output_node, Learning_rate_base, Learning_rate_decay):
    x, y = input_data(Input_node, Output_node)
    parameter = parameter_initialize(Input_node, Layer1_node, Output_node)
    y_pred = inference(x, parameter)
    cost = loss(y_pred, y, parameter)
    train_step = train(Learning_rate_base, Learning_rate_decay, mini_batch_size, cost)
    accuracy = evaluate(y, y_pred)
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        validate_feed = {x: mnist.validation.images, y: mnist.validation.labels}
        test_feed = {x: mnist.test.images, y: mnist.test.labels}
        

        # 遍历整个训练集多次
        for i in range(training_steps):
            # 每遍历一次训练集，计算验证集的准确度
            validate_accuracy = sess.run(accuracy, feed_dict=validate_feed)
            print("After %d training steps ,validation accuracy is %g" % (i, validate_accuracy))
            minibatches = random_mini_batches(mnist.train.images, mnist.train.labels, mini_batch_size)
            # 遍历整个训练集一次
            for minibatch in minibatches:
                (x_batch, y_batch) = minibatch
                sess.run(train_step, feed_dict={x: x_batch, y: y_batch})
        
        test_accuracy = sess.run(accuracy, feed_dict=test_feed)
        print("After %d training steps ,test accuracy is %g" % (training_steps, test_accuracy))

In [672]:
# 定义输入层，隐藏层和输出层的节点数量
Input_node = 784
Layer1_node = 500
Output_node = 10
# 定义每一训练批次的样本数量
mini_batch_size = 64
# 模型相关参数
learning_rate_base = 0.1      #基础学习率
learning_rate_decay = 0.99  #学习率衰减因子
regularization_rate = 0.001  #L2正则化因子
training_steps = 20       #遍历整个训练集的次数
model(Input_node, Layer1_node, Output_node, learning_rate_base, learning_rate_decay)

After 0 training steps ,validation accuracy is 0.0842
After 1 training steps ,validation accuracy is 0.934
After 2 training steps ,validation accuracy is 0.9552
After 3 training steps ,validation accuracy is 0.963
After 4 training steps ,validation accuracy is 0.9684
After 5 training steps ,validation accuracy is 0.9676
After 6 training steps ,validation accuracy is 0.972
After 7 training steps ,validation accuracy is 0.9742
After 8 training steps ,validation accuracy is 0.9716
After 9 training steps ,validation accuracy is 0.9742
After 10 training steps ,validation accuracy is 0.976
After 11 training steps ,validation accuracy is 0.977
After 12 training steps ,validation accuracy is 0.9766
After 13 training steps ,validation accuracy is 0.976
After 14 training steps ,validation accuracy is 0.9782
After 15 training steps ,validation accuracy is 0.9774
After 16 training steps ,validation accuracy is 0.976
After 17 training steps ,validation accuracy is 0.9776
After 18 training steps ,va