In [2]:
import os
import re
import struct
import tensorflow as tf
import numpy as np
# import matplotlib.pyplot as plt

dataset_dir = 'datasets/mnist_database/'
train_set_path = os.path.join(dataset_dir, 'train-images.idx3-ubyte')
train_label_path = os.path.join(dataset_dir, 'train-labels.idx1-ubyte')
test_set_path = os.path.join(dataset_dir, 't10k-images.idx3-ubyte')
test_label_path = os.path.join(dataset_dir, 't10k-labels.idx1-ubyte')

def load_data(file_path):
    
    if not os.path.exists(file_path):
        print("The file is not exist!")
        return
    
    fr_binary = open(file_path, 'rb')
    buffer = fr_binary.read()

    if re.search('\w+-(images)\.', os.path.split(file_path)[-1]) is not None:
        """
        提取数据文件
        """
        head = struct.unpack_from('>IIII', buffer, 0)
        offset = struct.calcsize('>IIII')       # 定位到字节流中 data 开始的位置
        img_num, width, height = head[1:]   
        format_str = '>{0}B'.format(img_num * width * height)
        data = struct.unpack_from(format_str, buffer, offset)
        fr_binary.close()
        data = np.reshape(data, [img_num, width, height])
        return data
    
    elif re.search('\w+-(labels)\.', file_path) is not None:
        """
        提取标签文件
        """
        head = struct.unpack_from('>II', buffer, 0)
        label_num = head[1]
        offset = struct.calcsize('>II')
        format_str = '>{0}B'.format(label_num)
        labels = struct.unpack_from(format_str, buffer, offset)
        fr_binary.close()
        labels = np.reshape(labels, [label_num])
        return labels
        
train_img = load_data(train_set_path)
train_label = load_data(train_label_path)
test_img = load_data(test_set_path)
test_label = load_data(test_label_path)

# 调整数据结构
def convert_one_hot(y, num_classes):
    
    y_one_hot = np.zeros((y.shape[-1], num_classes))
    for i in range(y.shape[-1]):
        y_one_hot[i][y[i]] = 1
    return y_one_hot
    
train_label_one_hot = convert_one_hot(train_label, 10)
test_label_one_hot = convert_one_hot(test_label, 10)
train_data = (np.reshape(train_img, (60000, 784)) / 255).astype(np.float32)
test_data = (np.reshape(test_img, (10000, 784)) / 255).astype(np.float32)

# 生成batches
def get_batches(X, y, batch_size, axis = 0, seed = 0):
    
    assert(X.shape[axis] == y.shape[axis])
    np.random.seed(seed)
    m = X.shape[axis]
    mini_batches = []
    permutation = list(np.random.permutation(m))
    num_complete_minibatches = m // batch_size
    
    if 0 == axis:
        shuffled_X = X[permutation, :]
        shuffled_y = y[permutation, :]
        for k in range(num_complete_minibatches):
            mini_batch_X = shuffled_X[k * batch_size: (k + 1) * batch_size, :]
            mini_batch_y = shuffled_y[k * batch_size: (k + 1) * batch_size, :]
            mini_batches.append((mini_batch_X, mini_batch_y))
        if m % batch_size != 0:
            mini_batch_X = shuffled_X[num_complete_minibatches * batch_size, :]
            mini_batch_y = shuffled_y[num_complete_minibatches * batch_size, :]
            mini_batches.append((mini_batch_X, mini_batch_y))
        return mini_batches
        
    elif 1 == axis:
        shuffled_X = X[:, permutation]
        shuffled_y = y[:, permutation]
        for k in range(num_complete_minibatches):
            mini_batch_X = shuffled_X[:, k * batch_size: (k + 1) * batch_size]
            mini_batch_y = shuffled_y[:, k * batch_size: (k + 1) * batch_size]
            mini_batches.append((mini_batch_X, mini_batch_y))
        if m % batch_size != 0:
            mini_batch_X = shuffled_X[:, num_complete_minibatches * batch_size]
            mini_batch_y = shuffled_y[:, num_complete_minibatches * batch_size]
            mini_batches.append((mini_batch_X, mini_batch_y))
        return mini_batches
    
# 搭建一个简单的神经网络（搭建计算图）
X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
keep_prob = tf.placeholder(tf.float32)       # 用于存储保留节点的比例

# 设置网络第一层的权重及偏置
W1 = tf.Variable(tf.truncated_normal([784, 500], stddev = 0.1))   # 节点数为2000
b1 = tf.Variable(tf.zeros([1, 500]))
Z1 = tf.matmul(X, W1) + b1
L1 = tf.nn.tanh(Z1)
L1_drop = tf.nn.dropout(L1, keep_prob)

# 设置第二层网络
W2 = tf.Variable(tf.truncated_normal([500, 300], stddev = 0.1))   # 节点数为2000
b2 = tf.Variable(tf.zeros([1, 300]))
Z2 = tf.matmul(L1_drop, W2) + b2
L2 = tf.nn.tanh(Z2)
L2_drop = tf.nn.dropout(L2, keep_prob)

# 设置第三层网络
W3 = tf.Variable(tf.truncated_normal([300, 10], stddev = 0.1))
b3 = tf.Variable(tf.zeros([1, 10]))
Z3 = tf.matmul(L2_drop, W3) + b3
prediction = tf.nn.softmax(Z3)
# L3 = tf.nn.tanh(Z3)
# L3_drop = tf.nn.dropout(L3, keep_prob)

# # 设置输出层
# W4 = tf.Variable(tf.truncated_normal([100, 10], stddev = 0.1))
# b4 = tf.Variable(tf.zeros([1, 10]))
# Z4 = tf.matmul(L3_drop, W4) + b4
# prediction = tf.nn.softmax(Z4)

# 使用L2代价函数
# loss = tf.reduce_mean(tf.square(y - prediction))
# 使用cross entropy代价函数
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits = prediction))

# 使用梯度下降法
train_step = tf.train.GradientDescentOptimizer(0.01)
train = train_step.minimize(loss)

# 变量初始化
init = tf.global_variables_initializer()

# 计算准确率，结果存放在一个 boolean 型列表中
correction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))   # argmax返回最大值所在的位置
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))

with tf.Session() as sess:       # 创建 session 训练网络
    sess.run(init)   # 先执行变量初始化
    mini_batches = get_batches(train_data, train_label_one_hot, batch_size = 100)
    for i in range(21):
        for (X_data, y_data) in mini_batches:
            sess.run(train, feed_dict = {X: X_data, y: y_data, keep_prob: 1.0})
            loss_value = sess.run(loss, feed_dict = {X: X_data, y: y_data, keep_prob: 1.0}) 
        if 0 == i % 1:
            acc = sess.run(accuracy, feed_dict = {X: test_data, y: test_label_one_hot, keep_prob: 1.0})   
            print("loss of iteration %d : %f" %(i, loss_value))
            print("accuracy is: %f" %(acc))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

loss of iteration 0 : 1.829579
accuracy is: 0.724600
loss of iteration 1 : 1.735072
accuracy is: 0.797400
loss of iteration 2 : 1.709098
accuracy is: 0.815200
loss of iteration 3 : 1.664822
accuracy is: 0.851000
loss of iteration 4 : 1.627203
accuracy is: 0.882700
loss of iteration 5 : 1.613127
accuracy is: 0.891400
loss of iteration 6 : 1.601581
accuracy is: 0.897900
loss of iteration 7 : 1.593241
accuracy is: 0.903200
loss of iteration 8 : 1.587226
accuracy is: 0.907900
loss of iteration 9 : 1.582638
accuracy is: 0.910800
loss of iteration 10 : 1.578928
accuracy is: 0.913600
loss of iteration 11 : 1.575781
accuracy is: 0.915500
loss of iteration 12 : 1.573021
accuracy is: 0.917000
loss of iteration 13 : 1.570552
accuracy is: 0.918700
loss of iteration 14 : 1.568310
accuracy is: 0.920200
loss of i