## MNIST database
手写数字识别的训练集，有60000个训练样本，10000个测试样本  
每一张图片包含 28 * 28 个像素，图片中的像素的强度值介于 0—1 之间  
1. **数据集的文件内容如下**  
train-images-idx3-ubyte.gz:  training set images (9912422 bytes)   
train-labels-idx1-ubyte.gz:  training set labels (28881 bytes)   
t10k-images-idx3-ubyte.gz:   test set images (1648877 bytes)   
t10k-labels-idx1-ubyte.gz:   test set labels (4542 bytes)  
2. **每个数据文件的内容大致可以分为以下3个部分**  
![image.png](attachment:image.png)

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

### 提取mnist文件中的数据

In [2]:
# 提取数据文件中的数据
import os
import re
import struct

dataset_dir = 'datasets/mnist_database/'
train_set_path = os.path.join(dataset_dir, 'train-images.idx3-ubyte')
train_label_path = os.path.join(dataset_dir, 'train-labels.idx1-ubyte')
test_set_path = os.path.join(dataset_dir, 't10k-images.idx3-ubyte')
test_label_path = os.path.join(dataset_dir, 't10k-labels.idx1-ubyte')

def load_data(file_path):
    
    if not os.path.exists(file_path):
        print("The file is not exist!")
        return
    
    fr_binary = open(file_path, 'rb')
    buffer = fr_binary.read()

    if re.search('\w+-(images)\.', os.path.split(file_path)[-1]) is not None:
        """
        提取数据文件
        """
        head = struct.unpack_from('>IIII', buffer, 0)
        offset = struct.calcsize('>IIII')       # 定位到字节流中 data 开始的位置
        img_num, width, height = head[1:]   
        format_str = '>{0}B'.format(img_num * width * height)
        data = struct.unpack_from(format_str, buffer, offset)
        fr_binary.close()
        data = np.reshape(data, [img_num, width, height])
        return data
    
    elif re.search('\w+-(labels)\.', file_path) is not None:
        """
        提取标签文件
        """
        head = struct.unpack_from('>II', buffer, 0)
        label_num = head[1]
        offset = struct.calcsize('>II')
        format_str = '>{0}B'.format(label_num)
        labels = struct.unpack_from(format_str, buffer, offset)
        fr_binary.close()
        labels = np.reshape(labels, [label_num])
        return labels
        
train_img = load_data(train_set_path)
train_label = load_data(train_label_path)
test_img = load_data(test_set_path)
test_label = load_data(test_label_path)

### 调整数据的格式

In [3]:
# 调整数据结构
def convert_one_hot(y, num_classes):
    
    y_one_hot = np.zeros((y.shape[-1], num_classes))
    for i in range(y.shape[-1]):
        y_one_hot[i][y[i]] = 1
    return y_one_hot
    
train_label_one_hot = convert_one_hot(train_label, 10)
test_label_one_hot = convert_one_hot(test_label, 10)
train_data = (np.reshape(train_img, (60000, 784)) / 255).astype(np.float32)
test_data = (np.reshape(test_img, (10000, 784)) / 255).astype(np.float32)

### 定义生成 mini_batch 的函数

In [4]:
# 生成batches
def get_batches(X, y, batch_size, axis = 0, seed = 0):
    
    assert(X.shape[axis] == y.shape[axis])
    np.random.seed(seed)
    m = X.shape[axis]
    mini_batches = []
    permutation = list(np.random.permutation(m))
    num_complete_minibatches = m // batch_size
    
    if 0 == axis:
        shuffled_X = X[permutation, :]
        shuffled_y = y[permutation, :]
        for k in range(num_complete_minibatches):
            mini_batch_X = shuffled_X[k * batch_size: (k + 1) * batch_size, :]
            mini_batch_y = shuffled_y[k * batch_size: (k + 1) * batch_size, :]
            mini_batches.append((mini_batch_X, mini_batch_y))
        if m % batch_size != 0:
            mini_batch_X = shuffled_X[num_complete_minibatches * batch_size, :]
            mini_batch_y = shuffled_y[num_complete_minibatches * batch_size, :]
            mini_batches.append((mini_batch_X, mini_batch_y))
        return mini_batches
        
    elif 1 == axis:
        shuffled_X = X[:, permutation]
        shuffled_y = y[:, permutation]
        for k in range(num_complete_minibatches):
            mini_batch_X = shuffled_X[:, k * batch_size: (k + 1) * batch_size]
            mini_batch_y = shuffled_y[:, k * batch_size: (k + 1) * batch_size]
            mini_batches.append((mini_batch_X, mini_batch_y))
        if m % batch_size != 0:
            mini_batch_X = shuffled_X[:, num_complete_minibatches * batch_size]
            mini_batch_y = shuffled_y[:, num_complete_minibatches * batch_size]
            mini_batches.append((mini_batch_X, mini_batch_y))
        return mini_batches
    
print(len(get_batches(test_data, test_label_one_hot, 10)))

1000


### 搭建计算图

In [8]:
# 搭建一个简单的神经网络（搭建计算图）
X = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# 设置网络第一层的权重及偏置
W1 = tf.Variable(tf.zeros([784, 10]))   # 节点数为10
b1 = tf.Variable(tf.zeros([1, 10]))
Z1 = tf.matmul(X, W1) + b1
prediction = tf.nn.softmax(Z1)

# 使用L2代价函数
# loss = tf.reduce_mean(tf.square(y - prediction))
# 使用cross entropy代价函数
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits = prediction))

# 使用梯度下降法
train_step = tf.train.GradientDescentOptimizer(0.1)
train = train_step.minimize(loss)

# 变量初始化
init = tf.global_variables_initializer()

# 计算准确率，结果存放在一个 boolean 型列表中
correction = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))   # argmax返回最大值所在的位置
accuracy = tf.reduce_mean(tf.cast(correction, tf.float32))

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



### 执行计算图

In [9]:
with tf.Session() as sess:       # 创建 session 训练网络
    sess.run(init)   # 先执行变量初始化
    mini_batches = get_batches(train_data, train_label_one_hot, batch_size = 100)
    for i in range(201):
        for (X_data, y_data) in mini_batches:
            sess.run(train, feed_dict = {X: X_data, y: y_data})
            loss_value = sess.run(loss, feed_dict = {X: X_data, y: y_data}) 
        if 0 == i % 20:
            acc = sess.run(accuracy, feed_dict = {X: test_data, y: test_label_one_hot})   
            print("loss of iteration %d : %f" %(i, loss_value))
            print("accuracy is: %f" %(acc))
    

loss of iteration 0 : 1.672073
accuracy is: 0.864900
loss of iteration 20 : 1.550155
accuracy is: 0.924900
loss of iteration 40 : 1.541093
accuracy is: 0.927700
loss of iteration 60 : 1.535237
accuracy is: 0.928400
loss of iteration 80 : 1.530826
accuracy is: 0.929400
loss of iteration 100 : 1.527375
accuracy is: 0.929400
loss of iteration 120 : 1.524521
accuracy is: 0.929700
loss of iteration 140 : 1.522393
accuracy is: 0.930600
loss of iteration 160 : 1.520897
accuracy is: 0.930200
loss of iteration 180 : 1.519747
accuracy is: 0.930500
loss of iteration 200 : 1.518789
accuracy is: 0.930400
