# Minst 手写数字识别

## 多层神经网络建模

In [1]:
import tensorflow as tf
import tensorflow.keras as keras

print(tf.__version__)

2.7.0


## 载入数据

In [2]:
# 获取 Mints手写数字数据集
mnist = keras.datasets.mnist

# 训练数据，测试数据
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

## 划分验证集

In [3]:
# 训练集总数，以次数据为准将训练集划分为训练集与验证集
total_num = len(train_images)

# 验证集占训练集的比例
valid_split = 0.2

# 实际训练集数量
train_num = int(total_num * (1 - valid_split))

# 训练集（0% ~ 80%）
train_x = train_images[:train_num]
train_y = train_labels[:train_num]

# 剩余部分为验证集（80% ~ 100%）
valid_x = train_images[train_num:]
valid_y = train_labels[train_num:]

test_x = test_images
test_y = test_labels

## 数据降维

In [4]:
# 把（28，28）的结构拉直为 1行 784列
# -1代表由系统计算行数，指定列数为784列
train_x = train_x.reshape(-1, 784)
valid_x = valid_x.reshape(-1, 784)
test_x = test_x.reshape(-1, 784)

## 特征数据归一化

In [5]:
train_x = tf.cast(train_x / 255.0, tf.float32)
valid_x = tf.cast(valid_x / 255.0, tf.float32)
test_x = tf.cast(test_x / 255.0, tf.float32)

## 独热编码

In [6]:
# 对标签数据进行独热编码
train_y = tf.one_hot(train_y, depth=10)
valid_y = tf.one_hot(valid_y, depth=10)
test_y = tf.one_hot(test_y, depth=10)

## 创建待优化变量

In [7]:
# 输入（784） -> 第一层隐层（64） -> 输出层（10）
# 定义第一层隐藏层权重和偏置项变量
Input_Dim = 784
H1_NN = 64

W1 = tf.Variable(tf.random.normal([Input_Dim, H1_NN], mean=0.0, stddev=1.0, dtype=tf.float32))
B1 = tf.Variable(tf.zeros([H1_NN]), dtype=tf.float32)

In [8]:
# 定义第二层隐藏层权重和偏置项变量
H2_NN = 32

W2 = tf.Variable(tf.random.normal([H1_NN, H2_NN], mean=0.0, stddev=1.0, dtype=tf.float32))
B2 = tf.Variable(tf.zeros([H2_NN]), dtype=tf.float32)

In [9]:
# 定义输出层权重和偏置项变量
Output_Dim = 10

W3 = tf.Variable(tf.random.normal([H2_NN, Output_Dim], mean=0.0, stddev=1.0, dtype=tf.float32))
B3 = tf.Variable(tf.zeros([Output_Dim]), dtype=tf.float32)

In [10]:
# 建立待优化列表
W = [W1, W2, W3]
B = [B1, B2, B3]

## 定义模型前向计算

In [11]:
# 扩展多分类模型
# 模型计算后进行Softmax分类，决定属于哪个标签分类（本例为10分类）
def model(x, w, b):
    x = tf.matmul(x, w[0]) + b[0]
    x = tf.nn.relu(x)
    x = tf.matmul(x, w[1]) + b[1]
    x = tf.nn.relu(x)
    x = tf.matmul(x, w[2]) + b[2]
    pred = tf.nn.softmax(x)

    # 返回预测标签值
    return pred

## 定义交叉熵函数

In [12]:
# 定义交叉熵损失函数
def loss(x, y, w, b):
    # 前向计算
    pred = model(x, w, b)

    # 计算模型预测值与真实值差异
    loss_ = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=pred)

    # 求均方差
    return tf.reduce_mean(loss_)

## 设置训练参数

In [13]:
train_epochs = 20
batch_size = 50
learning_rate = 0.001

## 定义梯度计算函数

In [14]:
# 计算样本数据[x, y]在参数[w, b]上的梯度
def grad(x, y, w, b):
    var_list = w + b
    with tf.GradientTape() as tape:
        loss_ = loss(x, y, w, b)

    # 返回梯度向量
    return tape.gradient(loss_, var_list)

## 选择优化器

In [15]:
# Adam优化器
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

## 定义准确率

In [16]:
def accuracy(x, y, w, b):
    # 计算模型预测标签值与标签值的差异
    pred = model(x, w, b)

    # 检查预测类别tf.argmax(pred, 1)与实际类别tf.argmax(y, 1)的匹配情况
    # 匹配成功返回 True
    # 匹配失败返回 False
    correct_predication = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

    # 将匹配结果转换为float32输出，得出准确率
    return tf.reduce_mean(tf.cast(correct_predication, tf.float32))

## 模型训练

In [17]:
total_step = int(train_num / batch_size)

loss_list_train = []
loss_list_valid = []
acc_list_train = []
acc_list_valid = []

for epoch in range(train_epochs):
    for step in range(total_step):
        xs = train_x[step * batch_size:(step + 1) * batch_size]
        ys = train_y[step * batch_size:(step + 1) * batch_size]

        grads = grad(xs, ys, W, B)
        optimizer.apply_gradients(zip(grads, W + B))

    loss_train = loss(train_x, train_y, W, B).numpy()
    loss_valid = loss(valid_x, valid_y, W, B).numpy()
    acc_train = accuracy(train_x, train_y, W, B).numpy()
    acc_valid = accuracy(valid_x, valid_y, W, B).numpy()

    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)

    acc_list_train.append(acc_train)
    acc_list_valid.append(acc_valid)

    print("Epoch = {:3d}, "
          "Train_loss = {:.4f},"
          " Train_acc = {:4f},"
          " Val_loss = {:.4f}, "
          "Val_acc = {:.4f}".format(epoch + 1,
                                    loss_train,
                                    loss_valid,
                                    acc_train,
                                    acc_valid))

Epoch =   1, Train_loss = 10.9174, Train_acc = 10.893991, Val_loss = 0.3152, Val_acc = 0.3175
Epoch =   2, Train_loss = 9.7280, Train_acc = 9.659597, Val_loss = 0.3909, Val_acc = 0.3952
Epoch =   3, Train_loss = 9.2479, Train_acc = 9.249901, Val_loss = 0.4220, Val_acc = 0.4220
Epoch =   4, Train_loss = 9.0895, Train_acc = 9.087481, Val_loss = 0.4326, Val_acc = 0.4326
Epoch =   5, Train_loss = 8.9697, Train_acc = 8.980812, Val_loss = 0.4405, Val_acc = 0.4395
Epoch =   6, Train_loss = 7.6427, Train_acc = 7.624354, Val_loss = 0.5219, Val_acc = 0.5228
Epoch =   7, Train_loss = 7.4356, Train_acc = 7.471794, Val_loss = 0.5359, Val_acc = 0.5337
Epoch =   8, Train_loss = 7.3411, Train_acc = 7.396438, Val_loss = 0.5420, Val_acc = 0.5377
Epoch =   9, Train_loss = 7.2957, Train_acc = 7.333180, Val_loss = 0.5449, Val_acc = 0.5420
Epoch =  10, Train_loss = 7.2502, Train_acc = 7.284460, Val_loss = 0.5476, Val_acc = 0.5450
Epoch =  11, Train_loss = 7.2289, Train_acc = 7.278317, Val_loss = 0.5496, Val