# Minst 手写数字识别

## 单层神经网络建模

In [17]:
import tensorflow as tf
import tensorflow.keras as keras

print(tf.__version__)

2.7.0


## 载入数据

In [18]:
# 获取 Mints手写数字数据集
mnist = keras.datasets.mnist

# 训练数据，测试数据
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

## 划分验证集

In [19]:
# 训练集总数，以次数据为准将训练集划分为训练集与验证集
total_num = len(train_images)

# 验证集占训练集的比例
valid_split = 0.2

# 实际训练集数量
train_num = int(total_num * (1 - valid_split))

# 训练集（0% ~ 80%）
train_x = train_images[:train_num]
train_y = train_labels[:train_num]

# 剩余部分为验证集（80% ~ 100%）
valid_x = train_images[train_num:]
valid_y = train_labels[train_num:]

test_x = test_images
test_y = test_labels

## 数据降维

In [20]:
# 把（28，28）的结构拉直为 1行 784列
# -1代表由系统计算行数，指定列数为784列
train_x = train_x.reshape(-1, 784)
valid_x = valid_x.reshape(-1, 784)
test_x = test_x.reshape(-1, 784)

## 特征数据归一化

In [21]:
train_x = tf.cast(train_x / 255.0, tf.float32)
valid_x = tf.cast(valid_x / 255.0, tf.float32)
test_x = tf.cast(test_x / 255.0, tf.float32)

## 独热编码

In [22]:
# 对标签数据进行独热编码
train_y = tf.one_hot(train_y, depth=10)
valid_y = tf.one_hot(valid_y, depth=10)
test_y = tf.one_hot(test_y, depth=10)

## 创建待优化变量

In [23]:
# 输入（784） -> 第一层隐层（64） -> 输出层（10）
# 定义第一层隐藏层权重和偏置项变量
Input_Dim = 784
H1_NN = 64

W1 = tf.Variable(tf.random.normal([Input_Dim, H1_NN], mean=0.0, stddev=1.0, dtype=tf.float32))
B1 = tf.Variable(tf.zeros([H1_NN]), dtype=tf.float32)

In [24]:
# 定义输出层权重和偏置项变量
Output_Dim = 10

W2 = tf.Variable(tf.random.normal([H1_NN, Output_Dim], mean=0.0, stddev=1.0, dtype=tf.float32))
B2 = tf.Variable(tf.zeros([Output_Dim]), dtype=tf.float32)

In [25]:
# 建立待优化列表
W = [W1, W2]
B = [B1, B2]

## 定义模型前向计算

In [26]:
# 扩展多分类模型
# 模型计算后进行Softmax分类，决定属于哪个标签分类（本例为10分类）
def model(x, w, b):
    x = tf.matmul(x, w[0]) + b[0]
    x = tf.nn.relu(x)
    x = tf.matmul(x, w[1]) + b[1]
    pred = tf.nn.softmax(x)

    # 返回预测标签值
    return pred

## 定义交叉熵函数

In [27]:
# 定义交叉熵损失函数
def loss(x, y, w, b):
    # 前向计算
    pred = model(x, w, b)

    # 计算模型预测值与真实值差异
    loss_ = tf.keras.losses.categorical_crossentropy(y_true=y, y_pred=pred)

    # 求均方差
    return tf.reduce_mean(loss_)

## 设置训练参数

In [28]:
train_epochs = 20
batch_size = 50
learning_rate = 0.001

## 定义梯度计算函数

In [29]:
# 计算样本数据[x, y]在参数[w, b]上的梯度
def grad(x, y, w, b):
    var_list = w + b
    with tf.GradientTape() as tape:
        loss_ = loss(x, y, w, b)

    # 返回梯度向量
    return tape.gradient(loss_, var_list)

## 选择优化器

In [30]:
# Adam优化器
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

## 定义准确率

In [31]:
def accuracy(x, y, w, b):
    # 计算模型预测标签值与标签值的差异
    pred = model(x, w, b)

    # 检查预测类别tf.argmax(pred, 1)与实际类别tf.argmax(y, 1)的匹配情况
    # 匹配成功返回 True
    # 匹配失败返回 False
    correct_predication = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))

    # 将匹配结果转换为float32输出，得出准确率
    return tf.reduce_mean(tf.cast(correct_predication, tf.float32))

## 模型训练

In [32]:
total_step = int(train_num / batch_size)

loss_list_train = []
loss_list_valid = []
acc_list_train = []
acc_list_valid = []

for epoch in range(train_epochs):
    for step in range(total_step):
        xs = train_x[step * batch_size:(step + 1) * batch_size]
        ys = train_y[step * batch_size:(step + 1) * batch_size]

        grads = grad(xs, ys, W, B)
        optimizer.apply_gradients(zip(grads, W + B))

    loss_train = loss(train_x, train_y, W, B).numpy()
    loss_valid = loss(valid_x, valid_y, W, B).numpy()
    acc_train = accuracy(train_x, train_y, W, B).numpy()
    acc_valid = accuracy(valid_x, valid_y, W, B).numpy()

    loss_list_train.append(loss_train)
    loss_list_valid.append(loss_valid)

    acc_list_train.append(acc_train)
    acc_list_valid.append(acc_valid)

    print("Epoch = {:3d}, "
          "Train_loss = {:.4f},"
          " Train_acc = {:4f},"
          " Val_loss = {:.4f}, "
          "Val_acc = {:.4f}".format(epoch + 1,
                                    loss_train,
                                    loss_valid,
                                    acc_train,
                                    acc_valid))

Epoch =   1, Train_loss = 5.3776, Train_acc = 5.252128, Val_loss = 0.6315, Val_acc = 0.6391
Epoch =   2, Train_loss = 4.6154, Train_acc = 4.539206, Val_loss = 0.6885, Val_acc = 0.6916
Epoch =   3, Train_loss = 4.3253, Train_acc = 4.278510, Val_loss = 0.7100, Val_acc = 0.7137
Epoch =   4, Train_loss = 4.1500, Train_acc = 4.156221, Val_loss = 0.7239, Val_acc = 0.7229
Epoch =   5, Train_loss = 2.9688, Train_acc = 2.967189, Val_loss = 0.7905, Val_acc = 0.7924
Epoch =   6, Train_loss = 2.5295, Train_acc = 2.587363, Val_loss = 0.8245, Val_acc = 0.8183
Epoch =   7, Train_loss = 2.4331, Train_acc = 2.531339, Val_loss = 0.8309, Val_acc = 0.8227
Epoch =   8, Train_loss = 2.3510, Train_acc = 2.460204, Val_loss = 0.8379, Val_acc = 0.8269
Epoch =   9, Train_loss = 2.2829, Train_acc = 2.417430, Val_loss = 0.8437, Val_acc = 0.8305
Epoch =  10, Train_loss = 2.2458, Train_acc = 2.393731, Val_loss = 0.8462, Val_acc = 0.8335
Epoch =  11, Train_loss = 2.1997, Train_acc = 2.358518, Val_loss = 0.8491, Val_a