In [11]:
import tensorflow as tf
# from tensorflow.examples.tutorials.mnist import input_data
# mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Flatten every image to 784 values and scale pixels by 1/255.  Dividing an
# integer pixel array by a Python float promotes it to float64, so cast down
# to float32 afterwards: this halves the memory held by the datasets and
# matches the float32 variables created by weight_variable/bias_variable
# instead of relying on an implicit cast when the data reaches the model.
x_train = (x_train.reshape(-1, 784) / 255.0).astype("float32")
x_test = (x_test.reshape(-1, 784) / 255.0).astype("float32")
# One-hot encode the integer class labels over 10 classes.
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

learning_rate = 1e-4  # step size handed to the Adam optimizer
keep_prob_rate = 0.7  # probability of keeping a unit in dropout (drop rate = 1 - this)
max_epoch = 3  # number of passes over the training set
batch_size = 32  # samples per training batch

# def compute_accuracy(v_xs, v_ys):
#     global prediction
#     y_pre = sess.run(prediction, feed_dict={xs: v_xs, keep_prob: 1})
#     correct_prediction = tf.equal(tf.argmax(y_pre,1), tf.argmax(v_ys,1))
#     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
#     result = sess.run(accuracy, feed_dict={xs: v_xs, ys: v_ys, keep_prob: 1})
#     return result

def compute_accuracy(x, y):
    """Return the fraction of rows in ``x`` that the global ``model`` classifies correctly.

    Args:
        x: Batch of inputs forwarded to ``model`` with ``training=False``.
        y: Per-row target scores; the target class of each row is taken as
            the argmax along axis 1 (e.g. one-hot labels).

    Returns:
        The mean of the per-row "prediction == target" indicator, as a NumPy
        float32 scalar.
    """
    # The model returns (probabilities, logits); only the first is needed.
    probabilities, _ = model(x, training=False)
    predicted_class = tf.argmax(probabilities, axis=1)
    target_class = tf.argmax(y, axis=1)
    hits = tf.cast(tf.equal(predicted_class, target_class), tf.float32)
    return tf.reduce_mean(hits).numpy()

def weight_variable(shape):
    """Create a ``tf.Variable`` of ``shape`` drawn from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.random.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Create a ``tf.Variable`` of ``shape`` with every element set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using unit strides and SAME padding."""
    # Stride of 1 along every dimension; padding mode is SAME.
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool ``x`` with a 2x2 window, stride 2 and SAME padding."""
    # Window height and width are both 2; the window moves 2 steps at a time.
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

# define placeholder for inputs to network
# xs = tf.placeholder(tf.float32, [None, 784])/255.
# ys = tf.placeholder(tf.float32, [None, 10])
# keep_prob = tf.placeholder(tf.float32)
# xs = tf.keras.Input(shape=(784,))
# ys = tf.keras.Input(shape=(10,))
# keep_prob = tf.Variable(0.7, dtype=tf.float32)
# x_image = tf.reshape(xs, [-1, 28, 28, 1])

# Define the network as a Keras model so it is easy to use with TF2
class MyCNN(tf.keras.Model):
    """Small CNN: two conv + max-pool stages followed by two dense layers.

    Inputs are reshaped to 28x28 single-channel images.  Each stage applies a
    SAME-padded convolution, ReLU, and a 2x2 stride-2 max-pool, so the
    flattened feature vector fed to the first dense layer has 7*7*64 entries.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 7x7 filters, 1 input channel -> 32 output channels.
        self.W_conv1 = weight_variable([7, 7, 1, 32])
        self.b_conv1 = bias_variable([32])

        # Stage 2: 5x5 filters, 32 input channels -> 64 output channels.
        self.W_conv2 = weight_variable([5, 5, 32, 64])
        self.b_conv2 = bias_variable([64])

        # Dense layer 1: flattened 7*7*64 features -> 1024 hidden units.
        self.W_fc1 = weight_variable([7*7*64, 1024])
        self.b_fc1 = bias_variable([1024])

        # Dense layer 2: 1024 hidden units -> 10 class scores.
        self.W_fc2 = weight_variable([1024, 10])
        self.b_fc2 = bias_variable([10])

    def call(self, x, training=False):
        """Run the forward pass.

        Args:
            x: Input batch; reshaped to ``[-1, 28, 28, 1]`` before the first
                convolution.
            training: When truthy, dropout with rate ``1 - keep_prob_rate``
                (module-level setting) is applied to the hidden dense layer.

        Returns:
            A ``(probabilities, logits)`` tuple, where ``probabilities`` is
            the softmax of ``logits``.
        """
        images = tf.reshape(x, [-1, 28, 28, 1])

        # Stage 1: convolution + ReLU, then pooling.
        conv1_out = tf.nn.relu(conv2d(images, self.W_conv1) + self.b_conv1)
        pool1_out = max_pool_2x2(conv1_out)

        # Stage 2: convolution + ReLU, then pooling.
        conv2_out = tf.nn.relu(conv2d(pool1_out, self.W_conv2) + self.b_conv2)
        pool2_out = max_pool_2x2(conv2_out)

        # Flatten the feature maps for the dense layers.
        flat = tf.reshape(pool2_out, [-1, 7*7*64])
        hidden = tf.nn.relu(tf.matmul(flat, self.W_fc1) + self.b_fc1)

        # Dropout is only active in training mode.
        if training:
            hidden = tf.nn.dropout(hidden, rate=1 - keep_prob_rate)

        logits = tf.matmul(hidden, self.W_fc2) + self.b_fc2
        return tf.nn.softmax(logits), logits


# Initialise the model and its optimizer.
model = MyCNN()
optimizer = tf.keras.optimizers.Adam(learning_rate)

# Input pipelines: shuffled mini-batches for training.
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.shuffle(60000).batch(batch_size)
# NOTE(review): test_data is built here but not consumed in the visible code;
# the evaluation below uses the first 1000 test samples directly.
test_data = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_data = test_data.batch(1000)

# Training loop: one optimizer step per mini-batch, one evaluation per epoch.
for epoch in range(max_epoch):
    for batch_xs, batch_ys in train_data:
        with tf.GradientTape() as tape:
            y_pred, logits = model(batch_xs, training=True)
            # The loss is computed from the raw logits, not the softmax output.
            per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
                labels=batch_ys, logits=logits)
            loss = tf.reduce_mean(per_example_loss)
        trainable_vars = model.trainable_variables
        grads = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(zip(grads, trainable_vars))

    # Accuracy on the first 1000 test samples only.
    test_acc = compute_accuracy(x_test[:1000], y_test[:1000])
    print(f'Epoch {epoch}, Test Accuracy: {test_acc:.4f}')


# Convolutional layer 1
## conv1 layer ##
# W_conv1 = weight_variable([7, 7, 1, 32]) # patch 7x7, in size 1, out size 32
# b_conv1 = bias_variable([32])
# h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1) # convolution; the activation function is a free choice
# h_pool1 = max_pool_2x2(h_conv1) # pooling

# # Convolutional layer 2
# W_conv2 = weight_variable([5, 5, 32, 64]) # patch 5x5, in size 32, out size 64
# b_conv2 = bias_variable([64])
# h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2) # convolution; the activation function is a free choice
# h_pool2 = max_pool_2x2(h_conv2) # pooling

# # Fully connected layer 1
# ## fc1 layer ##
# W_fc1 = weight_variable([7*7*64, 1024])
# b_fc1 = bias_variable([1024])

# h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
# h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
# h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

# # Fully connected layer 2
# ## fc2 layer ##
# W_fc2 = weight_variable([1024, 10])
# b_fc2 = bias_variable([10])
# prediction = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)


# Cross-entropy loss
# cross_entropy = tf.reduce_mean(-tf.reduce_sum(ys * tf.math.log(prediction),axis=1))

# train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# with tf.Session() as sess:
#     init = tf.global_variables_initializer()
#     sess.run(init)
    
#     for i in range(max_epoch):
#         batch_xs, batch_ys = mnist.train.next_batch(100)
#         sess.run(train_step, feed_dict={xs: batch_xs, ys: batch_ys, keep_prob:keep_prob_rate})
#         if i % 100 == 0:
#             print(compute_accuracy(
#                 mnist.test.images[:1000], mnist.test.labels[:1000]))

Epoch 0, Test Accuracy: 0.9620
Epoch 1, Test Accuracy: 0.9860
Epoch 2, Test Accuracy: 0.9840
