In [1]:
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

  from ._conv import register_converters as _register_converters


In [2]:
# 载入数据集
mnist = input_data.read_data_sets("MNIST_data", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data\train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data\t10k-images-idx3-ubyte.gz
Extracting MNIST_data\t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [3]:
# 每个批次的大小
batch_size = 100
# 计算一共有多少个批次
n_batch = mnist.train.num_examples // batch_size

# 初始化权值
def weight_varible(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# 初始化偏置
def bias_varible(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# 卷积层
def conv2d(x, W):
    #x input tensor of shape '[batch,in_height,in_width,in_channles]'
    #W filter / kernel tensor of shape [filter_height,filter_width,in_channels,out_channels]
    #`strides[0] = strides[3] = 1`. strides[1]代表x方向的步长，strides[2]代表y方向的步长
    #padding: A `string` from: `"SAME", "VALID"`
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# 池化层
def max_pool_2x2(x):
    # ksize [1,x,y,1]
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

with tf.name_scope('input'):
    # 定义两个placeholder
    x = tf.placeholder(tf.float32, [None, 784], name='x-input')
    y = tf.placeholder(tf.float32, [None, 10], name='y-input')
    with tf.name_scope('x_image'):
        # 改变x的格式转换为4D的向量[batch, in_height, in_width, in_channels]
        x_image = tf.reshape(x, [-1, 28, 28, 1])

with tf.name_scope('conv1'):
    # 初始化第一个卷积层的权重和偏置
    with tf.name_scope('W_conv1'):
        W_conv1 = weight_varible([5, 5, 1, 32])  # 5*5采样窗口，32个卷积核从一个平面抽取特征
    with tf.name_scope('b_conv1'):
        b_conv1 = bias_varible([32])  # 每一个卷积核一个偏置值

    # 把 x_image 和权重向量进行卷积，再加上偏置值，然后应用于 relu 激活函数
    with tf.name_scope('conv2d_1'):
        conv2d_1 = conv2d(x_image, W_conv1) + b_conv1
    with tf.name_scope('relu'):
        h_conv1 = tf.nn.relu(conv2d_1)
    with tf.name_scope('h_pool1'):
        h_pool1 = max_pool_2x2(h_conv1)

with tf.name_scope('conv2'):
    # 初始化第二个卷积层的权重和偏置
    with tf.name_scope('W_conv2'):
        W_conv2 = weight_varible([5, 5, 32, 64])  # 5*5采样窗口，64个卷积核从32个平面抽取特征
    with tf.name_scope('b_conv2'):
        b_conv2 = bias_varible([64])

    # 把 h_pool1 和权重向量进行卷积，再加上偏置值，然后应用于 relu 激活函数
    with tf.name_scope('conv2d_2'):
        conv2d_2 = conv2d(h_pool1, W_conv2) + b_conv2
    with tf.name_scope('h_conv2'):
        h_conv2 = tf.nn.relu(conv2d_2)
    
    with tf.name_scope('h_pool2'):
        h_pool2 = max_pool_2x2(h_conv2)

# 28*28 的图片第一次卷积后还是 28*28， 第一次池化后变为 14*14
# 第二次卷积后为 14*14，池化后变为 7*7
# 经过上面操作后得到 64 张 7*7 的平面

with tf.name_scope('fc1'):
    # 初始化第一个全连接层的权值
    with tf.name_scope('W_fc1'):
        W_fc1 = weight_varible([7*7*64, 1024])  # 生一层有 7*7*64 个神经元，全连接层有1024个神经元
    with tf.name_scope('b_fc1'):
        b_fc1 = bias_varible([1024])

    # 把池化层2的输出扁平化为1维
    with tf.name_scope('h_pool2_flat'):
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
    # 求第一个全连接层的输出
    with tf.name_scope('wx_plus_b_1'):
        wx_plus_b_1 = tf.matmul(h_pool2_flat, W_fc1) + b_fc1
    with tf.name_scope('h_fc1'):
        h_fc1 = tf.nn.relu(wx_plus_b_1)

    # keep_prob 用于表示神经元输出概率
    with tf.name_scope('keep_prob'):
        keep_prob = tf.placeholder(tf.float32)
    with tf.name_scope('h_fc1_drop'):
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

with tf.name_scope('fc2'):
    # 初始化第二个全连接层的权值
    with tf.name_scope('W_fc2'):
        W_fc2 = weight_varible([1024, 10])  # 生一层有 7*7*64 个神经元，全连接层有1024个神经元
    with tf.name_scope('b_fc2'):
        b_fc2 = bias_varible([10])
    with tf.name_scope('wx_plus_b_2'):
        wx_plus_b_2 = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    with tf.name_scope('softmax'):
        prediction = tf.nn.softmax(wx_plus_b_2)

# 交叉熵代价函数
with tf.name_scope('cross_entropy'):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=prediction), name='cross_entropy')
    tf.summary.scalar('cross_entropy', cross_entropy)
    
# 使用 AdamOptimizer 进行优化
with tf.name_scope('train'):
    train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

with tf.name_scope('accuracy'):
    # 结果存放在一个布尔型列表中
    with tf.name_scope('correct_predition'):
        correct_predition = tf.equal(tf.argmax(y, 1), tf.argmax(prediction, 1))   # argmax 返回一维张量中最大值所在的位置
    # 求准确率
    with tf.name_scope('accuracy'):
        accuracy = tf.reduce_mean(tf.cast(correct_predition, tf.float32), name='accuracy')
        tf.summary.scalar('accuracy', accuracy)

# 合并所有summary
merged = tf.summary.merge_all()
        
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_writer = tf.summary.FileWriter('logs/train', sess.graph)
    test_writer = tf.summary.FileWriter('logs/test', sess.graph)
    for i in range(1001):
        # 训练模型
        batch_xs, batch_ys = mnist.train.next_batch(batch_size)
        sess.run(train_step, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 0.5})
        
        # 记录训练集计算的参数
        summary = sess.run(merged, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        train_writer.add_summary(summary, i)
        
        # 记录测试集计算的参数
        batch_xs, batch_ys = mnist.test.next_batch(batch_size)
        summary = sess.run(merged, feed_dict={x: batch_xs, y: batch_ys, keep_prob: 1.0})
        test_writer.add_summary(summary, i)
        
        if i % 100 == 0:
            
            test_acc = sess.run(accuracy, feed_dict={x:mnist.test.images, y:mnist.test.labels, keep_prob: 1.0})
            train_acc = sess.run(accuracy, feed_dict={x:mnist.train.images[:10000], y:mnist.train.labels[:10000], keep_prob: 1.0})
            print('Iter' + str(i) + ', Testing Accuracy ' + str(test_acc) + ', Training Accuracy ' + str(train_acc))


Iter0, Testing Accuracy 0.1015, Training Accuracy 0.1022
Iter100, Testing Accuracy 0.5573, Training Accuracy 0.5419
Iter200, Testing Accuracy 0.8094, Training Accuracy 0.7914
Iter300, Testing Accuracy 0.9247, Training Accuracy 0.9141
Iter400, Testing Accuracy 0.9415, Training Accuracy 0.9324
Iter500, Testing Accuracy 0.9487, Training Accuracy 0.9417
Iter600, Testing Accuracy 0.9534, Training Accuracy 0.9528
Iter700, Testing Accuracy 0.9594, Training Accuracy 0.9589
Iter800, Testing Accuracy 0.9618, Training Accuracy 0.9632
Iter900, Testing Accuracy 0.9631, Training Accuracy 0.9623
Iter1000, Testing Accuracy 0.9669, Training Accuracy 0.967
