In [4]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

In [5]:
# Training configuration shared by the cells below.
batch_size = 100       # examples requested from mnist.train.next_batch per step
learning_rate = 1e-3   # handed to tf.train.AdamOptimizer
max_step = 1000        # number of iterations of the training loop
# Prefix for the TensorBoard event directories ("train" / "test" are appended).
log_dir = "F:/tmp/"

In [6]:
# Read the MNIST data sets, using "MNIST_data/" as the data directory and
# requesting one-hot encoded labels. Later cells use `mnist.train.next_batch`
# for training batches and `mnist.test.images` / `mnist.test.labels` for evaluation.
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [7]:
# Placeholders fed on every session run: flattened images (784 values per
# example) and their labels (10 values per example).
with tf.name_scope("input"):
    X = tf.placeholder(tf.float32, [None, 784], name="x_input")
    y_ = tf.placeholder(tf.float32, [None, 10], name="y_input")

# View each flattened example as a 28x28 single-channel image and register an
# image summary for it (at most 10 images per summary).
with tf.name_scope("input_reshape"):
    image_shaped_input = tf.reshape(X, shape=[-1, 28, 28, 1])
    tf.summary.image("input", image_shaped_input, max_outputs=10)

# Parameter-initialisation helpers.
# A weight `shape` is a list of ints; nn_layer passes the two-element list
# [input_dim, output_dim].
def weight_variable(shape):
    """Return a tf.Variable initialised from a truncated normal with stddev 0.1."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
# A bias `shape` is a one-element list: [output_channels].
def bias_variable(shape):
    """Return a tf.Variable whose entries all start at the constant 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def variable_summaries(var):
    """Register TensorBoard summaries describing the tensor `var`.

    Adds scalar summaries for the mean, standard deviation, maximum and
    minimum of `var`, plus a histogram of its values, all created under the
    "summaries" name scope. Returns None.
    """
    with tf.name_scope("summaries"):
        # Mean over every element, logged as a scalar.
        avg = tf.reduce_mean(var)
        tf.summary.scalar("mean", avg)
        # Standard deviation: only the ops computing it live in the nested
        # "stddev" scope; its scalar summary is registered back in "summaries".
        with tf.name_scope("stddev"):
            squared_dev = tf.square(var - avg)
            spread = tf.sqrt(tf.reduce_mean(squared_dev))
        tf.summary.scalar("stddev", spread)
        # Largest and smallest element.
        tf.summary.scalar("max", tf.reduce_max(var))
        tf.summary.scalar("min", tf.reduce_min(var))
        # Distribution of the values, logged as a histogram.
        tf.summary.histogram("histogram", var)

def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    """Build one fully connected layer and register its TensorBoard summaries.

    The layer computes ``act(matmul(input_tensor, weights) + biases)``. All ops
    are created under the `layer_name` scope, with nested "weights", "biases"
    and "linear_compute" scopes to keep the graph readable.

    Args:
        input_tensor: tensor multiplied by the weight matrix; expected to have
            `input_dim` columns (not checked here).
        input_dim: number of rows of the weight matrix.
        output_dim: number of columns of the weight matrix and length of the bias.
        layer_name: name scope wrapping every op of the layer.
        act: callable invoked as ``act(Z, name="activation")`` on the linear
            output; defaults to tf.nn.relu.

    Returns:
        The tensor returned by `act`.
    """
    with tf.name_scope(layer_name):
        # Weight matrix, created by weight_variable and summarised.
        with tf.name_scope("weights"):
            W = weight_variable([input_dim, output_dim])
            variable_summaries(W)
        # Bias vector, created by bias_variable and summarised.
        with tf.name_scope("biases"):
            b = bias_variable([output_dim])
            variable_summaries(b)
        # Linear part (xW + b), recorded as a histogram.
        with tf.name_scope("linear_compute"):
            pre_activation = tf.matmul(input_tensor, W) + b
            tf.summary.histogram("linear", pre_activation)
        # Apply the activation and record its output as a histogram too.
        output = act(pre_activation, name="activation")
        tf.summary.histogram("activations", output)

    # Final output of the layer.
    return output

# Hidden layer: 784 inputs -> 500 units with ReLU activation.
hidden1 = nn_layer(X, 784, 500, "layer1", act=tf.nn.relu)

# Dropout layer that randomly switches off some of hidden1's units. The keep
# probability is a placeholder so it can be fed per run, and it is logged as a scalar.
with tf.name_scope("dropout"):
    prob = tf.placeholder(tf.float32)
    tf.summary.scalar("dropout_keep_prob", prob)
    dropped = tf.nn.dropout(hidden1, keep_prob=prob)

# Output layer.
# Input dimension is the previous layer's output size (500); output dimension
# is the number of classes (10). The activation is the identity: softmax is
# deliberately left out here and applied together with the loss instead.
y = nn_layer(dropped, 500, 10, "layer2", act=tf.identity)

# Loss, recorded with tf.summary.
with tf.name_scope("loss"):
    # Softmax cross-entropy between the labels and the logits (one value per example).
    diff = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
    with tf.name_scope("total"):
        # Mean of the per-example cross-entropy values.
        cross_entropy = tf.reduce_mean(diff)
tf.summary.scalar("loss", cross_entropy)

# Training op: Adam minimising the mean cross-entropy.
with tf.name_scope("train"):
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

# Accuracy, recorded with tf.summary.
with tf.name_scope("accuracy"):
    with tf.name_scope("correct_prediction"):
        # True where the arg-max of the logits matches the arg-max of the label.
        correct_prediction = tf.equal(
            tf.argmax(y, axis=1),
            tf.argmax(y_, axis=1),
        )
    with tf.name_scope("accuracy"):
        # Fraction of correct predictions (booleans cast to float32, then averaged).
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
tf.summary.scalar("accuracy", accuracy)

# Merge every summary registered so far into a single op. Evaluating it in the
# training loop yields the value that is handed to FileWriter.add_summary.
merged = tf.summary.merge_all()


def generate_feed(train=True):
    """Build the feed_dict mapping the graph's placeholders to data.

    Args:
        train: when truthy, draw the next `batch_size` training examples and
            use a dropout keep probability of 0.9; otherwise use the whole
            test set with a keep probability of 1.0 (dropout disabled).

    Returns:
        dict keyed by the placeholders `X`, `y_` and `prob`.
    """
    if not train:
        return {X: mnist.test.images, y_: mnist.test.labels, prob: 1.0}

    batch_images, batch_labels = mnist.train.next_batch(batch_size)
    return {X: batch_images, y_: batch_labels, prob: 0.9}

In [9]:
# Start a session and initialise every variable in the graph.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
# Separate event writers for training and test summaries; the train writer is
# also given the session graph.
train_writer = tf.summary.FileWriter(log_dir + "train", sess.graph)
test_writer = tf.summary.FileWriter(log_dir + "test")
# Train the model. The writers are closed in `finally` so they are released
# even when a step raises or the cell is interrupted.
try:
    for i in range(max_step):
        if i % 10 == 0:
            # Every 10th step: evaluate on the test set (no training op is run)
            # and record the test summaries and accuracy.
            summary, acc = sess.run([merged, accuracy], feed_dict=generate_feed(train=False))
            test_writer.add_summary(summary, i)
            print("test acc at step %d: %.4f" % (i, acc))
        elif i % 100 == 99:
            # Steps 99, 199, ...: train with full tracing and record execution stats.
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
            summary, _ = sess.run([merged, train_step],
                                  feed_dict=generate_feed(train=True),
                                  options=run_options,
                                  run_metadata=run_metadata)
            train_writer.add_run_metadata(run_metadata, "step%03d" % i)
            train_writer.add_summary(summary, i)
            print("Adding run metadata for ", i)
        else:
            # Ordinary training step: run the optimizer and record a summary.
            summary, _ = sess.run([merged, train_step],
                                  feed_dict=generate_feed(train=True))
            # The summary used to be computed here and then discarded, so train
            # summaries only appeared at the traced steps; write it out.
            train_writer.add_summary(summary, i)
finally:
    train_writer.close()
    test_writer.close()

test acc at step 0: 0.1550
test acc at step 10: 0.7251
test acc at step 20: 0.8296
test acc at step 30: 0.8649
test acc at step 40: 0.8811
test acc at step 50: 0.8923
test acc at step 60: 0.9008
test acc at step 70: 0.9049
test acc at step 80: 0.9098
test acc at step 90: 0.9140
Adding run metadata for  99
test acc at step 100: 0.9161
test acc at step 110: 0.9194
test acc at step 120: 0.9225
test acc at step 130: 0.9263
test acc at step 140: 0.9239
test acc at step 150: 0.9275
test acc at step 160: 0.9301
test acc at step 170: 0.9296
test acc at step 180: 0.9335
test acc at step 190: 0.9325
Adding run metadata for  199
test acc at step 200: 0.9368
test acc at step 210: 0.9376
test acc at step 220: 0.9359
test acc at step 230: 0.9381
test acc at step 240: 0.9390
test acc at step 250: 0.9358
test acc at step 260: 0.9375
test acc at step 270: 0.9433
test acc at step 280: 0.9424
test acc at step 290: 0.9432
Adding run metadata for  299
test acc at step 300: 0.9409
test acc at step 310: 0.94