In [1]:
import os
import sys

# Make the parent of the current working directory importable.
# NOTE(review): presumably this is where the project-local `dataset` package
# imported in a later cell lives -- confirm against the repository layout.
_PROJECT_ROOT = os.path.dirname(os.path.abspath('.'))

# Guard the append so that re-running this cell does not keep adding the same
# entry to sys.path.
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

TensorBoard是TensorFlow提供的一个可视化工具，这里修改`mini_CNN`的代码以做演示。

首先需要构建好计算图：

In [2]:
import tensorflow as tf
from dataset.dataset import load_cifar10


# ---- Data ------------------------------------------------------------------
# CIFAR-10 train / test splits, requested with a mini-batch size of 64.
train_data, test_data = load_cifar10(batch_size=64)

# ---- Hyper-parameters ------------------------------------------------------
unit_I = train_data.n_features    # width of one flattened input row
unit_O = 10                       # width of the logits layer

filters = 32
conv_size = (3, 3)

pool_size = (2, 2)
strides = (2, 2)

# ---- Graph inputs ----------------------------------------------------------
X = tf.placeholder(tf.float32, [None, unit_I])
Y = tf.placeholder(tf.int64, [None])

# Each flat row is viewed as a [3, 32, 32] block and then transposed to
# [32, 32, 3], i.e. the channel axis is moved to the last position.
X_img = tf.transpose(tf.reshape(X, [-1, 3, 32, 32]), perm=[0, 2, 3, 1])


def _conv_pool(inputs, index):
    """Append one conv layer followed by one max-pooling layer.

    Args:
        inputs: tensor fed to the convolution.
        index: integer suffix used in the layer names
            (``conv<index>`` / ``pooling<index>``).

    Returns:
        A ``(conv, pool)`` tuple holding the output tensor of each layer.
    """
    conv = tf.layers.conv2d(
        inputs,
        filters=filters,
        kernel_size=conv_size,
        padding='same',
        activation=tf.nn.relu,
        name='conv%d' % index,
    )
    pool = tf.layers.max_pooling2d(
        conv,
        pool_size=pool_size,
        strides=strides,
        name='pooling%d' % index,
    )
    return conv, pool


# ---- Network: three conv+pool stages, then a linear read-out ---------------
with tf.name_scope('CNN'):
    conv1, pooling1 = _conv_pool(X_img, 1)
    conv2, pooling2 = _conv_pool(pooling1, 2)
    conv3, pooling3 = _conv_pool(pooling2, 3)
    flat = tf.layers.flatten(pooling3)
    # No activation here: the loss below consumes raw logits.
    logits = tf.layers.dense(flat, unit_O, activation=None)

# ---- Loss and accuracy -----------------------------------------------------
with tf.name_scope('Eval'):
    loss = tf.losses.sparse_softmax_cross_entropy(labels=Y, logits=logits)
    predict = tf.argmax(logits, 1)
    is_correct = tf.equal(predict, Y)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))

# ---- Optimizer -------------------------------------------------------------
with tf.name_scope('train_op'):
    lr = 1e-3
    optimizer = tf.train.AdamOptimizer(lr)
    train_op = optimizer.minimize(loss)

# ---- Session plumbing ------------------------------------------------------
init = tf.global_variables_initializer()
config = tf.ConfigProto()
# Let the session allocate GPU memory on demand instead of all at once.
config.gpu_options.allow_growth = True

  from ._conv import register_converters as _register_converters


(50000, 3072) (50000,)




(10000, 3072) (10000,)
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.




Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


在构建好计算图之后，使用TensorBoard相关API加入我们关心的变量。当然，以下代码也可以直接写在构建计算图的代码中：

In [3]:
# Individual summary ops for the quantities we want to inspect in TensorBoard.
loss_summary = tf.summary.scalar('loss', loss)        # scalar: loss
acc_summary = tf.summary.scalar('acc', accuracy)      # scalar: accuracy
img_summary = tf.summary.image('input_img', X_img)    # image-typed data

# Different phases care about different data.
# Training: all three summaries defined above. They are merged explicitly
# rather than with tf.summary.merge_all(), because merge_all() collects every
# summary registered in the default graph -- including duplicates left behind
# when this cell is re-run -- which would make the result depend on hidden
# kernel state.
train_summary = tf.summary.merge([loss_summary,
                                  acc_summary,
                                  img_summary])
# Testing: only loss and accuracy.
test_summary = tf.summary.merge([loss_summary,
                                 acc_summary])

指定输出的log文件夹：

In [4]:
LOG_ROOT = '.'    # same directory as the notebook
save_dir = os.path.join(LOG_ROOT, 'Tensorboard')    # folder named after the notebook

# Train and test logs are stored in separate sub-directories.
train_log_dir = os.path.join(save_dir, 'train')
test_log_dir = os.path.join(save_dir, 'test')

# os.makedirs also creates the missing parent (save_dir); exist_ok=True makes
# the call a no-op when the directory is already there, so no separate
# existence check is needed and re-running the cell is safe.
for log_dir in (train_log_dir, test_log_dir):
    os.makedirs(log_dir, exist_ok=True)

在训练代码中加入writer：

In [5]:
with tf.Session(config=config) as sess:
    sess.run(init)
    epochs = 20
    summary_every = 100    # write summaries once every this many training batches

    # Create the writers. Only the train writer is handed the graph, so the
    # graph definition is stored in the training log directory.
    train_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    test_writer = tf.summary.FileWriter(test_log_dir)

    # The test-set summary is a little awkward: the dataset class serves its
    # data through a generator, but the summary written for one step has to
    # come from a single run, not from a loop over batches.
    # There are two options: (1) take one test batch up front and estimate the
    # test summary from it, or (2) feed the whole test set at once.
    # Option (2) is what is done below.

    batch_cnt = 0    # number of training batches processed so far
    try:
        for epoch in range(epochs):
            for batch_data, batch_labels in train_data.next_batch():
                train_feed = {X: batch_data, Y: batch_labels}
                loss_val, acc_val, _ = sess.run(
                    [loss, accuracy, train_op],
                    feed_dict=train_feed)
                batch_cnt += 1

                # batch_cnt has already been incremented, so it is the correct
                # step to test and to log; adding 1 again would shift every
                # logged step by one.
                if batch_cnt % summary_every == 0:
                    # Training summary, evaluated on the batch just used.
                    sum_res_train = sess.run(train_summary,
                                             feed_dict=train_feed)
                    train_writer.add_summary(sum_res_train, batch_cnt)

                    # Test summary, evaluated on the full test set in one run.
                    sum_res_test = sess.run(test_summary,
                                            feed_dict={X: test_data.data,
                                                       Y: test_data.target})
                    test_writer.add_summary(sum_res_test, batch_cnt)
    finally:
        # Close the writers even if training is interrupted, so that buffered
        # events are flushed to disk and the file handles are released.
        train_writer.close()
        test_writer.close()

    print('training done.')

epoch: 1, batch_loss: 1.1910089254379272, batch_acc: 0.515625
epoch: 2, batch_loss: 0.9810861945152283, batch_acc: 0.671875
epoch: 3, batch_loss: 1.1511430740356445, batch_acc: 0.609375
epoch: 5, batch_loss: 0.8800821900367737, batch_acc: 0.625
epoch: 6, batch_loss: 0.6403199434280396, batch_acc: 0.765625
epoch: 6, test_acc: 0.7093999981880188
epoch: 7, batch_loss: 0.7179678678512573, batch_acc: 0.703125
epoch: 8, batch_loss: 0.5536156296730042, batch_acc: 0.828125
epoch: 10, batch_loss: 0.7096176147460938, batch_acc: 0.828125
epoch: 11, batch_loss: 0.5687694549560547, batch_acc: 0.78125
epoch: 12, batch_loss: 0.6487060785293579, batch_acc: 0.75
epoch: 12, test_acc: 0.7311000227928162
epoch: 14, batch_loss: 0.5611023902893066, batch_acc: 0.828125
epoch: 15, batch_loss: 0.7122163772583008, batch_acc: 0.703125
epoch: 16, batch_loss: 0.5313572883605957, batch_acc: 0.875
epoch: 17, batch_loss: 0.49914979934692383, batch_acc: 0.8125
epoch: 19, batch_loss: 0.40212172269821167, batch_acc: 0.8

进入```./Tensorboard/```目录，输入

```bash
tensorboard --logdir=train:'train',test:'test'
```

TensorBoard的默认web端口为```:6006```。