# 课时56 自定义训练中使用Tensorboard

In [1]:
import pandas as pd
import numpy as np
import seaborn as sb
sb.set_style('darkgrid')
# pathlib相比os.path更好用
import pathlib
import random
import matplotlib.pyplot as plt
import tensorflow as tf
import glob
print('Tensorflow Version:', tf.__version__)

Tensorflow Version: 2.0.0


# 1. 自定义训练添加Tensorboard

In [2]:
# 加载MNIST数据集
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
# 以tf.Dataset形式加载数据(对于MNIST数据集，它是没有第三个维度Channel的，这里添加上它的第三个维度)
train_images = tf.expand_dims(train_images, -1)
# 由于tf.GradientTape()要求的数据类型是float，因此这里需要改变MNIST数据集图片的数据类型
# 并在转换数据类型的过程中对图片数据进行归一化
train_images = tf.cast(train_images/255, tf.float32)
# 对于MNIST数据集的标签，数据类型是uint8(无符号8位)，这里为了方便计算，将其转换为int64类型
train_labels = tf.cast(train_labels, tf.int64)
dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))

test_images = tf.expand_dims(test_images, -1)
test_images = tf.cast(test_images/255, tf.float32)
test_labels = tf.cast(test_labels, tf.int64)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_dataset

<TensorSliceDataset shapes: ((28, 28, 1), ()), types: (tf.float32, tf.int64)>

In [3]:
# 对数据进行打乱以及batch划分
dataset = dataset.shuffle(buffer_size=10000).batch(batch_size=32)
dataset

<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [4]:
test_dataset = test_dataset.batch(batch_size=32)
test_dataset

<BatchDataset shapes: ((None, 28, 28, 1), (None,)), types: (tf.float32, tf.int64)>

In [5]:
# 建立模型
model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(filters=16, kernel_size=[3, 3], activation='relu', input_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=32, kernel_size=[3, 3], activation='relu'),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(units=10, activation='softmax')])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 32)        4640      
_________________________________________________________________
global_average_pooling2d (Gl (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 10)                330       
Total params: 5,130
Trainable params: 5,130
Non-trainable params: 0
_________________________________________________________________


In [6]:
# 进行自定义训练
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# 由于dataset是tf.Dataset对象，因此在eager模型下是直接可迭代的(这里设定了batch=32，因此每次迭代都取出32个数据)：
features, labels = next(iter(dataset))

In [7]:
# model内置call方法，因此可以直接调用
predictions = model(features)
# 还没进行训练的时候预测的进度不好
tf.argmax(predictions, axis=1)

<tf.Tensor: id=157, shape=(32,), dtype=int64, numpy=
array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3], dtype=int64)>

In [8]:
# 定义损失函数，每次传入数据之后都会返回相应的损失值
def loss(model, x, y):
    y_ = model(x)
    return loss_func(y, y_)

In [9]:
# 定义两个tf.keras.metrics对象用于计算自定义训练中的loss均值和正确率
train_loss = tf.keras.metrics.Mean('train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')

test_loss = tf.keras.metrics.Mean('test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

In [10]:
# 计算一步对应的损失函数与模型参数的梯度
def train_step(model, images, labels):
    with tf.GradientTape() as t:
        # loss_step = loss(model, images, labels)
        # 为了方便tf.keras.metrics模块的演示，将上面这个代码拆成两步，不再调用loss计算函数
        pred = model(images)
        loss_step = loss_func(labels, pred)
    grads = t.gradient(loss_step, model.trainable_variables)
    # apply_gradients()代表将优化方法应用到参数的更新中
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # 这里是新增的部分，每一步都计算均值和正确率
    train_loss(loss_step)
    train_accuracy(labels, pred)

In [11]:
# 计算一步对应的损失函数与模型参数的梯度
def test_step(model, images, labels):
    pred = model(images)
    loss_step = loss_func(labels, pred)
    test_loss(loss_step)
    test_accuracy(labels, pred)

In [18]:
# 创建filewriter
import datetime
import os
# current_time = datetime.datetime.now().strftime('%Y%M%D')
train_log_dir = os.path.join('logss', 'train')
test_log_dir = os.path.join('logss', 'test')

train_writer = tf.summary.create_file_writer(logdir=train_log_dir)
test_writer = tf.summary.create_file_writer(logdir=test_log_dir)

In [19]:
def train_model():
    # 训练3个epochs
    for epoch in range(3):
        for (batch, (images, labels)) in enumerate(dataset):
            train_step(model, images, labels)
            
        with train_writer.as_default():
            tf.summary.scalar('train_loss', train_loss.result(), step=epoch)
            tf.summary.scalar('train_acc', train_accuracy.result(), step=epoch)
        
        print('Epoch %i loss is %f, acc is %f'%(epoch, 
                                                train_loss.result(),
                                                train_accuracy.result()))
        # 每个batch结束就重置这一轮的train_loss和train_accuracy
        train_loss.reset_states()
        train_accuracy.reset_states()
        
        # =====================================================================
        for (batch, (images, labels)) in enumerate(test_dataset):
            test_step(model, images, labels)
        
        with train_writer.as_default():
            tf.summary.scalar('test_loss', test_loss.result(), step=epoch)
            tf.summary.scalar('test_acc', test_accuracy.result(), step=epoch)
        
        print('Epoch %i test_loss is %f, test_acc is %f'%(epoch, 
                                                test_loss.result(),
                                                test_accuracy.result()))
        
        # 每个batch结束就重置这一轮的train_loss和train_accuracy
        test_loss.reset_states()
        test_accuracy.reset_states()

In [20]:
train_model()

Epoch 0 loss is 2.157457, acc is 0.307167
Epoch 0 test_loss is 2.085262, test_acc is 0.376000
Epoch 1 loss is 2.044552, acc is 0.430450
Epoch 1 test_loss is 2.009225, test_acc is 0.465300
Epoch 2 loss is 1.985295, acc is 0.495200
Epoch 2 test_loss is 1.954638, test_acc is 0.524500
