Hi everyone! In this notebook we will learn how to train a model using the eager mode of TensorFlow 2. For teaching purposes, we focus only on implementing the training phase in eager mode. A more comprehensive training procedure is given in KerasTraining.ipynb.

In [None]:
# Setup for Google Colab only. The notebook commands below change into /content,
# clone the course repository and move into its workshop folder. They are kept
# inside a string literal so they do not run as-is; to use them on Colab, remove
# the surrounding triple quotes.
"""
%cd /content
!git clone https://github.com/MSBD-5001/Lecture-Materials
%cd Lecture-Materials/workshop
"""

In [None]:
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import time
# Show which TensorFlow version is installed (this notebook is written for TensorFlow 2).
print(tf.__version__)

In [None]:
# Display names for the ten classes: the digit strings '0' through '9',
# positioned so that class_names[k] is the name of class index k.
class_names = [str(digit) for digit in range(10)]

In [None]:
def show_images(dataset, class_names, predicts=None):
    """Plot up to nine images from ``dataset`` in a 3x3 grid with their labels.

    Args:
        dataset: Iterable yielding ``(image, label)`` pairs. Only the first
            nine pairs are drawn. An image whose last dimension has size 1
            is squeezed before being handed to ``imshow``.
        class_names: Sequence indexed by ``label`` (and by each entry of
            ``predicts``) to obtain the name shown under the image.
        predicts: Optional indexable of predicted class indices, in the same
            order as the items of ``dataset``. When given, the predicted
            class name is shown below the true one.

    Grid cells that receive no image are switched off, so a dataset with
    fewer than nine items (including an empty one) still renders cleanly.
    """
    fig, axes = plt.subplots(3, 3)
    fig.subplots_adjust(hspace=0.8, wspace=0.4)
    cells = list(axes.flat)

    shown = 0
    # zip() stops after the last grid cell, so at most nine items are drawn
    # and no extra element is pulled from the dataset.
    for i, (ax, (image, label)) in enumerate(zip(cells, dataset)):
        if image.shape[-1] == 1:
            revised_image = np.squeeze(image)
        else:
            revised_image = image
        ax.imshow(revised_image)
        # Name of the true class.
        label_name = class_names[label]

        # Show true and predicted classes.
        if predicts is None:
            ax.set_xlabel("True : {0}".format(label_name))
        else:
            predict_name = class_names[predicts[i]]
            ax.set_xlabel("True: {0}\nPred: {1}".format(label_name, predict_name))
        # Remove ticks from the plot.
        ax.set_xticks([])
        ax.set_yticks([])
        shown = i + 1

    # Hide the unused cells. Counting the drawn cells (rather than reusing the
    # loop variable after the loop) keeps this safe when ``dataset`` is empty.
    for ax in cells[shown:]:
        ax.axis('off')
    plt.show()

Preparing Data

Preparing a simple Model.

Necessary components of the training procedure: an <b>optimizer</b>, a <b>loss</b> and <b>metrics</b>.

In [None]:
# Optimizer used to apply gradient updates; RMSprop is created with its
# default settings (no arguments are passed).
optimizer = tf.keras.optimizers.RMSprop()

# Cross-entropy loss, called below as compute_loss(y, model_output).
# NOTE(review): no from_logits argument is passed, so the Keras default
# (from_logits=False per the Keras documentation) applies; the model is then
# expected to output probabilities. Confirm against the model definition.
compute_loss = tf.keras.losses.SparseCategoricalCrossentropy()

# Accuracy metric object; it is called on batches, read with .result() and
# cleared with a reset call in the training loop.
compute_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

Automatic differentiation is useful for implementing machine learning algorithms such as backpropagation for training neural networks. During eager execution, use tf.GradientTape to trace operations for computing gradients later.

All forward-pass operations get recorded to a "tape". To compute the gradient, the tape is played backwards and then discarded. So, by default, a particular tf.GradientTape can compute gradients only once; subsequent calls raise a RuntimeError (unless the tape is created with persistent=True).

In [None]:
@tf.function
def train_one_step(model, optimizer, x, y):
    """Run one optimization step on a single batch and return its loss.

    The forward pass and the loss are evaluated under a ``tf.GradientTape``
    so the gradients of the loss with respect to
    ``model.trainable_variables`` can be computed and then applied by
    ``optimizer``. The module-level ``compute_accuracy`` metric is also
    updated with this batch.

    Args:
        model: Callable invoked as ``model(x, training=True)`` that exposes
            ``trainable_variables``.
        optimizer: Object providing ``apply_gradients``.
        x: Batch of inputs passed to ``model``.
        y: Batch of targets passed to ``compute_loss`` and
            ``compute_accuracy``.

    Returns:
        The value returned by ``compute_loss`` for this batch.
    """
    # Record the forward pass so it can be differentiated afterwards.
    with tf.GradientTape() as tape:
        outputs = model(x, training=True)
        batch_loss = compute_loss(y, outputs)

    # Differentiate the loss and let the optimizer update the variables.
    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))

    # Fold this batch into the running accuracy metric.
    compute_accuracy(y, outputs)
    return batch_loss

The @tf.function decorator traces the operations in the function into a graph, which may reduce the running time. However, extra time is needed to build the graph the first time the function is called.

In [None]:
# Training pipeline: shuffle with a 1024-element buffer, then group the
# samples into batches of 64.
subTrainDataset = trainDataset.shuffle(buffer_size=1024)
subTrainDataset = subTrainDataset.batch(64)

# Test pipeline: batches of 64, with no shuffling applied here.
subTestDataset = testDataset.batch(64)

In [None]:
# Number of full passes over the training data.
epochs = 2
for epoch in range(epochs):
    print("epoch: ", epoch)
    start_time = time.time()

    # Training pass. The metric is cleared first so the accuracy printed
    # below only covers batches seen in the current epoch.
    compute_accuracy.reset_states()
    for step, (x, y) in enumerate(subTrainDataset, start=1):
        loss = train_one_step(cnnModel, optimizer, x, y)
        # Only report progress on every 50th step.
        if step % 50 != 0:
            continue
        tf.print("Loss: ", loss, "Accuracy on training data:", compute_accuracy.result())

    # Evaluation pass. The same metric object is reused, so it is cleared
    # again before the test batches are accumulated.
    compute_accuracy.reset_states()
    for x, y in subTestDataset:
        logits = cnnModel(x, training=False)
        compute_accuracy(y, logits)
    test_acc = compute_accuracy.result()

    print("Accuracy on testing data: %.4f" % (float(test_acc),))
    # The reported time covers both the training and the evaluation pass.
    print("Time taken: %.2fs" % (time.time() - start_time))

In [None]:
# Take the first nine training samples as a single batch, run the model on
# them, and keep the index of the highest-scoring class for each sample.
sample_batch = trainDataset.take(9).batch(9)
class_scores = cnnModel.predict(sample_batch)
predict_result = tf.argmax(class_scores, axis=1)

In [None]:
# Display the predicted class indices as a NumPy array.
predict_result.numpy()

In [None]:
# Plot images from trainDataset with their true labels and the predictions
# computed above (show_images draws at most nine images).
show_images(trainDataset, class_names, predict_result)