<a href="https://colab.research.google.com/github/LxYuan0420/eat_tensorflow2_in_30_days/blob/master/notebooks/6_2_Three_Ways_of_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**6-2 Three Ways of Training**

There are three ways of model training: using pre-defined fit method, using pre-defined tran_on_batch method, using customized training loop.

Note: fit_generator method is not recommended in tf.keras since it has been merged into fit.

In [1]:
import numpy as np 
import pandas as pd 
import tensorflow as tf
from tensorflow.keras import * 

# Time stamps
@tf.function
def printbar():
    ts = tf.timestamp()
    today_ts = ts%(24*60*60)

    hour = tf.cast(today_ts//3600+8,tf.int32)%tf.constant(24)
    minite = tf.cast((today_ts%3600)//60,tf.int32)
    second = tf.cast(tf.floor(today_ts%60),tf.int32)
    
    def timeformat(m):
        if tf.strings.length(tf.strings.format("{}",m))==1:
            return(tf.strings.format("0{}",m))
        else:
            return(tf.strings.format("{}",m))
    
    timestring = tf.strings.join([timeformat(hour),timeformat(minite),
                timeformat(second)],separator = ":")
    tf.print("=========="*8,end = "")
    tf.print(timestring)
    

In [2]:
MAX_LEN = 300
BATCH_SIZE = 32

(x_train, y_train), (x_test, y_test) = datasets.reuters.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


  x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx])
  x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:])


In [4]:
x_train[:2]

array([list([1, 27595, 28842, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]),
       list([1, 3267, 699, 3434, 2295, 56, 16784, 7511, 9, 56, 3906, 1073, 81, 5, 1198, 57, 366, 737, 132, 20, 4093, 7, 19261, 49, 2295, 13415, 1037, 3267, 699, 3434, 8, 7, 10, 241, 16, 855, 129, 231, 783, 5, 4, 587, 2295, 13415, 30625, 775, 7, 48, 34, 191, 44, 35, 1795, 505, 17, 12])],
      dtype=object)

In [9]:
y_train[:2]

array([3, 4])

In [8]:
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=MAX_LEN)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=MAX_LEN)

MAX_WORDS = x_train.max() + 1
CAT_NUM = y_train.max() + 1

# we are trying differents way of traininig so we dont do repeat()
ds_train = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE).cache()
ds_test = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(buffer_size=1000).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE).cache()

**1. Predefined `fit` method**

This is a powerful method, which supports training the data with types of numpy array. `tf.data.Dataset` and Python generator.

This method also supports complicated logical controlling through proper configuration of the callbacks.

In [14]:
def create_model():

    model = models.Sequential()
    model.add(layers.Embedding(MAX_WORDS, 7, input_length=MAX_LEN))
    model.add(layers.Conv1D(filters=64, kernel_size=5, activation='relu'))
    model.add(layers.MaxPool1D())
    model.add(layers.Conv1D(filters=32, kernel_size=3, activation='relu'))
    model.add(layers.MaxPool1D())
    model.add(layers.Flatten())
    model.add(layers.Dense(CAT_NUM, activation='softmax'))

    return model

def compile_model(model):
    model.compile(
        optimizer=optimizers.Nadam(),
        loss = losses.SparseCategoricalCrossentropy(),
        metrics = [metrics.SparseCategoricalAccuracy(), metrics.SparseTopKCategoricalAccuracy()]
    )

    return model

In [15]:
model = create_model()
model.summary()
model = compile_model(model)

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 300, 7)            216874    
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 148, 64)           0         
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 2336)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 46)               

In [16]:
history = model.fit(
    ds_train,
    validation_data=ds_test,
    epochs=10
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


**2. Pre-defined train_on_batch method**

This pre-defined method allows fine-controlling to the training procedure for each batch without the callbacks, which is even more flexible than fit method.

In [17]:
model_2 = create_model()
model_2.summary()
model_2 = compile_model(model_2)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (None, 300, 7)            216874    
_________________________________________________________________
conv1d_4 (Conv1D)            (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d_4 (MaxPooling1 (None, 148, 64)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_5 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 2336)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 46)               

In [19]:
def train_model(model, ds_train, ds_valid, epoches):
    for epoch in tf.range(1, epoches+1):
        model.reset_metrics()

        #Reduce learnign rate at the late stage of training.
        if epoch == 5:
            model.optimizer.lr.assign(model.optimizer.lr/2.0)
            tf.print(f"Lowering optimizer learnign rate to: {model.optimizer.lr}")
            
        for x, y in ds_train:
            train_result = model.train_on_batch(x, y)

        for x, y in ds_valid:
            valid_result = model.test_on_batch(x, y, reset_metrics=False)

        if epoch%1 == 0:
            printbar()
            tf.print(f"epoch = {epoch}")
            print("Train: ", dict(zip(model.metrics_names, train_result)))
            print("Valid: ", dict(zip(model.metrics_names, valid_result)))
            print("")

In [20]:
train_model(model_2, ds_train, ds_test, 10)

epoch = 1
Train:  {'loss': 1.3428311347961426, 'sparse_categorical_accuracy': 0.6363636255264282, 'sparse_top_k_categorical_accuracy': 0.8636363744735718}
Valid:  {'loss': 1.8067418336868286, 'sparse_categorical_accuracy': 0.5209261178970337, 'sparse_top_k_categorical_accuracy': 0.7595725655555725}

epoch = 2
Train:  {'loss': 0.9232032895088196, 'sparse_categorical_accuracy': 0.7272727489471436, 'sparse_top_k_categorical_accuracy': 0.9090909361839294}
Valid:  {'loss': 1.5674611330032349, 'sparse_categorical_accuracy': 0.6073018908500671, 'sparse_top_k_categorical_accuracy': 0.7858415246009827}

epoch = 3
Train:  {'loss': 0.5672282576560974, 'sparse_categorical_accuracy': 0.9090909361839294, 'sparse_top_k_categorical_accuracy': 0.9545454382896423}
Valid:  {'loss': 1.5064771175384521, 'sparse_categorical_accuracy': 0.645146906375885, 'sparse_top_k_categorical_accuracy': 0.8072128295898438}

epoch = 4
Train:  {'loss': 0.3684699237346649, 'sparse_categorical_accuracy': 0.9090909361839294, 

**3. Customized Training Loop**

Re-compilation of the model is not required in the customized training loop, just back-propagate the iterative parameters through the optimizer according to the loss function, which gives us the highest flexibility.

In [22]:
model_3 = create_model()
model_3.summary()
model_3 = compile_model(model_3)

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 300, 7)            216874    
_________________________________________________________________
conv1d_8 (Conv1D)            (None, 296, 64)           2304      
_________________________________________________________________
max_pooling1d_8 (MaxPooling1 (None, 148, 64)           0         
_________________________________________________________________
conv1d_9 (Conv1D)            (None, 146, 32)           6176      
_________________________________________________________________
max_pooling1d_9 (MaxPooling1 (None, 73, 32)            0         
_________________________________________________________________
flatten_4 (Flatten)          (None, 2336)              0         
_________________________________________________________________
dense_4 (Dense)              (None, 46)               

In [23]:
optimizer = optimizers.Nadam()
loss_func = losses.SparseCategoricalCrossentropy()

train_loss = metrics.Mean()
train_metric = metrics.SparseCategoricalAccuracy(name="train_accuracy")

valid_loss = metrics.Mean()
valid_metric = metrics.SparseCategoricalAccuracy(name="valid_accuracy")

In [26]:
@tf.function
def train_step(model, features, labels):
    with tf.GradientTape() as tape:
        predictions = model(features)
        loss = loss_func(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss.update_state(loss)
    train_metric.update_state(labels, predictions)

def valid_step(model, features, labels):
    predictions = model(features)
    loss = loss_func(labels, predictions)
    valid_loss.update_state(loss)
    valid_metric.update_state(labels, predictions)

def train_model(model, ds_train, ds_test, epoches):

    for epoch in tf.range(1, epoches+1):

        for features, labels in ds_train:
            train_step(model, features, labels)

        for features, labels in ds_train:
            valid_step(model, features, labels)

        logs = 'Epoch={},Loss:{},Accuracy:{},Valid Loss:{},Valid Accuracy:{}'
        
        if epoch%1 ==0:
            printbar()
            tf.print(tf.strings.format(logs,
            (epoch,train_loss.result(),train_metric.result(),valid_loss.result(),valid_metric.result())))
            tf.print("")

        #for next epoch, we reset the metrics
        train_loss.reset_states()
        train_metric.reset_states()
        valid_loss.reset_states()


In [27]:
train_model(model_3, ds_train, ds_test, 10)

Epoch=1,Loss:2.02902699,Accuracy:0.468269885,Valid Loss:1.6026293,Valid Accuracy:0.585393

Epoch=2,Loss:1.48710024,Accuracy:0.612781107,Valid Loss:1.23912287,Valid Accuracy:0.627866864

Epoch=3,Loss:1.19473851,Accuracy:0.685370743,Valid Loss:0.92709285,Valid Accuracy:0.670637548

Epoch=4,Loss:0.935973227,Accuracy:0.752282321,Valid Loss:0.664993882,Valid Accuracy:0.711311519

Epoch=5,Loss:0.708897114,Accuracy:0.814963281,Valid Loss:0.490126163,Valid Accuracy:0.745090187

Epoch=6,Loss:0.539092541,Accuracy:0.866288126,Valid Loss:0.385754317,Valid Accuracy:0.771914184

Epoch=7,Loss:0.43475759,Accuracy:0.894232929,Valid Loss:0.329027325,Valid Accuracy:0.792648792

Epoch=8,Loss:0.365386456,Accuracy:0.913827658,Valid Loss:0.292475045,Valid Accuracy:0.809340894

Epoch=9,Loss:0.317523807,Accuracy:0.924849689,Valid Loss:0.266854852,Valid Accuracy:0.82286793

Epoch=10,Loss:0.282765359,Accuracy:0.932086408,Valid Loss:0.249779209,Valid Accuracy:0.833967924

