<a href="https://colab.research.google.com/github/Anjasfedo/Learning-TensorFlow/blob/main/eat_tensorflow2_in_30_days/Chapter6_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 6-2 Three Ways of Training

There are three ways of model training:
- Using pre-defined `fit` method.
- Using pre-defined `train_on_batch` method.
- Using customized training loop.

Note: the `fit_generator` method is not recommended in `tf.keras`, since its functionality has been merged into `fit`.

In [24]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import *

In [25]:
# Time stamps
@tf.function
def printbar(utc_offset_hours = 8):
    """Print a horizontal rule followed by the current wall-clock time as HH:MM:SS.

    Args:
        utc_offset_hours: Python number giving the offset from UTC, in hours,
            of the clock to display. Defaults to 8, the offset this notebook
            previously had hard-coded. Each distinct value triggers one
            retrace of the `tf.function`.
    """
    # `tf.timestamp()` is seconds since the Unix epoch (UTC). Shift to the
    # requested zone first, then keep only the seconds since local midnight,
    # so fractional and negative offsets are handled as well.
    today_ts = (tf.timestamp() + utc_offset_hours*3600)%(24*60*60)

    hour = tf.cast(today_ts//3600, tf.int32)
    minute = tf.cast((today_ts%3600)//60, tf.int32)
    second = tf.cast(tf.floor(today_ts%60), tf.int32)

    def timeformat(m):
      # Left-pad single-digit fields with a zero so the output is always HH:MM:SS.
      if tf.strings.length(tf.strings.format("{}", m))==1:
        return(tf.strings.format("0{}", m))
      else:
        return(tf.strings.format("{}", m))

    timestrings = tf.strings.join([timeformat(hour), timeformat(minute), timeformat(second)], separator = ":")
    tf.print("=========="*8, end = "")
    tf.print(timestrings)

In [26]:
MAX_LEN = 300     # every article is padded/truncated to this many tokens
BATCH_SIZE = 32

(x_train, y_train), (x_test, y_test) = datasets.reuters.load_data()
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen = MAX_LEN)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen = MAX_LEN)

# Size the vocabulary and the label space from BOTH splits: a token id or class
# id that only occurs in the test split must still be a valid index into the
# embedding table / softmax output when the model is validated.
MAX_WORDS = int(max(x_train.max(), x_test.max())) + 1
CAT_NUM = int(max(y_train.max(), y_test.max())) + 1

In [27]:
# Order matters in a tf.data pipeline:
#   * cache BEFORE shuffle, otherwise the first epoch's shuffled batches are
#     what gets cached and every later epoch replays that same order;
#   * prefetch LAST, so it overlaps the final pipeline stage with training.
ds_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(buffer_size = 1000)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation metrics do not depend on example order, so the test set is not shuffled.
ds_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(tf.data.AUTOTUNE)
)

## 1. Pre-defined `fit` Method

This is a powerful method that supports training on NumPy arrays, `tf.data.Dataset` objects, and Python generators.

It also supports complex control logic through appropriately configured callbacks.

In [28]:
# Reset Keras' global state before building a new model in this cell.
tf.keras.backend.clear_session()

def create_model():
  """Build the (uncompiled) 1-D convolutional text classifier.

  The network embeds each token id, applies two Conv1D + MaxPool1D stages,
  flattens the result and ends in a softmax over `CAT_NUM` classes.
  It reads the module-level constants `MAX_LEN`, `MAX_WORDS` and `CAT_NUM`.

  Returns:
    A `Sequential` model that has not been compiled.
  """
  model = models.Sequential()
  # Declare the input shape with an explicit Input layer. The `input_length`
  # argument of `Embedding` is deprecated (and ignored by Keras 3), which would
  # leave the model unbuilt when `summary()` is called.
  model.add(layers.Input(shape=(MAX_LEN,), dtype="int32"))
  model.add(layers.Embedding(MAX_WORDS, 7))
  model.add(layers.Conv1D(filters=64, kernel_size=5, activation="relu"))
  model.add(layers.MaxPool1D(2))
  model.add(layers.Conv1D(filters=32, kernel_size=3, activation="relu"))
  model.add(layers.MaxPool1D(2))
  model.add(layers.Flatten())
  model.add(layers.Dense(CAT_NUM, activation="softmax"))
  return (model)

def compile_model(model):
  """Attach the optimizer, loss and metrics used by `fit` / `train_on_batch`.

  Args:
    model: the Keras model to configure; it is compiled in place.

  Returns:
    The same `model` object, now compiled.
  """
  nadam = optimizers.Nadam()
  crossentropy = losses.SparseCategoricalCrossentropy()
  # Plain accuracy plus top-5 accuracy, both for integer class labels.
  tracked = [
      metrics.SparseCategoricalAccuracy(),
      metrics.SparseTopKCategoricalAccuracy(5),
  ]
  model.compile(optimizer=nadam, loss=crossentropy, metrics=tracked)
  return model

# Build the network, print its layer summary, then attach optimizer/loss/metrics.
model = create_model()
model.summary()
model = compile_model(model)



In [29]:
# Train for 10 epochs, validating on `ds_test` after each one; the object returned by `fit` is kept in `history`.
history = model.fit(ds_train, validation_data=ds_test, epochs=10)

Epoch 1/10
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 32ms/step - loss: 2.3685 - sparse_categorical_accuracy: 0.3906 - sparse_top_k_categorical_accuracy: 0.7145 - val_loss: 1.6198 - val_sparse_categorical_accuracy: 0.5984 - val_sparse_top_k_categorical_accuracy: 0.7671
Epoch 2/10
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 30ms/step - loss: 1.5073 - sparse_categorical_accuracy: 0.6239 - sparse_top_k_categorical_accuracy: 0.7944 - val_loss: 1.4682 - val_sparse_categorical_accuracy: 0.6318 - val_sparse_top_k_categorical_accuracy: 0.8037
Epoch 3/10
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 26ms/step - loss: 1.1772 - sparse_categorical_accuracy: 0.6908 - sparse_top_k_categorical_accuracy: 0.8567 - val_loss: 1.5228 - val_sparse_categorical_accuracy: 0.6451 - val_sparse_top_k_categorical_accuracy: 0.8192
Epoch 4/10
[1m281/281[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 26ms/step - loss: 0.8853 - sparse_categori

## 2. Pre-defined `train_on_batch` Method

This pre-defined method gives fine-grained control over the training procedure for each batch without using callbacks, which makes it even more flexible than the `fit` method.

In [30]:
def train_model(model,ds_train,ds_valid,epoches):
    """Train a compiled model batch by batch with `train_on_batch` / `test_on_batch`.

    For every epoch the metric state is reset, all training batches are fed
    through `train_on_batch`, the metric state is reset again, all validation
    batches are fed through `test_on_batch`, and both result dicts are printed.
    At epoch 5 the optimizer's learning rate is halved.

    Args:
        model: compiled Keras model.
        ds_train: iterable yielding `(x, y)` training batches.
        ds_valid: iterable yielding `(x, y)` validation batches.
        epoches: number of passes over `ds_train`.
    """
    # Local import: the notebook star-imports `tensorflow.keras`, so keep this name unambiguous.
    import inspect

    # Ask for name -> value dicts instead of zipping with `model.metrics_names`,
    # which does not line up with the returned list on Keras 3.
    batch_kwargs = {"return_dict": True}
    # Keras 2 resets the metrics on every *_on_batch call unless told otherwise;
    # Keras 3 removed the `reset_metrics` argument and always accumulates.
    # Only pass the flag where it exists so both report per-epoch aggregates.
    if "reset_metrics" in inspect.signature(model.test_on_batch).parameters:
        batch_kwargs["reset_metrics"] = False

    for epoch in tf.range(1,epoches+1):

        # Reduce learning rate at the late stage of training
        if epoch == 5:
            # `learning_rate` is available on every optimizer generation; `lr` is not.
            learning_rate = model.optimizer.learning_rate
            learning_rate.assign(learning_rate/2.0)
            tf.print("Lowering optimizer Learning Rate...\n\n")

        model.reset_metrics()
        train_result = {}  # stays empty if ds_train yields no batches
        for x, y in ds_train:
            train_result = model.train_on_batch(x, y, **batch_kwargs)

        # Train and validation share the same metric objects: clear them so the
        # validation numbers are not mixed with training batches.
        model.reset_metrics()
        valid_result = {}  # stays empty if ds_valid yields no batches
        for x, y in ds_valid:
            valid_result = model.test_on_batch(x, y, **batch_kwargs)

        if epoch%1 ==0:
            printbar()
            tf.print("epoch = ",epoch)
            print("train:",{name: float(value) for name, value in train_result.items()})
            print("valid:",{name: float(value) for name, value in valid_result.items()})
            print("")

## 3. Customized Training Loop

The model does not need to be compiled for a customized training loop: the loop itself computes the gradients of the loss and applies them to the model's parameters through the optimizer, which gives the highest flexibility.

In [31]:
# Fresh, uncompiled model: the customized loop supplies its own optimizer, loss and metrics.
model = create_model()

In [32]:
# Loss and optimizer that drive the manual gradient updates.
loss_func = losses.SparseCategoricalCrossentropy()
optimizer = optimizers.Nadam()

# Running means of the per-batch loss, one per split.
train_loss = metrics.Mean(name='train_loss')
valid_loss = metrics.Mean(name='valid_loss')

# Running accuracy for integer class labels, one per split.
train_metric = metrics.SparseCategoricalAccuracy(name='train_accuracy')
valid_metric = metrics.SparseCategoricalAccuracy(name='valid_accuracy')

In [33]:
@tf.function
def train_step(model, features, labels):
  """Run one optimisation step on a single batch and record its loss/accuracy.

  Uses the module-level `loss_func` and `optimizer`, and updates the
  module-level `train_loss` and `train_metric` trackers.

  Args:
    model: model to update; called with `training=True`.
    features: input batch passed to the model.
    labels: target batch passed to the loss and the metric.
  """
  # Record the forward pass so the loss can be differentiated.
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    batch_loss = loss_func(labels, outputs)

  weights = model.trainable_variables
  grads = tape.gradient(batch_loss, weights)
  optimizer.apply_gradients(zip(grads, weights))

  train_loss.update_state(batch_loss)
  train_metric.update_state(labels, outputs)

In [34]:
@tf.function
def valid_step(model, features, labels):
  """Evaluate one validation batch and record its loss/accuracy.

  Uses the module-level `loss_func` and updates the module-level
  `valid_loss` and `valid_metric` trackers. No gradients are computed.

  Args:
    model: model to evaluate.
    features: input batch passed to the model.
    labels: target batch passed to the loss and the metric.
  """
  # Be explicit about inference mode, mirroring `training=True` in train_step,
  # so mode-dependent layers (dropout, batch norm) would behave correctly.
  predictions = model(features, training=False)
  batch_loss = loss_func(labels, predictions)
  valid_loss.update_state(batch_loss)
  valid_metric.update_state(labels, predictions)

In [35]:
def train_model(model,ds_train,ds_valid,epochs):
  """Train `model` with the customized loop built on `train_step` / `valid_step`.

  Each epoch feeds every batch of `ds_train` to `train_step` and every batch
  of `ds_valid` to `valid_step`, prints the aggregated loss/accuracy held by
  the module-level trackers, then clears those trackers for the next epoch.

  Args:
    model: model to train (it does not need to be compiled).
    ds_train: iterable yielding `(features, labels)` training batches.
    ds_valid: iterable yielding `(features, labels)` validation batches.
    epochs: number of passes over `ds_train`.
  """
  # The log template does not change between epochs, so build it once.
  logs = 'Epoch={}, Loss:{}, Accuracy:{}, Valid Loss:{}, Valid Accuracy:{}'

  for epoch in tf.range(1,epochs+1):

    for features, labels in ds_train:
      train_step(model, features, labels)

    for features, labels in ds_valid:
      valid_step(model, features, labels)

    if epoch%1 == 0:
      printbar()
      tf.print(tf.strings.format(logs,
        (epoch, train_loss.result(), train_metric.result(), valid_loss.result(), valid_metric.result())))
      tf.print("")

    # `reset_state()` is the supported spelling; the older `reset_states()` is
    # deprecated in tf.keras and not available on Keras 3 metrics.
    for tracker in (train_loss, valid_loss, train_metric, valid_metric):
      tracker.reset_state()