In [1]:
import os
import re

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint
from tensorflow.keras.models import load_model

%matplotlib inline

In [2]:
# Fashion-MNIST ships with Keras; load_data() hands back a (train, test) pair
# of (images, labels) tuples.
mnist = tf.keras.datasets.fashion_mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale the image arrays by 1/255 (presumably mapping 8-bit pixel intensities
# into the [0, 1] range - confirm against the dataset documentation).
x_train = x_train / 255.0
x_test = x_test / 255.0

In [3]:
# The checkpoint directory must exist before the callback writes into it.
os.makedirs("training_3", exist_ok=True)

# Checkpoint callback: the file name template embeds the epoch number and the
# validation loss, which is what lets a later cell recover the epoch from the
# file name alone.
ckpt_callback = ModelCheckpoint(
    filepath='training_3/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
    monitor='val_loss',
)

In [4]:
def scheduler(epoch):
    """Return the learning rate for the given zero-based epoch index.

    Step schedule:
      * epoch < 2       -> 0.001
      * 2 <= epoch < 6  -> 0.0001
      * otherwise       -> 0.00001
    """
    # (exclusive upper bound on the epoch index, learning rate) in ascending order.
    steps = ((2, 0.001), (6, 0.0001))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.00001

# Keras callback that applies scheduler(); with verbose=1 it logs the chosen
# rate each epoch (see the "LearningRateScheduler reducing learning rate to ..."
# lines in the training output).
learning_rate_scheduler = LearningRateScheduler(scheduler, verbose=1)

In [5]:
# A small fully connected classifier: flatten each 28x28 input, one hidden
# ReLU layer of 512 units, then a 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for three epochs, holding out 20% of the training data for validation.
# The scheduler callback picks the learning rate for each epoch and the
# checkpoint callback writes model files under training_3/.
history = model.fit(
    x_train,
    y_train,
    epochs=3,
    validation_split=0.2,
    callbacks=[learning_rate_scheduler, ckpt_callback],
)

Train on 48000 samples, validate on 12000 samples

Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.
Epoch 1/3

Epoch 00002: LearningRateScheduler reducing learning rate to 0.001.
Epoch 2/3

Epoch 00003: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 3/3


### Resuming a Training Process

In [29]:
def latest_checkpoint(directory, default=None):
    """Return the most recently written ``weights.*.hdf5`` file in ``directory``.

    Args:
        directory: Folder the checkpoint callback writes into.
        default: Value returned when ``directory`` does not exist or holds no
            matching checkpoint file.

    Returns:
        The path (``directory`` joined with the file name) of the matching
        file with the newest modification time, or ``default``.
    """
    if not os.path.isdir(directory):
        return default
    candidates = [
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.startswith('weights.') and name.endswith('.hdf5')
    ]
    if not candidates:
        return default
    # Modification time, not file name, decides: stale files from earlier runs
    # may carry the same epoch number with a different validation loss.
    return max(candidates, key=os.path.getmtime)


# The validation loss baked into the file name changes from run to run, so a
# hard-coded name only matches the one run it was copied from. Resume from the
# newest checkpoint instead and keep the original name as a fallback.
checkpoint_path = latest_checkpoint('training_3',
                                    default='training_3/weights.03-0.32.hdf5')

In [30]:
del model  # remove the trained model's name binding; the next cell re-creates it from the checkpoint file

In [31]:
# Rebuild the model from the checkpoint file on disk.
# Per the original author's note, load_model returns a compiled model
# identical to the one that was saved.
model = load_model(checkpoint_path)

In [32]:
# Deliberately naive resume, kept as a demonstration of the pitfall: fit() is
# called without initial_epoch, so epoch numbering starts over and the
# scheduler hands back its first-epoch rate (0.001, see the log below) rather
# than continuing the schedule.
# NOTE(review): compile() with the string 'adam' presumably builds a fresh
# optimizer and discards any optimizer state restored by load_model - confirm.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(x_train, y_train, epochs=1, validation_split=0.2,
                    callbacks=[learning_rate_scheduler])

Train on 48000 samples, validate on 12000 samples

Epoch 00001: LearningRateScheduler reducing learning rate to 0.001.


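Oops! Look at the learning rate: the scheduler restarted from the first epoch and set the rate back to 0.001, even though three epochs had already been trained and the schedule calls for 0.0001 at this point. `fit` has to be told which epoch we are resuming from.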

In [9]:
del model  # discard the model trained by the naive resume above; it is reloaded from the checkpoint next

In [10]:
# Start again from the same checkpoint file.
# Per the original author's note, load_model returns a compiled model
# identical to the one that was saved.
model = load_model(checkpoint_path)

In [26]:
def get_init_epoch(path):
    """Extract the epoch number embedded in a checkpoint file name.

    Checkpoint files are named ``weights.{epoch:02d}-{val_loss:.2f}.hdf5``,
    e.g. ``training_3/weights.03-0.32.hdf5`` -> ``3``.

    Args:
        path: Path to a checkpoint file following the naming scheme above.

    Returns:
        The epoch number as an ``int``.

    Raises:
        ValueError: If the file name contains no ``.<digits>-`` epoch marker.
    """
    filename = os.path.basename(path)
    # The epoch is the run of digits between a '.' and the '-' that precedes
    # the loss. Matching it explicitly (instead of splitting on '-') keeps
    # working when the loss itself is negative, e.g. "weights.03--0.32.hdf5".
    epochs = re.findall(r'\.(\d+)-', filename)
    if not epochs:
        raise ValueError(
            "cannot find an epoch number in checkpoint name {!r}; expected "
            "something like 'weights.03-0.32.hdf5'".format(filename)
        )
    # Take the last marker so extra '.<digits>-' prefixes in the name are ignored.
    return int(epochs[-1])

In [27]:
# Find the epoch index to resume from. The checkpoint written after the third
# epoch is named "weights.03-...", and passing that number as initial_epoch
# makes the next run start at "Epoch 4" (see the training log below).
initial_epoch = get_init_epoch(checkpoint_path)

In [34]:
# No compile() here: load_model already returned a compiled model, and
# compiling again with optimizer='adam' would replace the restored optimizer
# with a freshly initialised one, so training would not truly resume.

# initial_epoch tells fit() (and therefore the learning-rate scheduler) which
# epoch index to continue from. `epochs` is the index at which training stops,
# not a count, so initial_epoch + 1 trains exactly one more epoch.
history = model.fit(x_train, y_train, epochs=initial_epoch + 1, validation_split=0.2,
                    callbacks=[learning_rate_scheduler], initial_epoch=initial_epoch)

Train on 48000 samples, validate on 12000 samples

Epoch 00004: LearningRateScheduler reducing learning rate to 0.0001.
Epoch 4/4
