In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
import os

In [2]:
# Load MNIST through the `tensorflow.keras` module already imported as `keras`
# in the first cell, instead of importing the standalone `keras` package here.
# This keeps the notebook on a single Keras implementation and keeps all
# imports in the top cell.
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

Using TensorFlow backend.


In [3]:
# Keep only the first 1000 examples of each split so the demo trains quickly.
train_images, train_labels = train_images[:1000], train_labels[:1000]
test_images, test_labels = test_images[:1000], test_labels[:1000]

# Flatten every 28x28 image into a 784-element row and divide by 255.0.
train_images = train_images.reshape(-1, 28 * 28) / 255.0
test_images = test_images.reshape(-1, 28 * 28) / 255.0

In [4]:
# Factory for the small fully connected classifier used throughout the notebook
def create_model():
    """Build and compile a simple sequential classifier.

    The network takes 784-element input vectors and stacks a 512-unit ReLU
    dense layer, a dropout layer (rate 0.2) and a 10-unit softmax dense layer.
    It is compiled with the Adam optimizer, sparse categorical cross-entropy
    loss and the accuracy metric.

    Returns:
        The compiled, untrained Keras model.
    """
    layer_stack = [
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation='softmax'),
    ]
    net = tf.keras.models.Sequential(layer_stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [5]:
# Build and compile an untrained model instance via `create_model()`.
# Note: the training cell further down creates a fresh instance again before fitting.
model = create_model()

### Define checkpoint path and checkpoint directory

In [6]:
# Checkpoint file name template: the `{epoch:04d}` placeholder is filled in via
# `str.format`, so each saved epoch gets its own zero-padded file name.
# (Previously: checkpoint_path = "training_1/cp.ckpt")
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"

# Directory part of the template; used later to look up the newest checkpoint.
checkpoint_dir = os.path.split(checkpoint_path)[0]

### Create a callback that saves the model’s weights

In [7]:
# Checkpoint callback: stores only the weights (not the full model) under
# `checkpoint_path` once every 5 epochs and logs each save (verbose=1).
# NOTE(review): `period` appears to be deprecated in newer TensorFlow releases
# in favour of an integer `save_freq` -- confirm against the installed version
# before changing, since the unit of `save_freq` differs between releases.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    period=5,
    verbose=1,
)



### Create a new model and train it with the new callback

In [8]:
# Start from a newly built model so training does not reuse earlier weights.
model = create_model()

# Write an initial checkpoint, using epoch 0 to fill the file name template.
model.save_weights(checkpoint_path.format(epoch=0))

# Fit for 50 epochs; `cp_callback` writes the periodic checkpoints, and the
# test split is used as validation data. verbose=2 prints one line per epoch.
model.fit(
    train_images,
    train_labels,
    epochs=50,
    validation_data=(test_images, test_labels),
    callbacks=[cp_callback],
    verbose=2,
)

Train on 1000 samples, validate on 1000 samples
Epoch 1/50
1000/1000 - 2s - loss: 1.1937 - accuracy: 0.6590 - val_loss: 0.7282 - val_accuracy: 0.7760
Epoch 2/50
1000/1000 - 1s - loss: 0.4349 - accuracy: 0.8800 - val_loss: 0.5158 - val_accuracy: 0.8440
Epoch 3/50
1000/1000 - 1s - loss: 0.2928 - accuracy: 0.9250 - val_loss: 0.4768 - val_accuracy: 0.8540
Epoch 4/50
1000/1000 - 1s - loss: 0.1998 - accuracy: 0.9590 - val_loss: 0.4576 - val_accuracy: 0.8610
Epoch 5/50

Epoch 00005: saving model to training_2/cp-0005.ckpt
1000/1000 - 1s - loss: 0.1569 - accuracy: 0.9690 - val_loss: 0.4517 - val_accuracy: 0.8450
Epoch 6/50
1000/1000 - 1s - loss: 0.1279 - accuracy: 0.9740 - val_loss: 0.4261 - val_accuracy: 0.8660
Epoch 7/50
1000/1000 - 1s - loss: 0.0910 - accuracy: 0.9840 - val_loss: 0.4116 - val_accuracy: 0.8640
Epoch 8/50
1000/1000 - 1s - loss: 0.0646 - accuracy: 0.9950 - val_loss: 0.4138 - val_accuracy: 0.8640
Epoch 9/50
1000/1000 - 1s - loss: 0.0477 - accuracy: 0.9960 - val_loss: 0.4171 - v

<tensorflow.python.keras.callbacks.History at 0x1c76eddaf48>

Next, we look up the checkpoints written during training, pick the most recent one, and store its path in the variable `latest`.

In [9]:
# Resolve the path prefix of the most recent checkpoint in `checkpoint_dir`.
latest = tf.train.latest_checkpoint(checkpoint_dir)

# `tf.train.latest_checkpoint` returns None when no checkpoint is found; fail
# here with a clear message instead of later inside `model.load_weights`.
assert latest is not None, "No checkpoint found in {!r}".format(checkpoint_dir)

# Display the resolved path as the cell output.
latest

'training_2\\cp-0050.ckpt'

### Recreate a model and reload the latest checkpoint

In [10]:
# Discard the trained model so the restored weights can only come from disk.
del model

# Rebuild the same architecture with freshly initialised weights.
model = create_model()

# Restore the weights stored in the latest checkpoint.
model.load_weights(latest)

# Evaluate the restored model on the test subset and report its accuracy.
loss, acc = model.evaluate(
    test_images,
    test_labels,
    verbose=0,
)
print("Restored model, accuracy: {:5.2f}%".format(100 * acc))

Restored model, accuracy: 87.30%
