In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
import os

In [2]:
# Load the MNIST train/test splits shipped with Keras.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

# Keep only the first 1000 examples of each split, flatten every image into a
# 784-element row (28 * 28) and divide the pixel values by 255.0.
train_images = train_images[:1000].reshape(-1, 784) / 255.0
test_images = test_images[:1000].reshape(-1, 784) / 255.0

# Truncate the labels to the same first 1000 examples so they stay aligned
# with the image arrays above.
train_labels = train_labels[:1000]
test_labels = test_labels[:1000]

In [3]:
# Define a simple sequential model
def create_model():
    """Build and compile a new, untrained classifier.

    Architecture: Dense(512, relu) over 784-element inputs, Dropout(0.2),
    then Dense(10, softmax). The model is compiled with the Adam optimizer,
    sparse categorical cross-entropy loss and the accuracy metric.

    Returns:
        A newly constructed, compiled `Sequential` model (a separate instance
        on every call).
    """
    layer_stack = [
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation='softmax'),
    ]
    model = tf.keras.models.Sequential(layer_stack)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

In [4]:
# Create a basic model instance.
# NOTE(review): this instance is not trained or evaluated in the visible cells;
# `model` is rebound by a later `create_model()` call before training starts.
model = create_model()

### Define checkpoint path and checkpoint directory

In [5]:
# Checkpoint file-name template: the `{epoch:04d}` placeholder is filled in
# through `str.format`, so each saved epoch gets its own zero-padded file name.
# Previously: checkpoint_path = "training_1/cp.ckpt"
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"

# Directory part of the template; used later to look up the latest checkpoint.
checkpoint_dir = os.path.split(checkpoint_path)[0]

### Create a callback that saves the model’s weights

In [10]:
# Checkpoint callback: writes only the model's weights to `checkpoint_path`
# once every 5 epochs and logs each save (verbose=1).
# NOTE(review): `period` is reported as deprecated in later TensorFlow/Keras
# releases in favour of an integer `save_freq`; the unit of an integer
# `save_freq` (samples vs. batches) differs between releases -- confirm
# against the installed version before migrating.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_freq='epoch',
    period=5,
    verbose=1,
)



### Create a new model and fit it with the new callback (training)

In [11]:
# Start from a freshly built model instance
model = create_model()

# Store the initial (untrained) weights as "epoch 0", using the same
# `checkpoint_path` file-name template as the callback
initial_checkpoint = checkpoint_path.format(epoch=0)
model.save_weights(initial_checkpoint)

# Train for 50 epochs with the checkpoint callback attached, validating on the
# test subset; verbose=2 prints one summary line per epoch
model.fit(
    x=train_images,
    y=train_labels,
    epochs=50,
    validation_data=(test_images, test_labels),
    callbacks=[cp_callback],
    verbose=2,
)

Train on 1000 samples, validate on 1000 samples
Epoch 1/50
1000/1000 - 1s - loss: 1.1774 - accuracy: 0.6810 - val_loss: 0.7282 - val_accuracy: 0.7870
Epoch 2/50
1000/1000 - 0s - loss: 0.4215 - accuracy: 0.8900 - val_loss: 0.5476 - val_accuracy: 0.8310
Epoch 3/50
1000/1000 - 1s - loss: 0.2932 - accuracy: 0.9170 - val_loss: 0.5125 - val_accuracy: 0.8380
Epoch 4/50
1000/1000 - 0s - loss: 0.2177 - accuracy: 0.9460 - val_loss: 0.4848 - val_accuracy: 0.8410
Epoch 5/50

Epoch 00005: saving model to training_2/cp-0005.ckpt
1000/1000 - 1s - loss: 0.1684 - accuracy: 0.9620 - val_loss: 0.4506 - val_accuracy: 0.8470
Epoch 6/50
1000/1000 - 0s - loss: 0.1178 - accuracy: 0.9730 - val_loss: 0.4180 - val_accuracy: 0.8670
Epoch 7/50
1000/1000 - 0s - loss: 0.0879 - accuracy: 0.9880 - val_loss: 0.4274 - val_accuracy: 0.8630
Epoch 8/50
1000/1000 - 0s - loss: 0.0635 - accuracy: 0.9950 - val_loss: 0.4175 - val_accuracy: 0.8730
Epoch 9/50
1000/1000 - 0s - loss: 0.0509 - accuracy: 0.9970 - val_loss: 0.3949 - v

<tensorflow.python.keras.callbacks.History at 0x252a0b506a0>

Then, we want to look at the list of resulting checkpoints, find the latest checkpoint, and name it ‘latest’.

In [12]:
# Path prefix of the most recent checkpoint recorded in `checkpoint_dir`.
latest = tf.train.latest_checkpoint(checkpoint_dir)

# `latest_checkpoint` returns None when no checkpoint is found; fail here with
# a clear message instead of letting a later `load_weights(latest)` call fail
# on a None path.
if latest is None:
    raise FileNotFoundError(
        "No checkpoint found in {!r}; run the training cell first.".format(checkpoint_dir)
    )

# Display the resolved checkpoint path as the cell output.
latest

'training_2\\cp-0050.ckpt'

### Recreate a model and reload the latest checkpoint

In [15]:
# Build an untrained model with the same architecture as the one that was saved
model = create_model()

# Restore the weights stored in the latest checkpoint
model.load_weights(latest)

# Evaluate the restored model on the test subset and report accuracy as a percentage
loss, acc = model.evaluate(x=test_images, y=test_labels, verbose=0)
report_template = "Restored model, accuracy: {:5.2f}%"
print(report_template.format(100 * acc))

Restored model, accuracy: 87.80%
