In [1]:
import os
# Work from the project folder so the relative checkpoint paths used later
# in this notebook (e.g. "training_2/...") resolve inside it.
# NOTE(review): the path looks like a mounted Google Drive in Colab; the mount
# step is not shown in this notebook — confirm Drive is mounted before running.
os.chdir("/content/drive/MyDrive/Colab/Callback")

In [2]:
import math

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint

In [3]:
# Fetch MNIST (downloaded on first use) as (images, labels) pairs for the
# train and test splits.
(train_images, train_labels), (test_images, test_labels) = keras.datasets.mnist.load_data()

# Keep only the first 1000 examples of each split so the demo trains quickly.
# Each 28x28 image is flattened into a 784-long vector and divided by 255.0
# (presumably mapping 0-255 pixel intensities into [0, 1]).
train_images = train_images[:1000].reshape(-1, 28 * 28) / 255.0
train_labels = train_labels[:1000]

test_images = test_images[:1000].reshape(-1, 28 * 28) / 255.0
test_labels = test_labels[:1000]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [4]:
# Define a simple sequential model
def create_model():
    """Build and compile a small dense classifier.

    The network takes 784-long input vectors, passes them through a 512-unit
    ReLU layer with 20% dropout, and ends in a 10-way softmax layer.

    Returns:
        A compiled Keras ``Sequential`` model using the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    layer_stack = [
        keras.layers.Dense(512, activation='relu', input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation='softmax'),
    ]
    network = keras.Sequential(layer_stack)
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

In [5]:
# Create a basic model instance.
# NOTE(review): this instance is replaced by a fresh `create_model()` call in
# the training cell before it is trained or saved, so it appears to be unused.
model = create_model()

## Define checkpoint path and checkpoint directory

In [6]:
# Include the epoch in the file name (uses `str.format`): the `{epoch:04d}`
# placeholder becomes a zero-padded, 4-digit epoch number, e.g. "cp-0005.ckpt".
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
# Directory part of the template ("training_2"); used later to look up the
# most recent checkpoint.
checkpoint_dir = os.path.dirname(checkpoint_path)  # previously: checkpoint_path = "training_1/cp.ckpt"


### Create a callback that saves the model’s weights

In [7]:
# Create a callback that saves the model's weights every 5 epochs.
#
# The `period` argument of `ModelCheckpoint` is deprecated; the supported way
# to save every N epochs is an integer `save_freq`. An integer `save_freq`
# counts *batches*, not epochs, so the epoch interval is converted to a batch
# count from the size of the training set.
BATCH_SIZE = 32  # Keras' default `batch_size` for `model.fit`; keep in sync if `fit` is given another value
SAVE_EVERY_N_EPOCHS = 5
batches_per_epoch = math.ceil(len(train_images) / BATCH_SIZE)

cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,  # template containing `{epoch:04d}`
    verbose=1,                 # log a line each time a checkpoint is written
    save_weights_only=True,    # store weights only, not the full model
    save_freq=SAVE_EVERY_N_EPOCHS * batches_per_epoch)



# Create a new model and fit it with the new callback (training)

In [8]:
# Start from a freshly initialised model
model = create_model()

# Store the untrained weights as "epoch 0", using the same file-name template
# that the checkpoint callback uses
model.save_weights(checkpoint_path.format(epoch=0))

# Fit for 50 epochs with the checkpoint callback attached; the test split is
# passed as validation data and one summary line is logged per epoch
model.fit(
    x=train_images,
    y=train_labels,
    epochs=50,
    verbose=2,
    validation_data=(test_images, test_labels),
    callbacks=[cp_callback],
)

Epoch 1/50
32/32 - 8s - loss: 1.1868 - accuracy: 0.6550 - val_loss: 0.7548 - val_accuracy: 0.7790 - 8s/epoch - 250ms/step
Epoch 2/50
32/32 - 0s - loss: 0.4360 - accuracy: 0.8800 - val_loss: 0.5699 - val_accuracy: 0.8230 - 153ms/epoch - 5ms/step
Epoch 3/50
32/32 - 0s - loss: 0.2801 - accuracy: 0.9310 - val_loss: 0.4859 - val_accuracy: 0.8470 - 160ms/epoch - 5ms/step
Epoch 4/50
32/32 - 0s - loss: 0.2090 - accuracy: 0.9500 - val_loss: 0.4442 - val_accuracy: 0.8660 - 157ms/epoch - 5ms/step
Epoch 5/50

Epoch 5: saving model to training_2/cp-0005.ckpt
32/32 - 0s - loss: 0.1583 - accuracy: 0.9670 - val_loss: 0.4260 - val_accuracy: 0.8680 - 265ms/epoch - 8ms/step
Epoch 6/50
32/32 - 0s - loss: 0.1198 - accuracy: 0.9760 - val_loss: 0.4092 - val_accuracy: 0.8640 - 157ms/epoch - 5ms/step
Epoch 7/50
32/32 - 0s - loss: 0.0899 - accuracy: 0.9860 - val_loss: 0.4202 - val_accuracy: 0.8740 - 152ms/epoch - 5ms/step
Epoch 8/50
32/32 - 0s - loss: 0.0706 - accuracy: 0.9910 - val_loss: 0.4034 - val_accuracy:

<keras.src.callbacks.History at 0x7aba5b1f2830>

Next, we look up the most recent of the resulting checkpoints and store its path in a variable named ‘latest’.

In [9]:
# Look up the most recently written checkpoint in `checkpoint_dir` and keep
# its path in `latest` so the weights can be reloaded from it below.
# NOTE(review): `tf.train.latest_checkpoint` presumably returns None when no
# checkpoint is found — confirm before relying on `latest` being a path.
latest = tf.train.latest_checkpoint(checkpoint_dir)
# Bare expression so the notebook displays the resolved checkpoint path.
latest

'training_2/cp-0050.ckpt'

# Recreate a model and reload the latest checkpoint

In [10]:
# Build an untrained model with the same architecture as the one that was saved
model = create_model()

# Restore the weights stored in the most recent checkpoint
model.load_weights(latest)

# Score the restored model on the test subset and report accuracy as a percentage
loss, acc = model.evaluate(x=test_images, y=test_labels, verbose=0)
print("Restored model, accuracy: {:5.2f}%".format(acc * 100))

Restored model, accuracy: 88.00%
