In [1]:
from __future__ import absolute_import, division, print_function

import os

import tensorflow as tf
from tensorflow import keras

## Download and load dataset

In [2]:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()

train_labels = train_labels[:1000]
test_labels = test_labels[:1000]

train_images = train_images[:1000].reshape(-1, 28*28) / 255.0
test_images = test_images[:1000].reshape(-1, 28*28) / 255.0

## Build model

In [3]:
def create_model():
    model = tf.keras.models.Sequential([
        keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(784,)),
        keras.layers.Dropout(0.2),
        keras.layers.Dense(10, activation=tf.nn.softmax)
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.sparse_categorical_crossentropy,
                  metrics=['accuracy']
                 )
    return model

In [4]:
model = create_model()
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 512)               401920    
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5130      
Total params: 407,050
Trainable params: 407,050
Non-trainable params: 0
_________________________________________________________________


## Create checkpoint

In [5]:
checkpoint_path = 'training_1/cp.ckpt'
checkpoint_dir = os.path.dirname(checkpoint_path)

# Checkpoint callback
cp_callback = tf.keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1
                                                )

In [6]:
model = create_model()

model.fit(train_images, train_labels,
          epochs=10,
          validation_data=(test_images, test_labels),
          callbacks=[cp_callback]
         )

Train on 1000 samples, validate on 1000 samples
Epoch 1/10

Epoch 00001: saving model to training_1/cp.ckpt
Epoch 2/10

Epoch 00002: saving model to training_1/cp.ckpt
Epoch 3/10

Epoch 00003: saving model to training_1/cp.ckpt
Epoch 4/10

Epoch 00004: saving model to training_1/cp.ckpt
Epoch 5/10

Epoch 00005: saving model to training_1/cp.ckpt
Epoch 6/10

Epoch 00006: saving model to training_1/cp.ckpt
Epoch 7/10

Epoch 00007: saving model to training_1/cp.ckpt
Epoch 8/10

Epoch 00008: saving model to training_1/cp.ckpt
Epoch 9/10

Epoch 00009: saving model to training_1/cp.ckpt
Epoch 10/10

Epoch 00010: saving model to training_1/cp.ckpt


<tensorflow.python.keras.callbacks.History at 0x7ff316aeb710>

## Evaluation of untrained model and loaded model

In [7]:
# Untrained model
model = create_model()

loss, acc = model.evaluate(test_images, test_labels)
print("Untrained model, accuracy: {:5.2f}%".format(100*acc))

Untrained model, accuracy: 12.10%


In [8]:
# Trained model
model.load_weights(checkpoint_path)

loss, acc = model.evaluate(test_images, test_labels)
print("Restored model, accuracy: {:5.2f}%".format(100*acc))

Restored model, accuracy: 87.30%


## Use epochs in checkpoint filename

In [9]:
checkpoint_path = "training_2/cp-{epoch:04d}.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)

In [10]:
cp_callback = tf.keras.callbacks.ModelCheckpoint(
    checkpoint_path,
    verbose=1,
    save_weights_only=True,
    period=5 # Save model every 5th epoch
)

In [11]:
model = create_model()
model.fit(train_images,
          train_labels,
          epochs=50,
          callbacks=[cp_callback],
          validation_data=(test_images, test_labels),
          verbose=0)


Epoch 00005: saving model to training_2/cp-0005.ckpt

Epoch 00010: saving model to training_2/cp-0010.ckpt

Epoch 00015: saving model to training_2/cp-0015.ckpt

Epoch 00020: saving model to training_2/cp-0020.ckpt

Epoch 00025: saving model to training_2/cp-0025.ckpt

Epoch 00030: saving model to training_2/cp-0030.ckpt

Epoch 00035: saving model to training_2/cp-0035.ckpt

Epoch 00040: saving model to training_2/cp-0040.ckpt

Epoch 00045: saving model to training_2/cp-0045.ckpt

Epoch 00050: saving model to training_2/cp-0050.ckpt


<tensorflow.python.keras.callbacks.History at 0x7ff3081213d0>

## Ignore this part. Fix later maybe

In [15]:
import pathlib

# Sort the checkpoints by modification time.
checkpoints = pathlib.Path(checkpoint_dir).glob("*.index")
checkpoints = sorted(checkpoints, key=lambda cp:cp.stat().st_mtime)
checkpoints = [cp.with_suffix('') for cp in checkpoints]
latest = str(checkpoints[-1])
checkpoints

ValueError: Invalid suffix ''

## Restoring latest model

In [14]:
model = create_model()
model.load_weights(latest)
loss, acc = model.evaluate(test_images, test_labels)
print("Restored model, accuracy: {:5.2f}%".format(100*acc))

Instructions for updating:
Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.
INFO:tensorflow:Restoring parameters from training_2/cp-0050.ckpt.index


NotFoundError: Tensor name "dense_8/bias" not found in checkpoint files training_2/cp-0050.ckpt.index
	 [[Node: save_2/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_2/Const_0_0, save_2/RestoreV2/tensor_names, save_2/RestoreV2/shape_and_slices)]]

Caused by op u'save_2/RestoreV2', defined at:
  File "/usr/lib/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/lib/python2.7/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/kernelapp.py", line 486, in start
    self.io_loop.start()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tornado/ioloop.py", line 1073, in start
    handler_func(fd_obj, events)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2714, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2818, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/IPython/core/interactiveshell.py", line 2878, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-14-8345c31dcbb8>", line 2, in <module>
    model.load_weights(latest)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/keras/engine/network.py", line 1371, in load_weights
    finalizer()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/checkpointable/util.py", line 815, in run_restore_ops
    saver_lib.Saver(saveables).restore(
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1284, in __init__
    self.build()
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1296, in build
    self._build(self._filename, build_save=True, build_restore=True)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 1333, in _build
    build_save=build_save, build_restore=build_restore)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 781, in _build_internal
    restore_sequentially, reshape)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 400, in _AddRestoreOps
    restore_sequentially)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/training/saver.py", line 832, in bulk_restore
    return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1463, in restore_v2
    shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
    op_def=op_def)
  File "/home/hobbes/code/Feedforward-Neural-Network-with-Tensorflow/venv/local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

NotFoundError (see above for traceback): Tensor name "dense_8/bias" not found in checkpoint files training_2/cp-0050.ckpt.index
	 [[Node: save_2/RestoreV2 = RestoreV2[dtypes=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save_2/Const_0_0, save_2/RestoreV2/tensor_names, save_2/RestoreV2/shape_and_slices)]]
