In [1]:
import tensorflow as tf
import os

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.fashion_mnist.load_data()

In [4]:
train_images.shape

(60000, 28, 28)

In [5]:
train_labels.shape

(60000,)

In [6]:
(test_images.shape, test_labels.shape)

((10000, 28, 28), (10000,))

In [7]:
train_images = train_images/255
test_images = test_images/255

In [8]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape = (28, 28)))
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))

In [9]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [10]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc']
)

In [11]:
model.fit(train_images, train_labels, epochs=3)

Train on 60000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x2d54e78a988>

In [12]:
model.evaluate(test_images, test_labels, verbose=0)

[0.37847078092098235, 0.866]

# 模型保存

In [13]:
#model.save('less_model.h5')

In [14]:
new_model = tf.keras.models.load_model('less_model.h5')

In [15]:
new_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [16]:
new_model.evaluate(test_images, test_labels, verbose=0)

[0.3757324440479279, 0.8594]

此方法保存了模型配置、优化器配置、权重

# 仅保存架构，不保存优化器和权重

In [17]:
json_config = model.to_json()

In [18]:
json_config

'{"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "Flatten", "config": {"name": "flatten", "trainable": true, "batch_input_shape": [null, 28, 28], "dtype": "float32", "data_format": "channels_last"}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 10, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": 

In [19]:
reinitialized_model = tf.keras.models.model_from_json(json_config)

In [20]:
reinitialized_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 10)                1290      
Total params: 101,770
Trainable params: 101,770
Non-trainable params: 0
_________________________________________________________________


In [21]:
reinitialized_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc']
)

In [22]:
reinitialized_model.evaluate(test_images, test_labels, verbose=0)

[2.538090516281128, 0.0809]

# 只保存权重

In [23]:
weights = model.get_weights()

In [24]:
reinitialized_model.set_weights(weights)

In [25]:
reinitialized_model.evaluate(test_images, test_labels, verbose=0)

[0.37847078092098235, 0.866]

In [26]:
model.save_weights('less_weights.h5')

In [27]:
reinitialized_model.load_weights('less_weights.h5')

In [28]:
reinitialized_model.evaluate(test_images, test_labels, verbose=0)

[0.37847078092098235, 0.866]

# 在训练期间保存检查点

In [29]:
check_point_path = 'training_cp/cp.ckpt'

In [30]:
cp_callback = tf.keras.callbacks.ModelCheckpoint(check_point_path, 
                                  save_weights_only=True,
                                  )

In [31]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape = (28, 28)))
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))

In [32]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc']
)

In [33]:
model.fit(train_images, train_labels, epochs=3)

Train on 60000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x2d61efe0948>

In [34]:
weight_model = tf.keras.Sequential()
weight_model.add(tf.keras.layers.Flatten(input_shape = (28, 28)))
weight_model.add(tf.keras.layers.Dense(128, activation = 'relu'))
weight_model.add(tf.keras.layers.Dense(10, activation = 'softmax'))

In [35]:
weight_model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['acc']
)

In [36]:
weight_model.evaluate(test_images, test_labels, verbose=0)

[2.5757890968322754, 0.082]

In [37]:
weight_model.load_weights(check_point_path)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2d62898b108>

In [38]:
weight_model.evaluate(test_images, test_labels, verbose=0)

[0.3740061664581299, 0.8648]

# 自定义训练中保存检查点

In [39]:
model = tf.keras.Sequential()
model.add(tf.keras.layers.Flatten(input_shape = (28, 28)))
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
model.add(tf.keras.layers.Dense(10, activation = 'softmax'))

In [40]:
optimizer = tf.keras.optimizers.Adam()

In [41]:
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [42]:
train_loss = tf.keras.metrics.Mean('train_loss', dtype = tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')

test_loss = tf.keras.metrics.Mean('test_loss', dtype = tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

In [43]:
def loss(model, x, y):
    y_= model(x)
    return loss_func(y, y_)

In [51]:
def train_step(model, images, labels):
    with tf.GradientTape() as t:
        pred = model(images)
        loss_step = loss_func(labels, pred)
    grads = t.gradient(loss_step, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss_step)
    train_accuracy(labels, pred)

In [52]:
cp_dir = './customtrain_cp'
cp_prefix = os.path.join(cp_dir, 'ckpt')

In [53]:
checkpoint = tf.train.Checkpoint(optimizer = optimizer,
                                 model = model,
                                 #optimizer1 = optimizer1
)

In [54]:
dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels))

In [55]:
dataset = dataset.shuffle(10000).batch(32)

In [58]:
def train():
    for epoch in range(5):
        for (batch, (images, labels)) in enumerate(dataset):
            train_step(model, images, labels)
        print('Epoch {} loss is {}'.format(epoch, train_loss.result()))
        print('Epoch {} Accyract is {}'.format(epoch, train_accuracy.result()))
        train_loss.reset_states()
        train_accuracy.reset_states()
        if(epoch + 1) % 2 == 0:
            checkpoint.save(file_prefix = cp_prefix)            

In [59]:
train()

Epoch 0 loss is 1.6091670989990234
Epoch 0 Accyract is 0.8535333275794983
Epoch 1 loss is 1.6014986038208008
Epoch 1 Accyract is 0.8608666658401489
Epoch 2 loss is 1.5951642990112305
Epoch 2 Accyract is 0.8671500086784363
Epoch 3 loss is 1.591326355934143
Epoch 3 Accyract is 0.870116651058197
Epoch 4 loss is 1.5890311002731323
Epoch 4 Accyract is 0.8727499842643738


# 读取检查点

In [64]:
checkpoint.restore(tf.train.latest_checkpoint(cp_dir))

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2d63264df08>

In [69]:
tf.argmax(model(train_images, training=False), axis=-1).numpy()

array([9, 0, 0, ..., 3, 0, 5], dtype=int64)

In [70]:
train_labels

array([9, 0, 0, ..., 3, 0, 5], dtype=uint8)

In [72]:
(tf.argmax(model(train_images, training=False), axis=-1).numpy() == train_labels).sum()/len(train_labels)   #恢复模型正确率计算

0.8775666666666667