In [1]:
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras

###要把另一个jupyter notebook关掉，不然这个训练时有影响，导致一训练服务就要重启

In [2]:
# 1. A typical build / train / evaluate workflow

# Build the model with the functional API: 784 inputs -> two 64-unit ReLU
# layers -> 10-way softmax output.
inputs = keras.Input(name='mnist_input', shape=(784,))
h1 = layers.Dense(units=64, activation='relu')(inputs)
h1 = layers.Dense(units=64, activation='relu')(h1)
outputs = layers.Dense(units=10, activation='softmax')(h1)
model = keras.Model(inputs, outputs)
# keras.utils.plot_model(model, 'net001.png', show_shapes=True)

model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)

# Load MNIST, flatten every image into a 784-vector and divide by 255.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
x_test = x_test.reshape(10000, 784).astype('float32') / 255

# Hold out the last 10000 training samples as the validation set.
x_train, x_val = x_train[:-10000], x_train[-10000:]
y_train, y_val = y_train[:-10000], y_train[-10000:]

# Train, then show the per-epoch loss/metric history.
history = model.fit(
    x_train, y_train,
    batch_size=64,
    epochs=3,
    validation_data=(x_val, y_val),
)
print('history:', history.history, sep='\n')

# Evaluate on the test set, then predict the first two test samples.
result = model.evaluate(x_test, y_test, batch_size=128)
print('evaluate:', result, sep='\n')
pred = model.predict(x_test[:2])
print('predict:', pred, sep='\n')




W0710 03:09:29.418821 140497473152768 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Train on 50000 samples, validate on 10000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3
history:
{'sparse_categorical_accuracy': [0.90172, 0.95268, 0.9644], 'val_loss': [0.17890676038265227, 0.13857471915781497, 0.1104788906276226], 'loss': [0.3419858175897598, 0.15977194753885268, 0.11627530758261681], 'val_sparse_categorical_accuracy': [0.9483, 0.9611, 0.9686]}
evaluate:
[0.1082186477124691, 0.9675]
predict:
[[9.6373697e-08 8.2183647e-08 1.2031219e-04 7.9352531e-04 1.4178224e-10
  6.8006989e-07 4.3007850e-12 9.9908352e-01 2.7640309e-07 1.5287484e-06]
 [7.2129455e-07 1.2672909e-04 9.9928159e-01 5.7528092e-04 8.3215275e-14
  1.6059768e-06 1.6569119e-06 1.5321676e-08 1.2436299e-05 8.9141517e-14]]


In [3]:
# 2.自定义损失和指标
# 自定义指标只需继承Metric类，并重写以下函数：

# __init__(self)，创建并初始化指标的状态变量。

# update_state(self, y_true, y_pred, sample_weight=None)，它使用目标y_true和模型预测y_pred来更新状态变量。

# result(self)，它使用状态变量来计算最终结果。

# reset_states(self)，重新初始化指标的状态。
# 下面是一个简单的示例，展示如何实现CatgoricalTruePostives指标，该指标统计被正确分类的样本数量。

class CatgoricalTruePostives(keras.metrics.Metric):
    """Streaming count of samples whose predicted class equals the label.

    The metric keeps a single scalar state variable that is increased by the
    (optionally weighted) number of correct predictions in every batch.

    Args:
        name: Name under which the metric is reported.
        **kwargs: Forwarded to ``keras.metrics.Metric``.
    """

    def __init__(self, name='binary_true_postives', **kwargs):
        super(CatgoricalTruePostives, self).__init__(name=name, **kwargs)
        # Running total of correctly classified samples.
        self.true_postives = self.add_weight(name='tp', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Add the number of correct predictions in this batch to the state.

        Args:
            y_true: Integer class labels, one per sample.
            y_pred: Per-class scores; the class axis is the last axis.
            sample_weight: Optional per-sample weights applied to each match.

        Returns:
            The result of the ``assign_add`` on the state variable.
        """
        # Take the argmax over the class axis. The default axis=0 would
        # reduce over the batch instead and yield one index per class.
        predicted = tf.reshape(tf.argmax(y_pred, axis=-1), shape=(-1,))
        # Flatten the labels too, so labels shaped (batch, 1) do not broadcast
        # against (batch,) predictions into a (batch, batch) comparison.
        labels = tf.reshape(y_true, shape=(-1,))

        matches = tf.equal(tf.cast(predicted, tf.int32),
                           tf.cast(labels, tf.int32))
        matches = tf.cast(matches, tf.float32)

        if sample_weight is not None:
            sample_weight = tf.reshape(tf.cast(sample_weight, tf.float32),
                                       shape=(-1,))
            matches = tf.multiply(sample_weight, matches)

        return self.true_postives.assign_add(tf.reduce_sum(matches))

    def result(self):
        """Return the current running total."""
        return tf.identity(self.true_postives)

    def reset_states(self):
        """Reset the running total to zero."""
        self.true_postives.assign(0.)
        

# Recompile the current model so training reports the custom metric, then
# train it for three epochs.
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[CatgoricalTruePostives()],
)

model.fit(x=x_train, y=y_train, batch_size=64, epochs=3)
            
            
            


Train on 50000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fc74c0fd160>

In [4]:
# Add an extra loss term to the network by defining a custom layer.
class ActivityRegularizationLayer(layers.Layer):
    """Pass-through layer that registers an activity penalty as a loss."""

    def call(self, inputs):
        # Penalty: 0.1 times the sum of all incoming activations.
        penalty = 0.1 * tf.reduce_sum(inputs)
        self.add_loss(penalty)
        return inputs

# MNIST classifier with the regularization layer placed right after the
# first hidden layer.
inputs = keras.Input(name='mnist_input', shape=(784,))
h1 = layers.Dense(units=64, activation='relu')(inputs)
h1 = ActivityRegularizationLayer()(h1)
h1 = layers.Dense(units=64, activation='relu')(h1)
outputs = layers.Dense(units=10, activation='softmax')(h1)
model = keras.Model(inputs, outputs)
# keras.utils.plot_model(model, 'net001.png', show_shapes=True)

model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.fit(x=x_train, y=y_train, batch_size=32, epochs=1)


Train on 50000 samples


<tensorflow.python.keras.callbacks.History at 0x7fc70c7ea390>

In [5]:
# A custom layer can likewise register a metric to be tracked.
class MetricLoggingLayer(layers.Layer):
    """Pass-through layer that logs the standard deviation of its inputs."""

    def call(self, inputs):
        # Reported as 'std_of_activation', aggregated with 'mean'.
        activation_std = keras.backend.std(inputs)
        self.add_metric(activation_std,
                        aggregation='mean',
                        name='std_of_activation')
        return inputs

# MNIST classifier with the metric-logging layer placed right after the
# first hidden layer.
inputs = keras.Input(name='mnist_input', shape=(784,))
h1 = layers.Dense(units=64, activation='relu')(inputs)
h1 = MetricLoggingLayer()(h1)
h1 = layers.Dense(units=64, activation='relu')(h1)
outputs = layers.Dense(units=10, activation='softmax')(h1)
model = keras.Model(inputs, outputs)
# keras.utils.plot_model(model, 'net001.png', show_shapes=True)

model.compile(
    optimizer=keras.optimizers.RMSprop(),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=[keras.metrics.SparseCategoricalAccuracy()],
)
model.fit(x=x_train, y=y_train, batch_size=32, epochs=1)


Train on 50000 samples


<tensorflow.python.keras.callbacks.History at 0x7fc70c365cf8>

In [6]:
# Losses and metrics can also be attached directly to a functional model,
# so no custom layer is needed in this cell.
inputs = keras.Input(shape=(784,), name='mnist_input')
h1 = layers.Dense(64, activation='relu')(inputs)
h2 = layers.Dense(64, activation='relu')(h1)
outputs = layers.Dense(10, activation='softmax')(h2)
model = keras.Model(inputs, outputs)

# Track the standard deviation of the first hidden layer's activations.
# The metric is named 'std_of_activation', so it is computed on `h1`
# rather than on the raw `inputs`.
model.add_metric(keras.backend.std(h1),
                 name='std_of_activation',
                 aggregation='mean')
# Activity regularization on the same activations: 0.1 * sum(h1).
model.add_loss(tf.reduce_sum(h1) * 0.1)

# keras.utils.plot_model(model, 'net001.png', show_shapes=True)

model.compile(optimizer=keras.optimizers.RMSprop(),
              loss=keras.losses.SparseCategoricalCrossentropy(),
              metrics=[keras.metrics.SparseCategoricalAccuracy()])
model.fit(x_train, y_train, batch_size=32, epochs=1)


Train on 50000 samples


<tensorflow.python.keras.callbacks.History at 0x7fc70c0f32b0>

In [7]:
# Besides passing held-out data through `validation_data`, `fit` can carve a
# validation set out of the training arrays with `validation_split`.
# Note: `validation_split` only works when training on NumPy data.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=1,
    validation_split=0.2,
)


Train on 40000 samples, validate on 10000 samples


<tensorflow.python.keras.callbacks.History at 0x7fc814566e80>

In [8]:
# 3. Feeding data with tf.data
def get_compiled_model():
    """Build and compile a fresh MNIST classifier.

    Returns:
        A ``keras.Model`` mapping 784 inputs through two 64-unit ReLU layers
        to a 10-way softmax, compiled with RMSprop, sparse categorical
        cross-entropy and sparse categorical accuracy.
    """
    inputs = keras.Input(name='mnist_input', shape=(784,))
    hidden = inputs
    # Two identical hidden layers, created in order.
    for _ in range(2):
        hidden = layers.Dense(64, activation='relu')(hidden)
    outputs = layers.Dense(10, activation='softmax')(hidden)

    model = keras.Model(inputs, outputs)
    model.compile(
        optimizer=keras.optimizers.RMSprop(),
        loss=keras.losses.SparseCategoricalCrossentropy(),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
    )
    return model
model = get_compiled_model()

# Training pipeline: shuffle with a 1024-element buffer, then batch by 64.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=1024)
    .batch(64)
)

# Validation pipeline: batched only, no shuffling.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(64)

# model.fit(train_dataset, epochs=3)
# steps_per_epoch: how many training batches to run in each epoch
# validation_steps: how many validation batches to run in each validation pass
model.fit(
    train_dataset,
    epochs=3,
    steps_per_epoch=100,
    validation_data=val_dataset,
    validation_steps=3,
)




Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fc6e0f4eb00>

In [10]:
# 4.样本权重和类权重
# “样本权重”数组是一个数字数组，用于指定批处理中每个样本在计算总损失时应具有多少权重。 它通常用于不平衡的分类问题（这个想法是为了给予很少见的类更多的权重）。 当使用的权重是1和0时，该数组可以用作损失函数的掩码（完全丢弃某些样本对总损失的贡献）。

# “类权重”dict是同一概念的更具体的实例：它将类索引映射到属于该类的样本所应使用的样本权重。例如，如果数据中类“0”的样本数只有类“1”的一半，则可以使用class_weight = {0: 1., 1: 0.5}。
# --------------------- 
# 增加第5类的权重
import numpy as np
# Class weights
model = get_compiled_model()
# Ten classes (0-9), each weighted 1.0 ...
class_weight = dict.fromkeys(range(10), 1.0)
# ... except class 5, which is weighted 2.0.
class_weight[5] = 2.0
print(class_weight)
model.fit(
    x=x_train,
    y=y_train,
    class_weight=class_weight,
    batch_size=64,
    epochs=4,
)


{0: 1.0, 1: 1.0, 2: 1.0, 3: 1.0, 4: 1.0, 5: 2.0, 6: 1.0, 7: 1.0, 8: 1.0, 9: 1.0}
Train on 50000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fc6d144a908>

In [11]:
# Sample weights
# y_train holds class ids 0-9, e.g. array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4], dtype=uint8)
model = get_compiled_model()
# One weight per training sample: 2.0 where the label is 5, 1.0 elsewhere.
sample_weight = np.where(y_train == 5, 2.0, 1.0)
model.fit(
    x=x_train,
    y=y_train,
    sample_weight=sample_weight,
    batch_size=64,
    epochs=4,
)



Train on 50000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<tensorflow.python.keras.callbacks.History at 0x7fc6d0f04c88>

In [14]:
# Peek at the first 20 training labels.
y_train[:20]

array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9],
      dtype=uint8)

In [15]:
# 5. Multi-input, multi-output models
image_input = keras.Input(name='img_input', shape=(32, 32, 3))
timeseries_input = keras.Input(name='ts_input', shape=(None, 10))

# Image branch: convolution followed by global max pooling.
x1 = layers.Conv2D(filters=3, kernel_size=3)(image_input)
x1 = layers.GlobalMaxPooling2D()(x1)

# Time-series branch: 1-D convolution followed by global max pooling.
x2 = layers.Conv1D(filters=3, kernel_size=3)(timeseries_input)
x2 = layers.GlobalMaxPooling1D()(x2)

# Merge both branches into one feature vector.
x = layers.concatenate([x1, x2])

# Two heads on the shared features: one scalar score and a 5-way softmax.
score_output = layers.Dense(units=1, name='score_output')(x)
class_output = layers.Dense(units=5, activation='softmax', name='class_output')(x)

model = keras.Model(
    inputs=[image_input, timeseries_input],
    outputs=[score_output, class_output],
)
keras.utils.plot_model(model, 'multi_input_output_model.png', show_shapes=True)


Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [16]:
# Each output can be given its own loss and metrics.
# The cell shows four alternative `compile` calls; the last one executed wins.

# Losses as a list, matched to the model outputs by position.
model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss=[keras.losses.MeanSquaredError(),
          keras.losses.CategoricalCrossentropy()])

# Losses and metrics keyed by output name, plus per-output loss weights.
# The keyword is `loss_weights` (plural); the misspelled `loss_weight` is not
# a `compile` parameter, so the weights would not be applied.
model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss={'score_output': keras.losses.MeanSquaredError(),
          'class_output': keras.losses.CategoricalCrossentropy()},
    metrics={'score_output': [keras.metrics.MeanAbsolutePercentageError(),
                              keras.metrics.MeanAbsoluteError()],
             'class_output': [keras.metrics.CategoricalAccuracy()]},
    loss_weights={'score_output': 2., 'class_output': 1.})

# An output that should not contribute to training can be given no loss
# (list form: None at that output's position).
model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss=[None, keras.losses.CategoricalCrossentropy()])

# Dict form of the same thing: simply omit the output from the loss dict.
model.compile(
    optimizer=keras.optimizers.RMSprop(1e-3),
    loss={'class_output': keras.losses.CategoricalCrossentropy()})


W0710 03:44:17.880979 140497473152768 training_utils.py:1237] Output score_output missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to score_output.


In [None]:
# 6.使用回调
# Keras中的回调是在训练期间（在epoch开始时，batch结束时，epoch结束时等）在不同点调用的对象，可用于实现以下行为：

# 在训练期间的不同时间点进行验证（除了内置的每个epoch结束时的验证之外）
# 定期或在超过某个精度阈值时保存模型检查点（checkpoint）
# 在训练似乎平稳时改变模型的学习率
# 在训练似乎平稳时对顶层进行微调
# 在训练结束或超出某个性能阈值时发送电子邮件或即时消息通知等等。
# 可使用的内置回调有

# ModelCheckpoint：定期保存模型。
# EarlyStopping：当验证指标不再改善时停止训练。
# TensorBoard：定期编写可在TensorBoard中显示的模型日志（更多细节见“可视化”）。
# CSVLogger：将损失和指标数据流式传输到CSV文件。
# 等等


In [17]:
# 6.1 Using a built-in callback
model = get_compiled_model()

# Stop training once `val_loss` has failed to improve by at least 1e-2
# for 2 epochs.
callbacks = [
    keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=1e-2,
        patience=2,
        verbose=1,
    ),
]
model.fit(
    x=x_train,
    y=y_train,
    epochs=20,
    batch_size=64,
    callbacks=callbacks,
    validation_split=0.2,
)


Train on 40000 samples, validate on 10000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 00006: early stopping


<tensorflow.python.keras.callbacks.History at 0x7fc6d06f0e48>

In [18]:
# Checkpointing callback
model = get_compiled_model()

# Write 'mymodel_<epoch>.h5', keeping only checkpoints for epochs where the
# monitored `val_loss` improved.
check_callback = keras.callbacks.ModelCheckpoint(
    filepath='mymodel_{epoch}.h5',
    monitor='val_loss',
    save_best_only=True,
    verbose=1,
)

model.fit(
    x=x_train,
    y=y_train,
    epochs=3,
    batch_size=64,
    callbacks=[check_callback],
    validation_split=0.2,
)




Train on 40000 samples, validate on 10000 samples
Epoch 1/3
Epoch 00001: val_loss improved from inf to 0.23461, saving model to mymodel_1.h5
Epoch 2/3
Epoch 00002: val_loss improved from 0.23461 to 0.17526, saving model to mymodel_2.h5
Epoch 3/3
Epoch 00003: val_loss did not improve from 0.17526


<tensorflow.python.keras.callbacks.History at 0x7fc6d01daf98>

In [19]:
# Adjusting the learning rate dynamically with a schedule.
initial_learning_rate = 0.1
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True,
)
# NOTE: this cell only constructs the optimizer; it is not passed to any
# model here.
optimizer = keras.optimizers.RMSprop(learning_rate=lr_schedule)


In [20]:
# Logging to TensorBoard
# NOTE: `model` is not rebuilt in this cell, so this continues training
# whichever model is currently in scope.
tensorboard_cbk = keras.callbacks.TensorBoard(log_dir='./full_path_to_your_logs')
model.fit(
    x=x_train,
    y=y_train,
    epochs=5,
    batch_size=64,
    callbacks=[tensorboard_cbk],
    validation_split=0.2,
)



Train on 40000 samples, validate on 10000 samples


W0710 05:36:08.638334 140497473152768 callbacks.py:241] Method (on_train_batch_end) is slow compared to the batch update (0.121348). Check your callbacks.


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7fc6c2837f28>

In [21]:
# 6.2 Writing a custom callback
class LossHistory(keras.callbacks.Callback):
    """Records and prints the training loss at the end of every epoch.

    After training, ``self.losses`` holds one entry per finished epoch.
    """

    def on_train_begin(self, logs=None):
        """Start with an empty loss history whenever training begins."""
        self.losses = []

    def on_epoch_end(self, batch, logs=None):
        """Append this epoch's loss to the history and print it.

        Args:
            batch: Despite its name, this hook receives the epoch index as
                its first argument; the parameter name is kept for
                backward compatibility.
            logs: Dict of values reported for the epoch; may be None.
        """
        # Tolerate a missing logs dict instead of failing on `None.get`.
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        print('\nloss:', self.losses[-1])
        
# Train a fresh model with the custom callback attached.
model = get_compiled_model()

callbacks = [LossHistory()]
model.fit(
    x=x_train,
    y=y_train,
    epochs=3,
    batch_size=64,
    callbacks=callbacks,
    validation_split=0.2,
)


Train on 40000 samples, validate on 10000 samples
Epoch 1/3
loss: 0.35710057483315466
Epoch 2/3
loss: 0.16733438974022866
Epoch 3/3
loss: 0.12134051643908024


<tensorflow.python.keras.callbacks.History at 0x7fc6c2705898>

In [22]:
# 7. Writing the training and validation loops by hand
# Build the model.
inputs = keras.Input(shape=(784,), name='digits')
x = layers.Dense(64, activation='relu', name='dense_1')(inputs)
x = layers.Dense(64, activation='relu', name='dense_2')(x)
outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Instantiate an optimizer.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.SparseCategoricalCrossentropy()

# Prepare the training dataset.
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Hand-written training loop.
for epoch in range(3):
    print('epoch: ', epoch)
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record only the forward pass on the tape.
        with tf.GradientTape() as tape:
            # Named `logits`, but the last layer applies softmax, so these
            # are class probabilities.
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)

        # Compute and apply the gradients after leaving the tape context, so
        # the backward pass itself is not recorded on the tape.
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            # Use `batch_size` so the count stays right if it is changed.
            print('Seen so far: %s samples' % ((step + 1) * batch_size))
            


epoch:  0
Training loss (for one batch) at step 0: 2.3425493240356445
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.2850112915039062
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.1744418144226074
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 2.152705430984497
Seen so far: 38464 samples
epoch:  1
Training loss (for one batch) at step 0: 2.0394225120544434
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.047250270843506
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.8622875213623047
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.8848987817764282
Seen so far: 38464 samples
epoch:  2
Training loss (for one batch) at step 0: 1.7007428407669067
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.7132885456085205
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.4844062328338623
Seen so far: 25664 samples
Training

In [23]:
# Train with a validation pass at the end of every epoch.
# Build the model.
inputs = keras.Input(shape=(784,), name='digits')
x = layers.Dense(64, activation='relu', name='dense_1')(inputs)
x = layers.Dense(64, activation='relu', name='dense_2')(x)
outputs = layers.Dense(10, activation='softmax', name='predictions')(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Instantiate an optimizer to train the model.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = keras.losses.SparseCategoricalCrossentropy()

# Prepare the metrics: separate accumulators for training and validation.
train_acc_metric = keras.metrics.SparseCategoricalAccuracy()
val_acc_metric = keras.metrics.SparseCategoricalAccuracy()

# Prepare the training dataset.
batch_size = 64
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)

# Prepare the validation dataset, batched with the same `batch_size`.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)


# Iterate over epochs.
for epoch in range(3):
    print('Start of epoch %d' % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record only the forward pass on the tape.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            loss_value = loss_fn(y_batch_train, logits)
        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Update training metric.
        train_acc_metric(y_batch_train, logits)

        # Log every 200 batches.
        if step % 200 == 0:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            # Use `batch_size` so the count stays right if it is changed.
            print('Seen so far: %s samples' % ((step + 1) * batch_size))

    # Display metrics at the end of each epoch.
    train_acc = train_acc_metric.result()
    print('Training acc over epoch: %s' % (float(train_acc),))
    # Reset training metrics at the end of each epoch.
    train_acc_metric.reset_states()

    # Run a validation loop at the end of each epoch.
    for x_batch_val, y_batch_val in val_dataset:
        val_logits = model(x_batch_val)
        # Update val metrics.
        val_acc_metric(y_batch_val, val_logits)
    val_acc = val_acc_metric.result()
    val_acc_metric.reset_states()
    print('Validation acc: %s' % (float(val_acc),))


Start of epoch 0
Training loss (for one batch) at step 0: 2.401185989379883
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.3178329467773438
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.1804118156433105
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 2.098374843597412
Seen so far: 38464 samples
Training acc over epoch: 0.1739400029182434
Validation acc: 0.35350000858306885
Start of epoch 1
Training loss (for one batch) at step 0: 2.104534864425659
Seen so far: 64 samples
Training loss (for one batch) at step 200: 1.948502540588379
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 1.8732008934020996
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 1.7152416706085205
Seen so far: 38464 samples
Training acc over epoch: 0.46643999218940735
Validation acc: 0.5687999725341797
Start of epoch 2
Training loss (for one batch) at step 0: 1.7561230659484863
Seen so far: 64 samples
Trainin

In [24]:
# Adding a custom loss through a layer. `model.losses` only shows the losses
# added by the most recent forward pass.
class ActivityRegularizationLayer(layers.Layer):
    """Pass-through layer that registers an activity penalty as a loss."""

    def call(self, inputs):
        # Penalty: 1e-2 times the sum of all incoming activations.
        penalty = tf.reduce_sum(inputs) * 1e-2
        self.add_loss(penalty)
        return inputs
  
inputs = keras.Input(name='digits', shape=(784,))
x = layers.Dense(units=64, activation='relu', name='dense_1')(inputs)
# Insert activity regularization as a layer.
x = ActivityRegularizationLayer()(x)
x = layers.Dense(units=64, activation='relu', name='dense_2')(x)
outputs = layers.Dense(units=10, activation='softmax', name='predictions')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

# After one forward pass the model holds a single extra loss.
logits = model(x_train[:64])
print(model.losses)

# After three more forward passes it still holds a single extra loss.
logits = model(x_train[:64])
logits = model(x_train[64:128])
logits = model(x_train[128:192])
print(model.losses)



[<tf.Tensor: id=834812, shape=(), dtype=float32, numpy=7.2620053>]
[<tf.Tensor: id=834873, shape=(), dtype=float32, numpy=7.0907006>]


In [None]:
# Include the layer-registered losses in the value that is differentiated.
optimizer = keras.optimizers.SGD(learning_rate=1e-3)

for epoch in range(3):
    print('Start of epoch %d' % epoch)

    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            logits = model(x_batch_train)
            # Data loss plus the extra losses created during this forward pass.
            loss_value = loss_fn(y_batch_train, logits) + sum(model.losses)

        grads = tape.gradient(loss_value, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        # Log every 200 batches.
        if not step % 200:
            print('Training loss (for one batch) at step %s: %s' % (step, float(loss_value)))
            print('Seen so far: %s samples' % ((step + 1) * 64))


Start of epoch 0
Training loss (for one batch) at step 0: 9.64475154876709
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.512377977371216
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.4083564281463623
Seen so far: 25664 samples
Training loss (for one batch) at step 600: 2.3370771408081055
Seen so far: 38464 samples
Start of epoch 1
Training loss (for one batch) at step 0: 2.330214500427246
Seen so far: 64 samples
Training loss (for one batch) at step 200: 2.3290836811065674
Seen so far: 12864 samples
Training loss (for one batch) at step 400: 2.3323705196380615
Seen so far: 25664 samples
