**7.1 不用Sequential模型的解决方案：Keras函数式API**

函数式API简介

In [1]:
from keras import Input, layers

input_tensor = Input(shape=(32,))  # a tensor

dense = layers.Dense(32, activation='relu')  # a layer is a function

# A layer can be called on a tensor, and it returns a tensor.
# The result name is spelled `output_tensor`, matching the name used by the
# functional-API examples that follow.
output_tensor = dense(input_tensor)

Using TensorFlow backend.


In [2]:
# A simple Sequential model and its functional-API equivalent
from keras.models import Sequential, Model
from keras import layers
from keras import Input

# The Sequential model, as covered earlier
seq_model = Sequential()
seq_model.add(layers.Dense(32, activation='relu', input_shape=(64,)))
seq_model.add(layers.Dense(32, activation='relu'))
seq_model.add(layers.Dense(10, activation='softmax'))

# The same stack of layers expressed with the functional API
input_tensor = Input(shape=(64,))
x = layers.Dense(32, activation='relu')(input_tensor)
x = layers.Dense(32, activation='relu')(x)
output_tensor = layers.Dense(10, activation='softmax')(x)

model = Model(input_tensor, output_tensor) # the Model class turns an input tensor and an output tensor into a model

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_6 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_7 (Dense)              (None, 10)                330       
Total params: 3,466
Trainable params: 3,466
Non-trainable params: 0
_________________________________________________________________


In [4]:
# A Model instance is compiled, trained and evaluated with the same API as a Sequential model
model.compile(optimizer='rmsprop', loss='categorical_crossentropy') # compile the model

import numpy as np
# Dummy Numpy data to train on.
# NOTE(review): the targets are uniform random floats, not one-hot vectors, so the
# resulting loss value is only illustrative.
x_train = np.random.random((1000, 64))
y_train = np.random.random((1000, 10))

model.fit(x_train, y_train, epochs=10, batch_size=128) # train the model for 10 epochs

score = model.evaluate(x_train, y_train)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
# Display the value returned by model.evaluate (the model was compiled without extra metrics)
score

11.496811447143555

多输入模型

In [25]:
# A two-input question-answering model built with the functional API
from keras.models import Model
from keras import layers
from keras import Input

text_vocabulary_size = 10000
question_vocabulary_size = 10000
answer_vocabulary_size = 500

# The text input is a variable-length sequence of integers.
# Note that inputs can optionally be named.
text_input = Input(shape=(None,), dtype='int32', name='text')

# Embed the input into a sequence of vectors of size 64.
# Named `embedded_text` to match `embedded_question` below.
embedded_text = layers.Embedding(text_vocabulary_size, 64)(text_input)

# Encode the sequence of vectors into a single vector with an LSTM
encoded_text = layers.LSTM(32)(embedded_text)

# Same processing for the question, using separate layer instances
question_input = Input(shape=(None,), dtype='int32', name='question')

embedded_question = layers.Embedding(question_vocabulary_size, 32)(question_input)
encoded_question = layers.LSTM(16)(embedded_question)

# Concatenate the encoded text and the encoded question
concatenated = layers.concatenate([encoded_text, encoded_question], axis=-1)

# Add a softmax classifier on top
answer = layers.Dense(answer_vocabulary_size, activation='softmax')(concatenated)

# At instantiation time, specify the two inputs and the output
model = Model([text_input, question_input], answer)
model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['acc'])

In [26]:
# Print the layer-by-layer summary of the current `model`
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
text (InputLayer)               (None, None)         0                                            
__________________________________________________________________________________________________
question (InputLayer)           (None, None)         0                                            
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, None, 64)     640000      text[0][0]                       
__________________________________________________________________________________________________
embedding_4 (Embedding)         (None, None, 32)     320000      question[0][0]                   
__________________________________________________________________________________________________
lstm_3 (LS

In [27]:
# Feeding data to the multi-input model
import numpy as np
import keras

num_samples = 1000
max_length = 100

text = np.random.randint(1, text_vocabulary_size,
                         size=(num_samples, max_length)) # generate dummy Numpy data

question = np.random.randint(1, question_vocabulary_size,
                             size=(num_samples, max_length))

answers = np.random.randint(answer_vocabulary_size, size=(num_samples))
answers = keras.utils.to_categorical(answers, answer_vocabulary_size) # answers are one-hot encoded, not integers

# model.fit([text, question], answers, epochs=10, batch_size=128) # fit using a list of inputs

model.fit({'text':text, 'question': question}, answers,
          epochs=10, batch_size=128) # fit using a dict of inputs (only possible when the inputs are named)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2219868b5f8>

多输出模型

In [30]:
# A three-output model built with the functional API
from keras import layers
from keras import Input
from keras.models import Model

vocabulary_size = 50000
num_income_groups = 10

posts_input = Input(shape=(None,), dtype='int32', name='posts')
# Embedding takes (input_dim, output_dim): the vocabulary size comes first and the
# embedding dimension (256) second. The arguments were previously swapped, which
# would cap valid token ids at 255 and create 50000-wide embedding vectors.
embedded_posts = layers.Embedding(vocabulary_size, 256)(posts_input)
x = layers.Conv1D(128, 5, activation='relu')(embedded_posts)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.Conv1D(256, 5, activation='relu')(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation='relu')(x)

# Note that every output layer is given a name
age_prediction = layers.Dense(1, name='age')(x)
income_prediction = layers.Dense(num_income_groups, activation='softmax', name='income')(x)
gender_prediction = layers.Dense(1, activation='sigmoid', name='gender')(x)

model = Model(posts_input, [age_prediction, income_prediction, gender_prediction])

In [31]:
# Compilation options for a multi-output model: multiple losses
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'])

model.compile(optimizer='rmsprop',
              loss={'age':'mse',
                    'income':'categorical_crossentropy',
                    'gender':'binary_crossentropy'}) # equivalent to the call above (only possible when
                                                     # the output layers are named)

In [33]:
# Compilation options for a multi-output model: loss weighting
model.compile(optimizer='rmsprop',
              loss=['mse', 'categorical_crossentropy', 'binary_crossentropy'],
              loss_weights=[0.25, 1., 10.])

model.compile(optimizer='rmsprop',
              loss={'age':'mse',
                    'income':'categorical_crossentropy',
                    'gender':'binary_crossentropy'},
              loss_weights={'age':0.25,
                            'income':1.,
                            'gender':10.}) # equivalent to the call above (only possible when
                                           # the output layers are named)

In [36]:
# 将数据输入到多输出模型中
def train_0():
    """Fit the multi-output `model` twice: targets as a list, then as a dict.

    Reads `model`, `posts`, `age_targets`, `income_targets` and
    `gender_targets` from the enclosing notebook scope; the three target
    objects are assumed to be Numpy arrays.
    """
    # Targets given positionally, in the order of the model's outputs
    targets_as_list = [age_targets, income_targets, gender_targets]
    model.fit(posts, targets_as_list, epochs=10, batch_size=64)

    # Equivalent form keyed by output-layer name (only possible when the
    # output layers are named)
    targets_by_name = {
        'age': age_targets,
        'income': income_targets,
        'gender': gender_targets,
    }
    model.fit(posts, targets_by_name, epochs=10, batch_size=64)

层组成的有向无环图

1 Inception模块

In [44]:
from keras import layers
def Inception():
    """Build the four parallel branches of an Inception-style module.

    The input tensor is read from the name `x` in the enclosing notebook
    scope. Each branch is applied to `x` and the branch outputs are
    concatenated along the last axis.

    Returns:
        The concatenated output tensor of the four branches.
    """
    # Every branch uses the same stride (2), which is needed to keep all branch
    # outputs the same size so that they can be concatenated.
    # NOTE(review): no branch sets `padding`, so the spatial sizes may still
    # differ between branches for a real input -- confirm before running.
    branch_a = layers.Conv2D(128, 1, activation='relu', strides=2)(x)

    # In this branch, the striding happens in the spatial convolution layer
    branch_b = layers.Conv2D(128, 1, activation='relu')(x)
    branch_b = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_b)

    # In this branch, the striding happens in the average-pooling layer.
    # `strides` must be passed as a keyword argument (`strides=2`); the earlier
    # `strides-2` was a subtraction on an undefined name.
    branch_c = layers.AveragePooling2D(3, strides=2)(x)
    branch_c = layers.Conv2D(128, 3, activation='relu')(branch_c)

    branch_d = layers.Conv2D(128, 1, activation='relu')(x)
    branch_d = layers.Conv2D(128, 3, activation='relu')(branch_d)
    branch_d = layers.Conv2D(128, 3, activation='relu', strides=2)(branch_d)

    # Concatenate the branch outputs and hand the result back to the caller
    output = layers.concatenate([branch_a, branch_b, branch_c, branch_d], axis=-1)
    return output

2 残差连接

In [45]:
from keras import layers
def residual_connection():
    """Sketch two residual connections: identity, then linear (1x1 convolution).

    `x` is an Ellipsis placeholder standing in for a 4D input tensor, so this
    function illustrates the wiring only.
    """
    x = ...
    # Apply a transformation to x: three same-padded 3x3 convolutions
    y = x
    for _ in range(3):
        y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)

    # Add the original x back to the output features
    y = layers.add([y, x])

    # When the feature-map sizes differ, use a linear residual connection instead
    x = ...
    y = x
    for _ in range(2):
        y = layers.Conv2D(128, 3, activation='relu', padding='same')(y)
    y = layers.MaxPooling2D(2, strides=2)(y)

    # A 1x1 convolution linearly downsamples the original x tensor to the same shape as y
    residual = layers.Conv2D(128, 1, strides=2, padding='same')(x)

    # Add the residual tensor back to the output features
    y = layers.add([y, residual])

共享层权重

In [46]:
from keras import layers
from keras import Input
from keras.models import Model

def shared_layer():
    """Build and fit a two-branch model whose branches share one LSTM layer.

    Reads `left_data`, `right_data` and `targets` from the enclosing notebook
    scope when fitting.
    """
    lstm = layers.LSTM(32)  # instantiate a single LSTM layer, once

    # Left branch: inputs are variable-length sequences of vectors of size 128
    left_input = Input(shape=(None, 128))
    left_output = lstm(left_input)

    # Right branch: calling an existing layer instance reuses its weights
    right_input = Input(shape=(None, 128))
    right_output = lstm(right_input)

    # Build the classifier on top. The left branch tensor is `left_output`
    # (the previous `left_out` was an undefined name).
    merged = layers.concatenate([left_output, right_output], axis=-1)
    predictions = layers.Dense(1, activation='sigmoid')(merged)

    # Instantiate and train the model: the LSTM weights are updated based on both inputs
    model = Model([left_input, right_input], predictions)
    model.fit([left_data, right_data], targets)

将模型作为层

In [47]:
from keras import layers
from keras import applications
from keras import Input

# The base image-processing model is the Xception network (convolutional base only)
xception_base = applications.Xception(weights=None, include_top=False)

# The inputs are 250x250 RGB images
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))

# Call the same vision model twice.
# The right branch result gets its own name so `right_input` keeps referring to
# the Input tensor (it is needed later to build a Model from these inputs).
left_features = xception_base(left_input)
right_features = xception_base(right_input)

# The merged features contain information from both the left and right visual inputs
merged_features = layers.concatenate([left_features, right_features], axis=-1)

**7.2 使用Keras回调函数和TensorBoard来检查并监控深度学习模型**

训练过程中将回调函数作用于模型

1 ModelCheckpoint与EarlyStopping回调函数 

In [1]:
def ModelCheckpoint_EarlyStopping():
    """Train `model` with EarlyStopping and ModelCheckpoint callbacks.

    Reads `model`, `x`, `y`, `x_val` and `y_val` from the enclosing notebook
    scope.
    """
    import keras

    # Callbacks are passed to fit through its `callbacks` argument, which takes
    # a list of callbacks; any number of callbacks can be passed.
    callbacks_list = [
        keras.callbacks.EarlyStopping(  # interrupt training when improvement stops
            # Monitor the model's validation accuracy. The key is 'val_acc'
            # because the metric is registered as 'acc' in compile() below;
            # plain 'acc' would track the training accuracy instead.
            monitor='val_acc',
            # Interrupt training when accuracy has stopped improving for more
            # than one epoch (that is, two epochs)
            patience=1,
        ),
        keras.callbacks.ModelCheckpoint(  # save the current weights after every epoch
            filepath='my_model.h5',  # path of the destination model file

            # These two arguments mean the model file is not overwritten unless
            # val_loss has improved, so the best model seen during training is kept.
            monitor='val_loss',
            save_best_only=True,
        )
    ]

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['acc'])  # accuracy is monitored, so it must be part of the model's metrics

    # Because the callbacks monitor validation loss and validation accuracy,
    # validation_data must be passed to fit.
    model.fit(x, y,
              epochs=10,
              batch_size=32,
              callbacks=callbacks_list,
              validation_data=(x_val, y_val))

2 ReduceLROnPlateau回调函数

In [2]:
def ReduceLROnPlateau():
    """Train `model` with a ReduceLROnPlateau callback.

    Reads `model`, `x`, `y`, `x_val` and `y_val` from the enclosing notebook
    scope.
    """
    # Imported locally, like the sibling callback example, so this cell does not
    # depend on `keras` having been imported by another cell.
    import keras

    callbacks_list = [
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',  # monitor the model's validation loss
            factor=0.1,          # divide the learning rate by 10 when triggered
            patience=10,         # triggered after the validation loss has not improved for 10 epochs
        )
    ]

    # Because the callback monitors the validation loss, validation_data must be passed to fit
    model.fit(x, y,
              epochs=10,
              batch_size=32,
              callbacks=callbacks_list,
              validation_data=(x_val, y_val))

3 编写你自己的回调函数

In [3]:
import keras
import numpy as np

class ActivationLogger(keras.callbacks.Callback):
    """Callback that saves the activations of every layer after each epoch.

    The activations are computed on the first input sample of the validation
    data and written to `activations_at_epoch_<epoch>.npz`.
    """

    def set_model(self, model):
        """Store the model and build a second model returning every layer's output.

        Called by the parent model before training, to tell the callback which
        model will be calling it.
        """
        self.model = model
        layer_outputs = [layer.output for layer in model.layers]
        # Model instance that returns the activations of every layer.
        # The class lives in `keras.models` (there is no `keras.model` module).
        self.activations_model = keras.models.Model(model.input, layer_outputs)

    def on_epoch_end(self, epoch, logs=None):
        """Save the activations for one validation sample to disk.

        Raises:
            RuntimeError: if no validation data is available on the callback.
        """
        # NOTE(review): relies on the Keras version in use setting
        # `self.validation_data` on callbacks -- confirm for the installed version.
        if self.validation_data is None:
            raise RuntimeError('Require validation_data.')

        # First input sample of the validation data (slice keeps the batch axis)
        validation_sample = self.validation_data[0][0:1]
        activations = self.activations_model.predict(validation_sample)

        # Save the arrays to disk. np.savez writes binary data, so the file is
        # opened in 'wb' mode; `with` closes it even if saving fails.
        with open('activations_at_epoch_' + str(epoch) + '.npz', 'wb') as f:
            np.savez(f, activations)

Using TensorFlow backend.


TensorBoard简介：TensorFlow的可视化框架

In [1]:
# Text-classification model to be monitored with TensorBoard
import keras
from keras import layers
from keras.datasets import imdb
from keras.preprocessing import sequence

max_features = 2000  # number of words to consider as features
max_len = 500  # cut texts off after this many words (among the max_features most common words)

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

model = keras.models.Sequential()
model.add(layers.Embedding(max_features, 128,
                           input_length=max_len,
                           name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPooling1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPooling1D())
# binary_crossentropy (below) expects a probability in [0, 1], so the single
# output unit needs a sigmoid activation rather than a linear one.
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()
model.compile(optimizer='rmsprop',
              loss='binary_crossentropy',
              metrics=['acc'])

Using TensorFlow backend.


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embed (Embedding)            (None, 500, 128)          256000    
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 494, 32)           28704     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 98, 32)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 92, 32)            7200      
_________________________________________________________________
global_max_pooling1d_1 (Glob (None, 32)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 33        
Total params: 291,937
Trainable params: 291,937
Non-trainable params: 0
_________________________________________________________________


In [2]:
# Create a directory for the TensorBoard log files.
# exist_ok=True keeps this cell re-runnable: os.mkdir raises FileExistsError
# when the directory is already there.
import os
os.makedirs('my_log_dir', exist_ok=True)

In [2]:
# Train the model with a TensorBoard callback
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir='my_log_dir', # log files are written to this location
        histogram_freq=1,     # record activation histograms every epoch
        #embeddings_freq=1,    # record embedding data every epoch (currently disabled)
    )
]
history = model.fit(x_train, y_train,
                    epochs=20,
                    batch_size=128, 
                    validation_split=0.2,
                    callbacks=callbacks)

Train on 20000 samples, validate on 5000 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [3]:
# Plot the model as a graph of layers with keras.utils.plot_model; the image is written to model.png
from keras.utils import plot_model

plot_model(model, to_file='model.png')

In [5]:
# Visualize the model topology with plot_model and its show_shapes option; the image is written to model_1.png
from keras.utils import plot_model

plot_model(model, show_shapes=True, to_file='model_1.png')

**7.3 让模型性能发挥到极致**

高级架构模式

1 标准化 

In [7]:
# BatchNormalization层通常在卷积层或密集连接层之后使用

def BatchNormalization_demo():
    """Show where a BatchNormalization layer is typically inserted.

    Appends layers to `conv_model` and `dense_model`, which are read from the
    enclosing notebook scope.
    """
    # After a convolution layer
    conv_layer = layers.Conv2D(32, 3, activation='relu')
    conv_model.add(conv_layer)
    conv_norm = layers.BatchNormalization()
    conv_model.add(conv_norm)

    # After a Dense layer
    dense_layer = layers.Dense(32, activation='relu')
    dense_model.add(dense_layer)
    dense_norm = layers.BatchNormalization()
    dense_model.add(dense_norm)

2 深度可分离卷积 

In [10]:
# A small image classifier built from depthwise separable convolutions
from keras.models import Sequential, Model
from keras import layers

# Input image dimensions and number of output classes used by the layers below
height = 64
width = 64
channels = 3
num_classes = 10

model = Sequential()
# First pair of separable convolutions, followed by max pooling
model.add(layers.SeparableConv2D(32, 3,
                             activation='relu',
                             input_shape=(height, width, channels,)))
# NOTE(review): this layer uses kernel size 4 while every other one uses 3 -- confirm it is intended
model.add(layers.SeparableConv2D(64, 4, activation='relu'))
model.add(layers.MaxPooling2D(2))

# Second pair of separable convolutions, followed by max pooling
model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.MaxPooling2D(2))

# Third pair of separable convolutions, followed by global average pooling
model.add(layers.SeparableConv2D(64, 3, activation='relu'))
model.add(layers.SeparableConv2D(128, 3, activation='relu'))
model.add(layers.GlobalAveragePooling2D())

# Densely connected classifier with a softmax over num_classes outputs
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(num_classes, activation='softmax'))

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

In [11]:
# Print the layer-by-layer summary of the current `model`
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
separable_conv2d_1 (Separabl (None, 62, 62, 32)        155       
_________________________________________________________________
separable_conv2d_2 (Separabl (None, 59, 59, 64)        2624      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 29, 29, 64)        0         
_________________________________________________________________
separable_conv2d_3 (Separabl (None, 27, 27, 64)        4736      
_________________________________________________________________
separable_conv2d_4 (Separabl (None, 25, 25, 128)       8896      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 12, 12, 128)       0         
_________________________________________________________________
separable_conv2d_5 (Separabl (None, 10, 10, 64)        9408      
__________

超参数优化

模型集成

In [12]:
# 简单取平均
def ensemble_average():
    """Ensemble four models by taking the plain average of their predictions.

    Reads `model_a`, `model_b`, `model_c`, `model_d` and `x_val` from the
    enclosing notebook scope.

    Returns:
        The averaged predictions (previously computed and then discarded).
    """
    # Use four different models to compute the initial predictions
    preds_a = model_a.predict(x_val)
    preds_b = model_b.predict(x_val)
    preds_c = model_c.predict(x_val)
    preds_d = model_d.predict(x_val)

    # This new prediction array should be more accurate than any of the initial ones
    final_preds = 0.25 * (preds_a + preds_b + preds_c + preds_d)
    return final_preds

In [13]:
# 加权平均
def ensemble_weight():
    """Ensemble four models by taking a weighted average of their predictions.

    Reads `model_a`, `model_b`, `model_c`, `model_d` and `x_val` from the
    enclosing notebook scope.

    Returns:
        The weighted-average predictions (previously computed and then discarded).
    """
    preds_a = model_a.predict(x_val)
    preds_b = model_b.predict(x_val)
    preds_c = model_c.predict(x_val)
    preds_d = model_d.predict(x_val)

    # The weights (0.5, 0.25, 0.1, 0.15) are assumed to have been learned empirically
    final_preds = 0.5 * preds_a + 0.25 * preds_b + 0.1 * preds_c + 0.15 * preds_d
    return final_preds