# Keras高层接口

## 常见功能模块

### 常见网络层类

In [1]:
import tensorflow as tf
# 导入 keras 模型，不能使用 import keras，它导入的是标准的 Keras 库
from tensorflow import keras
# 导入常见网络层类 
from tensorflow.keras import layers,Sequential,losses,optimizers,datasets

In [2]:
# Input tensor holding three raw scores.
x = tf.constant([2.0, 1.0, 0.1])
# Softmax implemented as a layer object, normalizing along the last axis.
layer = layers.Softmax(axis=-1)
# Forward pass through the layer; the bare trailing name displays the result.
out = layer(x)
out

<tf.Tensor: id=2, shape=(3,), dtype=float32, numpy=array([0.6590012, 0.242433 , 0.0985659], dtype=float32)>

In [3]:
# Same forward computation through the functional API instead of a layer
# object (axis=-1 is the function's default, spelled out here).
out = tf.nn.softmax(x, axis=-1)
out

<tf.Tensor: id=3, shape=(3,), dtype=float32, numpy=array([0.6590012, 0.242433 , 0.0985659], dtype=float32)>

### 网络容器

In [4]:
# Bring the Sequential container (and the layers module) into scope.
from tensorflow.keras import layers, Sequential

# Wrap four layers into one network. The Dense layers are created without an
# activation; the non-linearity is supplied by the separate ReLU layers.
network = Sequential([
    layers.Dense(3),
    layers.ReLU(),
    layers.Dense(2),
    layers.ReLU(),
])
x = tf.random.normal([4, 3])
# Calling the container feeds x to the first layer and propagates it layer by
# layer; the value returned is the output of the last layer.
out = network(x)
out

<tf.Tensor: id=63, shape=(4, 2), dtype=float32, numpy=
array([[0.        , 0.        ],
       [0.0096464 , 0.03690726],
       [0.09050844, 0.1508593 ],
       [0.9066329 , 1.3231909 ]], dtype=float32)>

In [5]:
# Number of Dense+ReLU stages to stack.
layers_num = 2
# Start from an empty container and grow it with add().
network = Sequential([])
for _ in range(layers_num):
    # Each stage is a fully connected layer followed by its activation layer.
    for stage_layer in (layers.Dense(3), layers.ReLU()):
        network.add(stage_layer)
# Create the network parameters for the given input shape.
network.build(input_shape=(4, 4))
network.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              multiple                  15        
_________________________________________________________________
re_lu_2 (ReLU)               multiple                  0         
_________________________________________________________________
dense_3 (Dense)              multiple                  12        
_________________________________________________________________
re_lu_3 (ReLU)               multiple                  0         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


当我们通过 Sequential 容器封装多个网络层时，每层的参数列表将会自动并入 Sequential 容器的参数列表中，不需要人为合并网络参数列表，这也是 Sequential 容器的便捷之处。Sequential 对象的 trainable_variables 和 variables 分别包含了所有层的待优化张量列表和全部张量列表。

In [6]:
# List the name and shape of every trainable parameter of the network.
for param in network.trainable_variables:
    print(param.name, param.shape)

dense_2/kernel:0 (4, 3)
dense_2/bias:0 (3,)
dense_3/kernel:0 (3, 3)
dense_3/bias:0 (3,)


## 模型装配、 训练与测试

### 模型装配

In [7]:
from tensorflow import keras
from tensorflow.keras import layers, Sequential, losses, optimizers, datasets

# Fully connected classifier: a Flatten layer for 28x28 inputs, four ReLU
# hidden layers of decreasing width, and a 10-way softmax output layer.
hidden_units = (256, 128, 64, 32)
network = Sequential(
    [layers.Flatten(input_shape=(28, 28))]
    + [layers.Dense(units, activation='relu') for units in hidden_units]
    + [layers.Dense(10, activation='softmax')]
)
network.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_5 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_6 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_8 (Dense)              (None, 10)                330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
________________________________________________

In [8]:
# Assemble the model: Adam optimizer with learning rate 0.01 and a
# categorical cross-entropy loss.
# Meaning of the from_logits flag on the loss:
#   from_logits=False -> the loss is given probabilities (softmax already applied).
#   from_logits=True  -> the loss is given the raw logits produced by the network.
# False is used here because the last layer of the network is a softmax.
network.compile(
    optimizer=optimizers.Adam(learning_rate=0.01),
    loss=losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy'],  # track accuracy during training/evaluation
)

### 模型训练

In [9]:
def preprocess(x, y):
    """Convert one (image, label) pair into model-ready tensors.

    The image is cast to float32 and divided by 255; the label is cast to
    int32 and one-hot encoded with depth 10.

    Returns:
        Tuple ``(image, one_hot_label)``.
    """
    image = tf.cast(x, dtype=tf.float32) / 255.
    label = tf.one_hot(tf.cast(y, dtype=tf.int32), depth=10)
    return image, label

# Load the MNIST train/test splits.
(x, y), (x_test, y_test) = datasets.mnist.load_data()

batchsz = 512

# Training pipeline: slice -> shuffle (buffer of 1000) -> preprocess -> batch.
train_db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .shuffle(1000)
    .map(preprocess)
    .batch(batchsz)
)

# Test pipeline, built with the same stages as the training one.
test_db = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .shuffle(1000)
    .map(preprocess)
    .batch(batchsz)
)

In [10]:
# Train on train_db for 5 epochs, using test_db as the validation data and
# running validation once every 2 epochs.
# The returned object records the training trajectory.
history = network.fit(
    train_db,
    epochs=5,
    validation_data=test_db,
    validation_freq=2,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [11]:
# Display the recorded training history (per-epoch metrics collected by fit).
history.history

{'accuracy': [0.8857167, 0.96283334, 0.9730667, 0.9780833, 0.98025],
 'loss': [0.3642711291193962,
  0.12393189992457629,
  0.09001679726640384,
  0.07489063748518626,
  0.06552658499876658],
 'val_accuracy': [0.9586, 0.9589],
 'val_loss': [0.13528729770332576, 0.13842141330242158]}

### 模型测试

In [12]:
# Pull the first batch out of the test dataset.
x, y = next(iter(test_db))
# Report the shape of the batch that is about to be predicted.
print('predict x:', x.shape)
# Run inference; the predictions are kept in `out`.
out = network.predict(x)
print(out)

predict x: (512, 28, 28)
[[9.9968231e-01 2.7021329e-07 1.0419684e-04 ... 4.9376224e-05
  2.4516762e-06 2.5791560e-05]
 [1.3073126e-03 6.9929543e-03 5.3222227e-01 ... 4.2156115e-01
  5.5427975e-03 2.8291810e-03]
 [5.7274467e-12 2.3022945e-08 1.7924759e-07 ... 2.1845930e-08
  5.2079724e-08 3.8408632e-07]
 ...
 [4.5035553e-10 6.9096834e-10 5.1212162e-10 ... 4.3533861e-11
  3.0087224e-06 2.7957947e-05]
 [8.6385694e-07 3.0842818e-06 4.6176160e-06 ... 1.8632505e-05
  5.4410077e-07 3.9405664e-04]
 [2.0788873e-10 9.9992716e-01 7.8617535e-07 ... 4.7528019e-06
  3.0525615e-07 3.6315996e-06]]


In [13]:
# Evaluate the model on test_db; the result lists the loss followed by the
# metrics configured in compile().
network.evaluate(test_db) 



[0.09765999196097255, 0.9723]

##  模型保存与加载

### 张量方式

In [14]:
# Keep a second reference to the trained model: later cells use
# bakup_network after `network` itself has been deleted.
bakup_network = network

# Save the model parameters to a file.
network.save_weights('weights.ckpt')
print('saved weights.')
del network # drop this reference to the network object

saved weights.


In [15]:
# Re-create the SAME architecture that produced the checkpoint. The Flatten
# layer must be included: without it the first Dense layer would be built on
# the last axis of a [b, 28, 28] input instead of on the 784 flattened
# features, and the saved kernels would not fit.
network = Sequential([layers.Flatten(input_shape=(28, 28)),
                    layers.Dense(256, activation='relu'),
                    layers.Dense(128, activation='relu'),
                    layers.Dense(64, activation='relu'),
                    layers.Dense(32, activation='relu'),
                    layers.Dense(10, activation='softmax')])
# Compile with the same settings used when the model was first assembled.
# from_logits must be False because the last layer already applies softmax;
# `learning_rate` is used instead of the deprecated `lr` alias.
network.compile(optimizer=optimizers.Adam(learning_rate=0.01),
    loss=losses.CategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)
# Read the parameters from the checkpoint file and write them into this network.
network.load_weights('weights.ckpt')
print('loaded weights!')

loaded weights!


### 网络方式

In [16]:
# Save the model structure together with its parameters to a single file.
bakup_network.save('model.h5')
print('saved total model.')
del network # drop the current network object; the next cell reloads it from file

saved total model.


In [17]:
# Restore the network structure and parameters from the saved file.
network = keras.models.load_model('model.h5')



### SavedModel 方式

In [18]:
# Save the model structure and parameters in the SavedModel format
# (written to the 'model-savedmodel' directory).
tf.saved_model.save(bakup_network, 'model-savedmodel')
print('saving savedmodel.')
del network # drop the current network object; the next cell reloads it from disk

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: model-savedmodel\assets
saving savedmodel.


In [19]:
print('load savedmodel from file.')
# Restore the network structure and parameters from the SavedModel directory.
network = tf.saved_model.load('model-savedmodel')
# Running accuracy meter for one-hot labels.
acc_meter = tf.metrics.CategoricalAccuracy()
# Sweep the whole test set, feeding each batch's predictions to the meter.
for x, y in test_db:
    pred = network(x)
    acc_meter.update_state(y, pred)
# Report the accumulated accuracy.
print("Test Accuracy:%f" % acc_meter.result())

load savedmodel from file.
Test Accuracy:0.972300


## 自定义网络

### 自定义网络层

首先创建类，并继承自 Layer 基类。创建初始化方法，并调用父类的初始化函数。由于是全连接层，因此需要设置两个参数：输入特征的长度 inp_dim 和输出特征的长度 outp_dim，并通过 self.add_weight(name, shape) 创建 shape 大小、名字为 name 的张量 $W$，并设置为需要优化。

In [20]:
class MyDense(layers.Layer):
    """Custom layer that owns a single trainable weight matrix.

    Args:
        inp_dim: length of the input feature vector.
        outp_dim: length of the output feature vector.
    """

    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        # Create the weight tensor, register it with the layer and mark it as
        # trainable. Keyword arguments are used because the positional order
        # of add_weight's parameters is not the same in every Keras version.
        self.kernel = self.add_weight(
            name='w', shape=[inp_dim, outp_dim], trainable=True)

In [21]:
# Custom layer with 4 input features and 3 output nodes.
net = MyDense(inp_dim=4, outp_dim=3)
# Show the layer's full variable list next to its trainable variable list.
(net.variables, net.trainable_variables)

([<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[-0.49712783,  0.41782534,  0.34388518],
         [-0.28433156, -0.3590107 , -0.27746367],
         [ 0.27090347, -0.32440424, -0.2171961 ],
         [ 0.4266827 ,  0.3612796 ,  0.8776803 ]], dtype=float32)>],
 [<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[-0.49712783,  0.41782534,  0.34388518],
         [-0.28433156, -0.3590107 , -0.27746367],
         [ 0.27090347, -0.32440424, -0.2171961 ],
         [ 0.4266827 ,  0.3612796 ,  0.8776803 ]], dtype=float32)>])

In [22]:
class MyDense(layers.Layer):
    """Variant of the custom layer whose kernel is a plain tf.Variable.

    The variable is created with ``trainable=False``, so it is NOT marked for
    optimization.
    """

    def __init__(self, inp_dim, outp_dim):
        super().__init__()
        # Random-normal initial value of shape [inp_dim, outp_dim].
        initial_value = tf.random.normal([inp_dim, outp_dim])
        # Assigned as an attribute of the layer, but flagged as non-trainable.
        self.kernel = tf.Variable(initial_value, trainable=False)

In [23]:
# Custom layer with 4 input features and 3 output nodes.
net = MyDense(inp_dim=4, outp_dim=3)
# Show the layer's full variable list next to its trainable variable list.
(net.variables, net.trainable_variables)

([<tf.Variable 'Variable:0' shape=(4, 3) dtype=float32, numpy=
  array([[ 1.155194  , -2.6825984 ,  0.5081635 ],
         [ 1.0916729 ,  1.8726718 ,  0.53675693],
         [-1.9162103 ,  0.08422428,  0.47484112],
         [ 2.5061731 , -0.28466296, -0.98096603]], dtype=float32)>], [])

完成自定义类的初始化工作后，我们来设计自定义类的前向运算逻辑，对于这个例子，只需要完成$O=X@W$矩阵运算，并通过固定的 ReLU 激活函数即可

In [24]:
class MyDense(layers.Layer):
    """Custom bias-free fully connected layer with a fixed ReLU activation.

    Computes ``relu(inputs @ W)`` where ``W`` has shape [inp_dim, outp_dim].

    Args:
        inp_dim: length of the input feature vector.
        outp_dim: length of the output feature vector.
    """

    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        # Create the weight tensor, register it with the layer and mark it as
        # trainable. Keyword arguments are used because the positional order
        # of add_weight's parameters is not the same in every Keras version.
        self.kernel = self.add_weight(
            name='w', shape=[inp_dim, outp_dim], trainable=True)

    def call(self, inputs, training=None):
        """Forward pass: matrix product with the kernel, then ReLU.

        ``training`` is accepted for interface compatibility and is not used.
        """
        # X @ W
        out = inputs @ self.kernel
        # Apply the activation function.
        out = tf.nn.relu(out)
        return out

### 自定义网络

In [25]:
# (input, output) sizes of the five stacked custom layers.
layer_dims = [(784, 256), (256, 128), (128, 64), (64, 32), (32, 10)]
# Build the network entirely from the custom MyDense layer.
network = Sequential([MyDense(n_in, n_out) for n_in, n_out in layer_dims])
network.build(input_shape=(None, 28*28))
network.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
my_dense_2 (MyDense)         multiple                  200704    
_________________________________________________________________
my_dense_3 (MyDense)         multiple                  32768     
_________________________________________________________________
my_dense_4 (MyDense)         multiple                  8192      
_________________________________________________________________
my_dense_5 (MyDense)         multiple                  2048      
_________________________________________________________________
my_dense_6 (MyDense)         multiple                  320       
Total params: 244,032
Trainable params: 244,032
Non-trainable params: 0
_________________________________________________________________


创建自定义网络类，首先创建类， 并继承自 Model 基类，分别创建对应的网络层对象

In [26]:
class MyModel(keras.Model):
    """Custom network class derived from the Model base class.

    Holds five MyDense layers (fc1 .. fc5) that are applied one after another.
    """

    def __init__(self):
        super().__init__()
        # Create every layer the network needs.
        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)

    def call(self, inputs, training=None):
        """Forward pass: feed ``inputs`` through fc1 .. fc5 in order."""
        x = inputs
        for fc in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = fc(x)
        return x

## 模型乐园

In [27]:
# Load ResNet50 with ImageNet pre-trained weights; include_top=False drops the
# final layer of the network.
resnet = keras.applications.ResNet50(weights='imagenet', include_top=False)
resnet.summary()
# Random batch of four 224x224 RGB-sized inputs to probe the output.
x = tf.random.normal([4, 224, 224, 3])
# Output of the truncated network.
out = resnet(x)
out.shape

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, None, None,  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, None, None, 3 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, None, None, 6 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, None, None, 6 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

TensorShape([4, 7, 7, 2048])

从服务器下载模型结构和在ImageNet数据集上预训练好的网络参数。通过设置include_top参数为False，可以选择去掉ResNet50最后一层，此时网络的输出特征图大小为[b,7,7,2048]。  
新建一个池化层(这里的池化层暂时可以理解为高、宽维度下采样的功能)，将特征从[b,7,7,2048]降维到[b, 2048]。

In [28]:
# New global average pooling layer.
global_average_layer = layers.GlobalAveragePooling2D()
# Random tensor shaped like the previous stage's output, used as test input.
x = tf.random.normal([4, 7, 7, 2048])
# Pooling reduces [4, 7, 7, 2048] to [4, 2048].
out = global_average_layer(x)
print(out.shape)

(4, 2048)


In [29]:
# New fully connected layer with 100 output nodes (no activation specified).
fc = layers.Dense(100)
# Random tensor shaped like the pooled features [4, 2048], used as test input.
x = tf.random.normal([4, 2048])
# One value per class for 100 classes. No softmax is applied in this layer, so
# these are raw scores rather than a normalized probability distribution.
out = fc(x)
print(out.shape)

(4, 100)


在创建预训练的 ResNet50 特征子网络、 新建的池化层和全连接层后，我们重新利用Sequential 容器封装成一个新的网络。

In [30]:
# Wrap the pre-trained ResNet50 sub-network, the pooling layer and the new
# fully connected layer into a single Sequential model.
mynet = Sequential([resnet, global_average_layer, fc])
mynet.summary()

Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, None, None, 2048)  23587712  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense_14 (Dense)             (None, 100)               204900    
Total params: 23,792,612
Trainable params: 23,739,492
Non-trainable params: 53,120
_________________________________________________________________
