## 自定义网络层

In [1]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics
from tensorflow import keras

In [2]:
def preprocess(x, y):
    """x is a simple image, not a batch"""
    x = tf.cast(x, dtype=tf.float32) / 255.
    x = tf.reshape(x, [28*28])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

In [3]:
batchsz = 128
(x, y), (x_val, y_val) = datasets.mnist.load_data()
print('datasets:', x.shape, y.shape, x.min(), y.min())

datasets: (60000, 28, 28) (60000,) 0 0


In [4]:
db = tf.data.Dataset.from_tensor_slices((x, y))
db = db.map(preprocess).shuffle(60000).batch(batchsz)
ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).batch(batchsz)

sample = next(iter(db))
print(sample[0].shape, sample[1].shape)

(128, 784) (128, 10)


In [5]:
network = Sequential([   
    layers.Dense(256, activation=tf.nn.relu),    # [b, 784] => [b, 256]
    layers.Dense(128, activation=tf.nn.relu),    # [b, 256] => [b, 128]
    layers.Dense(64, activation=tf.nn.relu),     # [b, 128] => [b, 64]
    layers.Dense(32, activation=tf.nn.relu),     # [b, 64] => [b, 32]
    layers.Dense(10)                             # [b, 32] => [b, 10]
])
network.build(input_shape=(None, 28*28))
network.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  200960    
_________________________________________________________________
dense_1 (Dense)              multiple                  32896     
_________________________________________________________________
dense_2 (Dense)              multiple                  8256      
_________________________________________________________________
dense_3 (Dense)              multiple                  2080      
_________________________________________________________________
dense_4 (Dense)              multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________


In [10]:
# 自定义层 MyDense 类，需要继承自父类：tf.keras.layers.Layer
class MyDense(layers.Layer):
    # 定义 __init__() 方法，用于初始化 MyDense 层
    def __init__(self, inp_dim, outp_dim):
        # 调用父类的 __init__()
        super(MyDense, self).__init__()
        # 自定义参数，需要调用 add_variable() 方法，便于以后TensorFlow的Sequential统一管理
        # w参数
        self.kernel = self.add_variable('w',[inp_dim, outp_dim])
        # b参数
        self.bias = self.add_variable('b', [outp_dim])
    # 定义 call() 方法,用于执行层的自定义功能
    def call(self, inputs, training=None):
        out = inputs @ self.kernel + self.bias
        return out
    
# 自定义 MyModel 类，需要继承自父类： tf.keras.Model
class MyModel(keras.Model):
    def __init__(self):
        super(MyModel, self).__init__()
        # 使用自定义的 MyDense 类对象定义 MyModel
        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64, 32)
        self.fc5 = MyDense(32, 10)
    # 自定义定义cal()方法    
    def call(self, inputs, training=None):
#         x = self.fc1(inputs)
#         x = tf.nn.relu(x)
#         x = self.fc2(x)
#         x = tf.nn.relu(x)
#         x = self.fc3(x)
#         x = tf.nn.relu(x)
#         x = self.fc4(x)
#         x = tf.nn.relu(x)
#         x = self.fc5(x)
        x = tf.nn.relu(self.fc1(inputs))
        x = tf.nn.relu(self.fc2(x))
        x = tf.nn.relu(self.fc3(x))
        x = tf.nn.relu(self.fc4(x))
        x = self.fc5(x)      
        return x
    
# 创建神经网络 network
network = MyModel()
# 因为MyModel继承自tf.keras.Model,MyDense 继承自 layers.Layer，所以可以调用的高阶API：
# compile、fit、evaluate 完成对数据的训练和测试
network.compile(optimizer=optimizers.Adam(lr=0.01),
               loss=tf.losses.CategoricalCrossentropy(from_logits=True),
               metrics=['accuracy'])
network.fit(db, epochs=5, validation_data=ds_val, validation_freq=2)
network.evaluate(ds_val)

smaple = next(iter(ds_val))
x = sample[0]
y = sample[1]
# 调用 network 的 predict() 方法，对神经网络进行预测
pred = network.predict(x)
# convert back to number
y = tf.argmax(y, axis=1)
pred = tf.argmax(pred, axis=1)
print(pred)
print(y)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
tf.Tensor(
[7 6 9 4 4 5 7 3 5 8 3 6 3 5 3 1 2 9 5 9 2 2 4 8 6 1 7 9 2 2 9 3 4 3 0 9 9
 2 4 9 0 5 8 0 8 3 2 1 7 1 2 9 3 2 2 0 1 1 0 3 4 9 0 2 8 3 8 9 4 3 2 9 3 4
 4 8 6 5 4 3 0 8 7 6 0 4 0 3 1 1 2 0 6 2 4 7 7 2 0 2 3 6 5 9 8 0 8 5 3 7 4
 6 9 2 7 9 6 1 6 5 0 7 2 1 9 5 2 3], shape=(128,), dtype=int64)
tf.Tensor(
[7 6 9 4 4 5 7 3 5 8 3 6 3 5 3 1 2 9 5 9 2 2 4 8 6 1 7 9 2 2 9 3 4 3 0 9 9
 2 4 9 0 5 8 0 8 3 2 1 7 8 2 9 3 2 2 0 1 9 0 3 4 9 0 2 8 3 8 9 4 3 2 9 3 4
 4 8 6 5 4 3 0 8 7 6 0 4 0 3 1 1 2 0 6 2 4 7 9 2 0 2 3 6 5 9 8 0 8 5 3 7 4
 6 9 2 7 9 6 1 6 5 0 7 2 1 9 5 2 3], shape=(128,), dtype=int64)
