In [2]:
# 常见网络类层
# 在tf.keras.layers命名空间中提供了许多大量的常见网络层的类,
# 例如 全连接层、激活函数层、池化层、卷积层、循环神经网络层等
# 对于这些网络层类只需要创建时指定相关参数，并且调用__call__方法即可完成向前计算
# 在__call__方法中，Keras会自动调用每个层的向前传播逻辑，这些逻辑一般实现在类的call函数中

# 搭建Softmax层 利用tf.nn.softmax和利用layers.Soiftmax(axis)类搭建，axis为指定Softmax的维度

import tensorflow as tf
# 导入keras模型
from tensorflow import keras
from tensorflow.keras import layers # 导入常见网络层类
# 创建Softmax层
x = tf.constant([2.,1.,0.1]) # 创建输入张量
layer = layers.Softmax(axis=-1) # 创建Softmax层
out = layer(x)
out

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.6590012 , 0.24243298, 0.09856589], dtype=float32)>

In [3]:
# 利用tf.nn.softmax()函数完成计算
out = tf.nn.softmax(x) # 调用softmax函数完成向前计算
out 

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([0.6590012 , 0.24243298, 0.09856589], dtype=float32)>

In [None]:
###########################################################################################################

In [9]:
# 网络容器
# 当网络层数变得非常多的时候可以利用网络容器Sequential将多个网络层封装成一个
# 大网络模型，只需要调用网络模型的实例一次即可完成数据从第一层到最末层的顺序传播运算。
# 导入Sequential容器
from tensorflow.keras import layers, Sequential
network = Sequential([ # 封装成一个网络
    layers.Dense(3, activation=None), # 全连接层，此处不使用激活函数
    layers.ReLU(), # 激活函数层
    layers.Dense(2, activation=None), # 全连接层，此处不适用激活函数
    layers.ReLU(), # 激活函数层
])
x = tf.random.normal([4, 3])
out = network(x) # 输入从第一层开始， 逐层传播至输出层，并返回输出层的输出
out

<tf.Tensor: shape=(4, 2), dtype=float32, numpy=
array([[0.      , 0.      ],
       [0.      , 0.      ],
       [0.      , 0.      ],
       [0.      , 0.728432]], dtype=float32)>

In [2]:
# 利用add方法添加新的网络层，实现动态创建网络的功能
from tensorflow.keras import layers, Sequential
layers_num = 2 # 堆叠两次
network = Sequential([]) # 先创建空的网络容器
for _ in range(layers_num):
    network.add(layers.Dense(3)) # 添加全连接层
    network.add(layers.ReLU()) # 添加激活函数
network.build(input_shape=(4, 4)) # 创建网络参数
network.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  15        
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
dense_1 (Dense)              multiple                  12        
_________________________________________________________________
re_lu_1 (ReLU)               multiple                  0         
Total params: 27
Trainable params: 27
Non-trainable params: 0
_________________________________________________________________


In [3]:
# 打印网络优化的待优化参数名与shape
for p in network.trainable_variables:
    print(p.name, p.shape)

dense/kernel:0 (4, 3)
dense/bias:0 (3,)
dense_1/kernel:0 (3, 3)
dense_1/bias:0 (3,)


In [None]:
###########################################################################################################

In [4]:
# 模型装配、模型训练
# 模型装配
# 创建5层的全连接网络用于MNIST手写数字识别网络
from tensorflow.keras import Sequential, layers
import tensorflow as tf
network = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10)
])
network.build(input_shape=(4, 28*28))
network.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              multiple                  200960    
_________________________________________________________________
dense_3 (Dense)              multiple                  32896     
_________________________________________________________________
dense_4 (Dense)              multiple                  8256      
_________________________________________________________________
dense_5 (Dense)              multiple                  2080      
_________________________________________________________________
dense_6 (Dense)              multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________


In [5]:
# 导入优化器，损失函数模块
from tensorflow.keras import optimizers, losses
# 模型装配
# 采用Adam优化器，学习率为0.01；采用交叉熵损失函数，包含Softmax
network.compile(optimizer=optimizers.Adam(lr=0.01),
                loss=losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'] # 设置测量指标为准确率
)

# 模型训练
# 指定训练集为train_db，验证集为val_db，训练5个epochs，每2个epochs验证一次
# 返回训练轨迹信息保存在history对象中
history = network.fit(train_db, epochs=5, validation_data=val_db, validation_freq=2)
history.history

NameError: name 'train_db' is not defined

In [7]:
# 手写数字识别MNIST 
# 样本量batchsz：128
# 训练批次epoch = 5
# (x, y) 图像和编号
# (x_val, y_val) 测试图像和其编号
import tensorflow as tf
# 导入数据集datasets，层layers，网络容器Sequential, 优化器optimizers, 损失函数losses
# 测量指标metrics
from tensorflow.keras import datasets, layers, optimizers, losses, metrics


# 预处理函数
# 图像打平， 序号利用onehot编码
# x:图像 y:图像序号
def preprocess(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255
    x = tf.reshape(x, [28*28]) # 打平
    y = tf.cast(y, dtype=tf.int32) 
    y = tf.one_hot(y, depth=10) # onehot编码 深度为10
    return x,y

# 训练样本集大小为128
batchsz = 128
(x, y),(x_val, y_val) = datasets.mnist.load_data()
print('datasets:', x.shape, y.shape, x.min(), x.max())

# 数据集
train_db = tf.data.Dataset.from_tensor_slices((x, y)) # 切片
train_db = train_db.map(preprocess).shuffle(60000).batch(batchsz) # 随即打散获得128个训练样本
val_db = tf.data.Dataset.from_tensor_slices((x_val, y_val)) # 切片获得测试数据
val_db = val_db.map(preprocess).batch(batchsz)

# 迭代样本集
sample = next(iter(train_db))
print(sample[0].shape, sample[1].shape)

# 定义5层的全连接网络
network = Sequential([
    layers.Dense(256, activation='relu'),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(10)
])
network.build(input_shape=(4, 28*28))
network.summary()

# 模型装配
# 采用Adam优化器，学习率为0.01；采用交叉熵损失函数，包含Softmax
network.compile(optimizer=optimizers.Adam(lr=0.01),
                loss=losses.CategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'] # 设置测量指标为准确率
)

# 模型训练 样本集为train_db epochs为5 验证数据为val_db 每2个epoch验证一次
network.fit(train_db, epochs=5, validation_data=val_db, validation_freq=2)
network.evaluate(val_db) # 返回损失值和模型度量值

# 迭代测试数据
sample = next(iter(val_db))
x = sample[0]
y = sample[1] # one_hot
pred = network.predict(x) # [b,10]
y = tf.argmax(y, axis=1)
pred = tf.argmax(pred, axis=1)

print(pred)
print(y)




datasets: (60000, 28, 28) (60000,) 0 255
(128, 784) (128, 10)
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  200960    
_________________________________________________________________
dense_1 (Dense)              multiple                  32896     
_________________________________________________________________
dense_2 (Dense)              multiple                  8256      
_________________________________________________________________
dense_3 (Dense)              multiple                  2080      
_________________________________________________________________
dense_4 (Dense)              multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
tf

In [3]:
# 自定义网络层
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, losses, metrics
class MyDense(layers.Layer):
    # 自定义网络层
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        # 创建权值张量并添加到类管理列表中，设置为需要优化
        self.kernel = self.add_variable('w', [inp_dim, outp_dim], trainable=True)
        
net = MyDense(4, 3) #创建输入节点为4， 输出节点为3的自定义层
net.variables, net.trainable_variables # C哈看自定义层的参数列表

Instructions for updating:
Please use `layer.add_weight` method instead.


([<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[ 0.44240594, -0.07384259,  0.36983418],
         [ 0.02167153,  0.51211226,  0.37494767],
         [-0.78961504,  0.02941352, -0.74860394],
         [-0.04101104, -0.52576876, -0.49299306]], dtype=float32)>],
 [<tf.Variable 'w:0' shape=(4, 3) dtype=float32, numpy=
  array([[ 0.44240594, -0.07384259,  0.36983418],
         [ 0.02167153,  0.51211226,  0.37494767],
         [-0.78961504,  0.02941352, -0.74860394],
         [-0.04101104, -0.52576876, -0.49299306]], dtype=float32)>])

In [3]:
# 自定义网络层
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, losses, metrics, Sequential
class MyDense(layers.Layer):
    # 自定义网络层
    def __init__(self, inp_dim, outp_dim):
        super(MyDense, self).__init__()
        # 创建权值张量并添加到类管理列表中，设置为需要优化
        self.kernel = self.add_variable('w', [inp_dim, outp_dim], trainable=True)
    
    def call(self, inputs, training=None):
        # 定义向前计算逻辑
        # X @ W
        out = inputs @ self.kernel
        # 执行向前计算的激活函数
        out = tf.nn.relu(out)
        return out
    
# net = MyDense(4, 3) #创建输入节点为4， 输出节点为3的自定义层
# net.variables, net.trainable_variables # 查看自定义层的参数列表

# 使用自定义的网络层来实现MNIST手写数字识别
network = Sequential([MyDense(784, 256),
                      MyDense(256, 128),
                      MyDense(128, 64),
                      MyDense(64, 32),
                      MyDense(32, 10)
])
network.build(input_shape=(None, 28*28))
network.summary()

# 自定义一个继承自keras.Model的自定义网络类

class MyModel(tf.keras.Model):
    # 自定义网络类，继承自Model基类
    def __init__(self):
        super(MyModel, self).__init__()
        # 完成网络层内需要的网络层创建工作
        self.fc1 = MyDense(28*28, 256)
        self.fc2 = MyDense(256, 128)
        self.fc3 = MyDense(128, 64)
        self.fc4 = MyDense(64 ,32)
        self.fc5 = MyDense(32, 10)
        
    # 实现向前计算逻辑
    def call(self, inputs, training=None):
        # 自定义向前计算逻辑
        x = self.fc1(inputs)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        return x


network_1 = MyModel()
network_1.build(input_shape=(None, 28*28))
network_1.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
my_dense_20 (MyDense)        multiple                  200704    
_________________________________________________________________
my_dense_21 (MyDense)        multiple                  32768     
_________________________________________________________________
my_dense_22 (MyDense)        multiple                  8192      
_________________________________________________________________
my_dense_23 (MyDense)        multiple                  2048      
_________________________________________________________________
my_dense_24 (MyDense)        multiple                  320       
Total params: 244,032
Trainable params: 244,032
Non-trainable params: 0
_________________________________________________________________
Model: "my_model_2"
_________________________________________________________________
Layer (type)                

In [5]:
##########################################################################################
# 模型乐园
# 加载ImageNet预训练网络模型，并去掉最后一层
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, losses, metrics, Sequential
resnet = tf.keras.applications.ResNet50(weights='imagenet', include_top=False)
resnet.summary()
# 测试网络的输出
x = tf.random.normal([4,224,224,3])
out = resnet(x) # 获得子网络输出
out.shape

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


Exception: URL fetch failure on https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5: None -- EOF occurred in violation of protocol (_ssl.c:748)

In [None]:
############################################################################################################
# 测量工具
import  tensorflow as tf
from    tensorflow.keras import datasets, layers, optimizers, Sequential, metrics


def preprocess(x, y):

    x = tf.cast(x, dtype=tf.float32) / 255.
    y = tf.cast(y, dtype=tf.int32)

    return x,y


batchsz = 128
(x, y), (x_val, y_val) = datasets.mnist.load_data()
print('datasets:', x.shape, y.shape, x.min(), x.max())



db = tf.data.Dataset.from_tensor_slices((x,y))
db = db.map(preprocess).shuffle(60000).batch(batchsz).repeat(10)

ds_val = tf.data.Dataset.from_tensor_slices((x_val, y_val))
ds_val = ds_val.map(preprocess).batch(batchsz)




network = Sequential([layers.Dense(256, activation='relu'),
                     layers.Dense(128, activation='relu'),
                     layers.Dense(64, activation='relu'),
                     layers.Dense(32, activation='relu'),
                     layers.Dense(10)])
network.build(input_shape=(None, 28*28))
network.summary()

optimizer = optimizers.Adam(lr=0.01)

acc_meter = metrics.Accuracy()
loss_meter = metrics.Mean()


# 遍历当前数据集
for step, (x,y) in enumerate(db):

    with tf.GradientTape() as tape:
        # [b, 28, 28] => [b, 784]
        x = tf.reshape(x, (-1, 28*28))
        # [b, 784] => [b, 10]
        out = network(x)
        # [b] => [b, 10]
        y_onehot = tf.one_hot(y, depth=10)
        # [b]
        loss = tf.reduce_mean(tf.losses.categorical_crossentropy(y_onehot, out, from_logits=True))

        loss_meter.update_state(loss)



    grads = tape.gradient(loss, network.trainable_variables)
    optimizer.apply_gradients(zip(grads, network.trainable_variables))


    if step % 100 == 0:

        print(step, 'loss:', loss_meter.result().numpy())
        loss_meter.reset_states()


    # evaluate
    if step % 500 == 0:
        total, total_correct = 0., 0
        acc_meter.reset_states()

        for step, (x, y) in enumerate(ds_val):
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # [b, 784] => [b, 10]
            out = network(x)


            # [b, 10] => [b]
            pred = tf.argmax(out, axis=1)
            pred = tf.cast(pred, dtype=tf.int32)
            # bool type
            correct = tf.equal(pred, y)
            # bool tensor => int tensor => numpy
            total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
            total += x.shape[0]

            acc_meter.update_state(y, pred)


        print(step, 'Evaluate Acc:', total_correct/total, acc_meter.result().numpy())

datasets: (60000, 28, 28) (60000,) 0 255
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  200960    
_________________________________________________________________
dense_1 (Dense)              multiple                  32896     
_________________________________________________________________
dense_2 (Dense)              multiple                  8256      
_________________________________________________________________
dense_3 (Dense)              multiple                  2080      
_________________________________________________________________
dense_4 (Dense)              multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________
0 loss: 2.3312695
78 Evaluate Acc: 0.1179 0.1179
100 loss: 0.5564696
200 