# 卷积神经网络

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, datasets, Sequential

### 1. 自定义权值实现

**在 tensorflow 中：**
- $C_{in}$ = 输入通道数 = 卷积核通道数
- $C_{out}$ = 卷积核数 = 输出通道数
$$X:[b, h, w, C_{in}],W:[k, k, C_{in}, C_{out}]$$
$$\Downarrow$$
$$O:[b, h', w', C_{out}]$$

In [2]:
# Input batch laid out as [b, h, w, C_in]: 2 samples, 5x5 spatial size, 3 channels.
x = tf.random.normal(shape=(2, 5, 5, 3))
# Kernel bank laid out as [k, k, C_in, C_out]: four 3x3 kernels over 3 input channels.
w = tf.random.normal(shape=(3, 3, 3, 4))

In [3]:
# Stride 1 with no padding at all.
# Explicit padding is given per axis as
#   padding=[[0, 0], [top, bottom], [left, right], [0, 0]]
out = tf.nn.conv2d(
    input=x,
    filters=w,
    strides=1,
    padding=[[0, 0], [0, 0], [0, 0], [0, 0]],
)
out.shape

TensorShape([2, 3, 3, 4])

In [4]:
# Pad one pixel on the top, bottom, left and right, so the 3x3 kernel
# keeps the 5x5 spatial size at stride 1.
out = tf.nn.conv2d(
    input=x,
    filters=w,
    strides=1,
    padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
)
out.shape

TensorShape([2, 5, 5, 4])

In [5]:
# Stride 1 with padding chosen so that the output matches the input size.
# Note: padding='SAME' only yields an identical size when strides=1.
out = tf.nn.conv2d(input=x, filters=w, strides=1, padding='SAME')
out.shape

TensorShape([2, 5, 5, 4])

In [6]:
# With a stride s > 1, padding='SAME' shrinks height and width by a factor of about 1/s.
# Here the 5x5 input is first padded up to 6 (the smallest size divisible by 3),
# and 6 is then reduced by the stride of 3, giving a 2x2 output.
out = tf.nn.conv2d(input=x, filters=w, strides=3, padding='SAME')
out.shape

TensorShape([2, 2, 2, 4])

In [7]:
# tf.nn.conv2d does not apply a bias term, so the bias vector is added by hand.
b = tf.zeros(shape=(4,))  # one entry per output channel of `out`
out = tf.add(out, b)      # broadcasts over the batch and spatial axes

### 2. 卷积层类

- 在 `TensorFlow` 中，`API` 的命名有一定的规律：首字母大写的对象一般表示类，全部小写的一般表示函数（例如 `layers.Conv2D` 是类，`tf.nn.conv2d` 是函数）。

In [8]:
# Square kernel: a single integer sets both dimensions.
# Conv layer with four 3x3 kernels, stride 1 and 'SAME' padding.
layer = layers.Conv2D(
    filters=4,
    kernel_size=3,
    strides=1,
    padding='SAME',
)
# Non-square kernel: kernel size and stride are given as one value per spatial axis.
layer = layers.Conv2D(
    filters=4,
    kernel_size=(3, 4),
    strides=(1, 2),
    padding='SAME',
)

In [9]:
# Recreate the square-kernel layer and apply it to the sample input.
layer = layers.Conv2D(
    filters=4,
    kernel_size=3,
    strides=1,
    padding='SAME',
)
out = layer(x)  # forward pass; calling the layer also creates its weights
out.shape

TensorShape([2, 5, 5, 4])

In [10]:
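# Ways to inspect the layer parameters (uncomment one line to display it):
# layer.trainable_variables   # list containing the kernel W and the bias b
# layer.kernel                # kernel tensor W only; layer.weights lists all weights
# layer.bias                  # bias vector b only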

### 3. LeNet-5 实战

In [11]:
# Load MNIST, then split each (images, labels) pair into its own names.
mnist_train, mnist_test = datasets.mnist.load_data()
X_train, y_train = mnist_train
X_test, y_test = mnist_test

In [12]:
# Images become float32 tensors, labels become int32 tensors.
# NOTE(review): pixel values are not rescaled here — confirm whether that is intended.
X_train, X_test = (
    tf.convert_to_tensor(images, dtype=tf.float32) for images in (X_train, X_test)
)
y_train, y_test = (
    tf.convert_to_tensor(labels, dtype=tf.int32) for labels in (y_train, y_test)
)

In [13]:
# LeNet-5 style network, assembled one layer at a time.
network = Sequential()

# Convolutional feature extractor.
network.add(layers.Conv2D(6, kernel_size=3, strides=1))    # 6 kernels of size 3x3
network.add(layers.MaxPooling2D(pool_size=2, strides=2))   # pooling halves height and width
network.add(layers.ReLU())
network.add(layers.Conv2D(16, kernel_size=3, strides=1))   # second conv layer: 16 kernels of size 3x3
network.add(layers.MaxPooling2D(pool_size=2, strides=2))   # pooling halves height and width
network.add(layers.ReLU())
network.add(layers.Flatten())                              # flatten for the dense layers

# Fully connected classifier; the last layer emits raw logits (no activation).
network.add(layers.Dense(120, activation='relu'))
network.add(layers.Dense(84, activation='relu'))
network.add(layers.Dense(10))

# Build the model once by giving the input shape; the batch size of 4 is arbitrary.
network.build(input_shape=(4, 28, 28, 1))
network.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            multiple                  60        
_________________________________________________________________
max_pooling2d (MaxPooling2D) multiple                  0         
_________________________________________________________________
re_lu (ReLU)                 multiple                  0         
_________________________________________________________________
conv2d_4 (Conv2D)            multiple                  880       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 multiple                  0         
_________________________________________________________________
re_lu_1 (ReLU)               multiple                  0         
_________________________________________________________________
flatten (Flatten)            multiple                  0

In [14]:
from tensorflow.keras import losses, optimizers

In [15]:
# 插入通道维度 => [b, 28, 28, 1]
X_train = tf.expand_dims(X_train, axis=3)
X_train.shape

TensorShape([60000, 28, 28, 1])

In [16]:
# Loss object: from_logits=True moves the softmax into the loss function, so the
# network keeps emitting raw logits. Create the instance once and call it directly.
criteon = losses.CategoricalCrossentropy(from_logits=True)

# Use the `learning_rate` keyword; `lr` is a deprecated alias that newer Keras rejects.
optimizer = optimizers.SGD(learning_rate=0.01)

# One-hot encode the labels once, [b] => [b, 10]. The targets never change between
# epochs and need no gradient, so this stays outside the loop and the tape.
y_train_onehot = tf.one_hot(y_train, depth=10)

# Each epoch is a single full-batch gradient step over the whole training set.
for epoch in range(5):
    # Record the forward pass so gradients can be computed afterwards.
    with tf.GradientTape() as tape:
        # Forward pass: [b, 28, 28, 1] => [b, 10] logits
        out = network(X_train)
        # Scalar cross-entropy loss
        loss = criteon(y_train_onehot, out)

    print("losses: ", loss)
    # Gradients of the loss with respect to every trainable variable
    grads = tape.gradient(loss, network.trainable_variables)
    # Apply one SGD update
    optimizer.apply_gradients(zip(grads, network.trainable_variables))

losses:  tf.Tensor(56.36883, shape=(), dtype=float32)
losses:  tf.Tensor(372.36215, shape=(), dtype=float32)
losses:  tf.Tensor(13.492762, shape=(), dtype=float32)
losses:  tf.Tensor(2.3343742, shape=(), dtype=float32)
losses:  tf.Tensor(2.3213124, shape=(), dtype=float32)


**测试**

In [17]:
# Append a channel axis so the test images match the network input: [b, 28, 28] => [b, 28, 28, 1]
X_test = X_test[..., tf.newaxis]
X_test.shape

TensorShape([10000, 28, 28, 1])

In [31]:
# Forward pass over the whole test set; the final Dense(10) layer has no
# activation, so these are raw logits.
y_predict = network(X_test)

In [32]:
# Display the shape of the predictions.
y_predict.shape

TensorShape([10000, 10])

In [33]:
# The model output has not been passed through softmax (raw logits).
y_predict[0]

<tf.Tensor: id=1077, shape=(10,), dtype=float32, numpy=
array([-0.00110437,  0.00995757, -0.01960377, -0.00108513, -0.00507852,
        0.00272152,  0.00353631, -0.00891392,  0.00169442,  0.00873554],
      dtype=float32)>

In [34]:
# Index of the largest score along axis 1 becomes the predicted class id.
y_predict = tf.math.argmax(input=y_predict, axis=1)

In [38]:
# Show the first 100 entries of y_predict.
y_predict[:100]

<tf.Tensor: id=1087, shape=(100,), dtype=int64, numpy=
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 9, 1, 1, 3, 1, 1, 1, 1, 1,
       1, 0, 9, 1, 1, 1, 1, 1, 3, 9, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 9, 1, 1, 1], dtype=int64)>

In [41]:
# Run the test set through the network again, keeping the result under a
# separate name, and display its shape.
y_predict2 = network(X_test)
y_predict2.shape

TensorShape([10000, 10])