# 卷积神经网络

## 全连接网络的问题

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers,Sequential,losses,optimizers,datasets

# Enumerate every physical GPU visible to TensorFlow. The device-type string
# must be 'GPU'; any other value (such as 'GPU0') matches no device, so the
# memory-growth setting below would silently never be applied.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Let each GPU allocate memory on demand instead of reserving it all.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # set_memory_growth raised (per the TensorFlow docs this happens when
        # the GPUs were already initialized); report it and carry on.
        print(e)

# Four-layer fully connected network: three 256-unit ReLU hidden layers
# followed by a 10-unit output layer without activation.
model = keras.Sequential([
    *(layers.Dense(units=256, activation='relu') for _ in range(3)),
    layers.Dense(units=10),
])
# Build the model for inputs of shape (4, 784), then print its summary.
model.build(input_shape=(4, 784))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                multiple                  200960    
_________________________________________________________________
dense_1 (Dense)              multiple                  65792     
_________________________________________________________________
dense_2 (Dense)              multiple                  65792     
_________________________________________________________________
dense_3 (Dense)              multiple                  2570      
Total params: 335,114
Trainable params: 335,114
Non-trainable params: 0
_________________________________________________________________


**卷积运算：**  
在信号处理领域，1D连续信号的卷积运算被定义为两个函数乘积的积分：函数$f(\tau)$、函数$g(\tau)$，其中$g(\tau)$经过了翻转$g(-\tau)$和平移后变成$g(n-\tau)$。卷积的“卷”是指翻转平移操作，“积”是指积分运算，1D连续卷积定义为：$$(f\otimes g)(n)=\int_{-\infty}^{\infty}f(\tau)g(n-\tau)d\tau$$离散卷积将积分运算换成累加运算：$$(f\otimes g)(n)=\sum_{\tau=-\infty}^{\infty}f(\tau)g(n-\tau)$$

卷积神经层的输出尺寸$[b,h',w',c_{out}]$由卷积核的数量$c_{out}$，卷积核的大小$k$，步长$s$，填充数$p$（只考虑上下填充数量$p_h$相同，左右填充数量$p_w$相同的情况）以及输入$X$的高宽$h/w$共同决定， 它们之间的数学关系可以表达为：$$h'=\left\lfloor \frac{h+2 \cdot p_h - k}{s} \right\rfloor + 1 \\ w'=\left\lfloor\frac{w+2 \cdot p_w - k}{s} \right\rfloor + 1$$其中$p_h$、$p_w$分别表示高、宽方向的填充数量，$\lfloor \cdot \rfloor$表示向下取整。

## 卷积层实现

### 自定义权值

In [2]:
# Simulated input: batch of 2, height and width 5, 3 channels.
x = tf.random.normal(shape=[2, 5, 5, 3])
# The kernel tensor W follows the [k, k, c_in, c_out] layout:
# four 3x3 kernels over 3 input channels.
w = tf.random.normal(shape=[3, 3, 3, 4])
# Stride 1, explicit padding of 0 on every dimension.
out = tf.nn.conv2d(
    input=x,
    filters=w,
    strides=1,
    padding=[[0, 0], [0, 0], [0, 0], [0, 0]],
)
out.shape

TensorShape([2, 3, 3, 4])

上下左右各填充一个单位，则 padding 参数设置为[[0,0],[1,1],[1,1],[0,0]]

In [3]:
# Simulated input: batch of 2, height and width 5, 3 channels.
x = tf.random.normal(shape=[2, 5, 5, 3])
# Kernel tensor in [k, k, c_in, c_out] layout: four 3x3 kernels.
w = tf.random.normal(shape=[3, 3, 3, 4])
# Stride 1, one unit of padding on the top/bottom and left/right.
out = tf.nn.conv2d(
    input=x,
    filters=w,
    strides=1,
    padding=[[0, 0], [1, 1], [1, 1], [0, 0]],
)
out.shape

TensorShape([2, 5, 5, 4])

In [4]:
# Simulated input: batch of 2, height and width 5, 3 channels.
x = tf.random.normal(shape=[2, 5, 5, 3])
# Four 3x3 kernels over 3 input channels.
w = tf.random.normal(shape=[3, 3, 3, 4])
# Stride 1 with padding chosen so the output has the same size as the input.
# Note: padding='SAME' only yields an equal size when strides=1.
out = tf.nn.conv2d(input=x, filters=w, strides=1, padding='SAME')
out.shape

TensorShape([2, 5, 5, 4])

In [5]:
# Same input and kernel shapes as before, freshly sampled.
x = tf.random.normal(shape=[2, 5, 5, 3])
w = tf.random.normal(shape=[3, 3, 3, 4])
# Height/width are first padded up to 6, the smallest integer divisible by 3,
# and then reduced by a factor of 3, which gives a 2x2 output.
out = tf.nn.conv2d(input=x, filters=w, strides=3, padding='SAME')
out.shape

TensorShape([2, 2, 2, 4])

In [6]:
# Bias vector in [c_out] layout.
b = tf.zeros(shape=[4])
# Add the bias to the convolution output; it is broadcast automatically
# to [b, h', w', c_out].
out = tf.add(out, b)
out.shape

TensorShape([2, 2, 2, 4])

### 卷积层类

In [7]:
# Convolution layer with four 3x3 kernels, stride 1 and 'SAME' padding.
layer = layers.Conv2D(
    filters=4,
    kernel_size=3,
    strides=1,
    padding='SAME',
)
# Forward pass.
out = layer(x)
# Shape of the output tensor.
out.shape

TensorShape([2, 5, 5, 4])

In [8]:
# Return the list of all tensors of this layer that are to be optimized.
layer.trainable_variables

[<tf.Variable 'conv2d/kernel:0' shape=(3, 3, 3, 4) dtype=float32, numpy=
 array([[[[-1.65961877e-01, -1.66751057e-01, -1.99422598e-01,
            2.48859316e-01],
          [ 2.05890387e-01,  1.17410600e-01, -2.71057725e-01,
            4.53480780e-02],
          [-8.31105113e-02, -2.25279003e-01,  9.72283483e-02,
           -2.35820100e-01]],
 
         [[-1.61854327e-01,  2.28703648e-01,  1.32889420e-01,
           -3.55903506e-02],
          [-2.19753847e-01, -9.75874811e-02,  1.67854369e-01,
           -1.31139144e-01],
          [-7.00651258e-02,  3.06583852e-01, -4.95633483e-03,
            1.48773402e-01]],
 
         [[-6.97237998e-02,  5.67982793e-02,  2.96313435e-01,
           -2.80013651e-01],
          [ 3.05833727e-01,  1.73607528e-01,  1.84971690e-02,
            2.99893349e-01],
          [-2.29371905e-01,  2.14777499e-01, -1.09476149e-01,
           -2.26079524e-02]]],
 
 
        [[[-1.54422626e-01,  1.15456223e-01, -3.07255179e-01,
           -8.39172155e-02],
     

## 卷积层变种

### 空洞卷积

In [9]:
# Simulated input: one 7x7 single-channel sample.
x = tf.random.normal(shape=[1, 7, 7, 1])
# Dilated convolution: a single 3x3 kernel with dilation rate 2.
layer = layers.Conv2D(
    filters=1,
    kernel_size=3,
    strides=1,
    dilation_rate=2,
)
# Forward pass.
out = layer(x)
out.shape

TensorShape([1, 3, 3, 1])

### 转置卷积

In [10]:
# Build the input X: the integers 1..25 arranged as one 5x5 single-channel
# sample ([1, 5, 5, 1]) and cast to float32.
x = tf.cast(tf.reshape(tf.range(1, 26), [1, 5, 5, 1]), tf.float32)
# Kernel matrix with fixed contents, reshaped to the [k, k, c_in, c_out]
# layout expected by conv2d, i.e. [3, 3, 1, 1].
w = tf.reshape(
    tf.constant([[-1, 2, -3.], [4, -5, 6], [-7, 8, -9]]),
    [3, 3, 1, 1],
)
# Ordinary convolution with stride 2 and no padding.
out = tf.nn.conv2d(input=x, filters=w, strides=2, padding='VALID')
out

<tf.Tensor: id=262, shape=(1, 2, 2, 1), dtype=float32, numpy=
array([[[[ -67.],
         [ -77.]],

        [[-117.],
         [-127.]]]], dtype=float32)>

In [11]:
# Feed the ordinary convolution's output into a transposed convolution
# that uses the same kernel, stride and padding, targeting a 5x5 output.
xx = tf.nn.conv2d_transpose(
    input=out,
    filters=w,
    output_shape=[1, 5, 5, 1],
    strides=2,
    padding='VALID',
)
xx

<tf.Tensor: id=264, shape=(1, 5, 5, 1), dtype=float32, numpy=
array([[[[   67.],
         [ -134.],
         [  278.],
         [ -154.],
         [  231.]],

        [[ -268.],
         [  335.],
         [ -710.],
         [  385.],
         [ -462.]],

        [[  586.],
         [ -770.],
         [ 1620.],
         [ -870.],
         [ 1074.]],

        [[ -468.],
         [  585.],
         [-1210.],
         [  635.],
         [ -762.]],

        [[  819.],
         [ -936.],
         [ 1942.],
         [-1016.],
         [ 1143.]]]], dtype=float32)>

In [12]:
# A 6x6 single-channel input passed through the ordinary convolution.
x = tf.random.normal(shape=[1, 6, 6, 1])
out = tf.nn.conv2d(input=x, filters=w, strides=2, padding='VALID')
out.shape

TensorShape([1, 2, 2, 1])

In [13]:
# Transposed convolution that restores a 6x6 output size.
xx = tf.nn.conv2d_transpose(
    input=out,
    filters=w,
    output_shape=[1, 6, 6, 1],
    strides=2,
    padding='VALID',
)
xx

<tf.Tensor: id=273, shape=(1, 6, 6, 1), dtype=float32, numpy=
array([[[[ -12.10025  ],
         [  24.2005   ],
         [ -41.14547  ],
         [   9.689438 ],
         [ -14.534157 ],
         [   0.       ]],

        [[  48.401    ],
         [ -60.50125  ],
         [  91.98038  ],
         [ -24.223595 ],
         [  29.068314 ],
         [   0.       ]],

        [[-108.83326  ],
         [ 145.06502  ],
         [-199.14264  ],
         [   6.6234093],
         [   4.599045 ],
         [   0.       ]],

        [[  96.526024 ],
         [-120.65753  ],
         [  80.52035  ],
         [  80.33585  ],
         [ -96.40303  ],
         [   0.       ]],

        [[-168.92055  ],
         [ 193.05205  ],
         [-104.71335  ],
         [-128.53737  ],
         [ 144.60454  ],
         [   0.       ]],

        [[   0.       ],
         [   0.       ],
         [   0.       ],
         [   0.       ],
         [   0.       ],
         [   0.       ]]]], dtype=float32)>

**转置卷积实现**

In [14]:
# Build a 4x4 input: the integers 1..16 as one single-channel sample
# ([1, 4, 4, 1]), cast to float32.
x = tf.cast(tf.reshape(tf.range(1, 17), [1, 4, 4, 1]), tf.float32)
# 3x3 kernel with fixed contents in [k, k, c_in, c_out] layout ([3, 3, 1, 1]).
w = tf.reshape(
    tf.constant([[-1, 2, -3.], [4, -5, 6], [-7, 8, -9]]),
    [3, 3, 1, 1],
)
# Ordinary convolution with stride 1 and no padding.
out = tf.nn.conv2d(input=x, filters=w, strides=1, padding='VALID')
out

<tf.Tensor: id=288, shape=(1, 2, 2, 1), dtype=float32, numpy=
array([[[[-56.],
         [-61.]],

        [[-76.],
         [-81.]]]], dtype=float32)>

在保持strides=1， padding='VALID'，卷积核不变的情况下，我们通过卷积核w与输出out的转置卷积运算尝试恢复与输入x相同大小的高宽张量

In [15]:
# Transposed convolution that restores the 4x4 size of the input.
xx = tf.nn.conv2d_transpose(
    input=out,
    filters=w,
    output_shape=[1, 4, 4, 1],
    strides=1,
    padding='VALID',
)
tf.squeeze(xx)

<tf.Tensor: id=291, shape=(4, 4), dtype=float32, numpy=
array([[  56.,  -51.,   46.,  183.],
       [-148.,  -35.,   35., -123.],
       [  88.,   35.,  -35.,   63.],
       [ 532.,  -41.,   36.,  729.]], dtype=float32)>

In [16]:
# Transposed-convolution layer: one 3x3 kernel, stride 1, 'VALID' padding.
layer = layers.Conv2DTranspose(
    filters=1,
    kernel_size=3,
    strides=1,
    padding='VALID',
)
# Pass the convolution output through the transposed-convolution layer.
xx2 = layer(out)
xx2

<tf.Tensor: id=344, shape=(1, 4, 4, 1), dtype=float32, numpy=
array([[[[  1.896018  ],
         [ 26.08768   ],
         [ 57.787025  ],
         [ 34.44299   ]],

        [[ 24.278782  ],
         [ 89.65541   ],
         [ 82.10664   ],
         [ 14.191902  ]],

        [[  1.3896465 ],
         [ 47.111137  ],
         [ -0.22128582],
         [-53.176964  ]],

        [[-38.092247  ],
         [-34.26138   ],
         [ -7.313441  ],
         [-14.992759  ]]]], dtype=float32)>

###  分离卷积

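那么采用分离卷积有什么优势呢？一个很明显的优势在于，同样的输入和输出，采用 Separable Convolution 的参数量更少。例如输入通道数为 3、输出通道数为 4、卷积核大小为 $3\times 3$ 时，普通卷积的参数量为 $3\cdot 3\cdot 3\cdot 4=108$，而分离卷积的参数量为 $3\cdot 3\cdot 3\cdot 1+1\cdot 1\cdot 3\cdot 4=39$，约是普通卷积的$\displaystyle \frac{1}{3}$。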

## 深度残差网络

### ResBlock实现

In [17]:
class BasicBlock(layers.Layer):
    """Residual block: two 3x3 conv + batch-norm stages plus a shortcut branch.

    The block computes ``relu(f(x) + shortcut(x))`` where ``f`` is
    conv -> BN -> ReLU -> conv -> BN.

    Args:
        filter_num: Number of filters (output channels) of both convolutions.
        stride: Stride of the first convolution. When it is not 1, the
            shortcut branch is a 1x1 convolution with the same stride;
            otherwise the shortcut passes the input through unchanged.
    """

    def __init__(self, filter_num, stride=1):
        super(BasicBlock, self).__init__()
        # f(x) consists of two ordinary convolution layers; this is the first.
        self.conv1 = layers.Conv2D(filter_num, (3, 3), strides=stride, padding='same')
        self.bn1 = layers.BatchNormalization()
        self.relu = layers.Activation('relu')
        # Second convolution layer; always stride 1.
        self.conv2 = layers.Conv2D(filter_num, (3, 3), strides=1, padding='same')
        self.bn2 = layers.BatchNormalization()

        if stride != 1:
            # The main branch is strided, so the shortcut needs a 1x1
            # convolution with the same stride and filter count.
            self.downsample = Sequential()
            self.downsample.add(layers.Conv2D(filter_num, (1, 1), strides=stride))
        else:
            # Otherwise connect the input directly.
            # NOTE(review): the input is added to a filter_num-channel tensor
            # as-is, so it must already have filter_num channels.
            self.downsample = lambda x: x

    def call(self, inputs, training=None):
        """Run the forward pass of the residual block.

        Args:
            inputs: Input tensor fed to both the main branch and the shortcut.
            training: Training/inference flag. It is forwarded explicitly to
                the batch-normalization layers instead of relying on implicit
                propagation; None leaves the decision to Keras.

        Returns:
            The ReLU-activated sum of the main branch and the shortcut.
        """
        # First convolution stage.
        out = self.conv1(inputs)
        out = self.bn1(out, training=training)
        out = self.relu(out)
        # Second convolution stage.
        out = self.conv2(out)
        out = self.bn2(out, training=training)
        # Transform the input through the shortcut branch.
        identity = self.downsample(inputs)
        # f(x) + x
        output = layers.add([out, identity])
        # Apply the final activation and return.
        output = tf.nn.relu(output)
        return output