In [4]:
# !usr/bin/python
# -*- encoding: utf-8 -*-
# Project: Convolutional Neural Network
# Author: Tracy Tao
# Date: 2022/04/10
import torch
from torch import nn
from d2l import torch as d2l

In [6]:
def corr2d(X, Kernel): #@save
    """Compute the 2-D cross-correlation of ``X`` with ``Kernel``.

    :param X: 2-D input tensor (e.g. image pixels).
    :param Kernel: 2-D kernel (filter) tensor.
    :return: tensor of shape
        ``(X.shape[0] - k_rows + 1, X.shape[1] - k_cols + 1)`` where each
        entry is the sum of the element-wise product of ``Kernel`` and the
        input window whose top-left corner is at that position.
    """
    k_rows, k_cols = Kernel.shape  # kernel height and width
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))  # result buffer ("valid" output size)
    for r in range(out_rows):
        for c in range(out_cols):
            window = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = (window * Kernel).sum()  # multiply element-wise, then sum
    return Y

In [7]:
# Worked example: 3x3 input and 2x2 kernel give a 2x2 output.
X = torch.tensor([
    [0.0, 1.0, 2.0],
    [3.0, 4.0, 5.0],
    [6.0, 7.0, 8.0],
])
K = torch.tensor([
    [0.0, 1.0],
    [2.0, 3.0],
])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [8]:
class Conv2D(nn.Module):
    """2-D cross-correlation layer with a learnable kernel and scalar bias.

    The layer is built on the hand-written ``corr2d`` function, so it
    operates on a single 2-D input (no batch or channel axes).
    """
    def __init__(self, kernel_size):
        """Create the learnable parameters.

        :param kernel_size: shape of the kernel, e.g. ``(2, 2)``.
        """
        super().__init__()
        # Kernel entries start as uniform random values in [0, 1).
        self.weight = nn.Parameter(torch.rand(kernel_size))
        # A single bias value, added to every output element.
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, X):
        """Return ``corr2d(X, weight) + bias``.

        :param X: 2-D input tensor.
        :return: cross-correlation of ``X`` with the kernel, plus the bias.
        """
        # Fixed: the original referenced an undefined lowercase ``x`` here.
        return corr2d(X, self.weight) + self.bias

In [9]:
# 6x8 all-ones "image" used for the edge-detection example.
X = torch.ones(6, 8)

In [10]:
# Zero out columns 2..5 in place, leaving vertical edges where 1 meets 0.
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [11]:
# 1x2 kernel [1, -1]: its response is the difference between horizontal neighbours.
kernel = torch.tensor([1.0, -1.0]).reshape(1, 2)

In [14]:
# Cross-correlate the image with the [1, -1] kernel.
Y = corr2d(X, kernel)
Y # detects vertical edges: 1 at a 1->0 transition, -1 at a 0->1 transition, 0 elsewhere

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [13]:
# Same kernel on the transposed image: the edges are now horizontal, so every response is 0.
corr2d(X.t(), kernel)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [22]:
# Learning the kernel: on every iteration we compare Y with the layer's output
# using the squared error, then use the gradient to update the kernel.

# Built-in 2-D convolution layer: 1 input channel, 1 output channel,
# a kernel of shape (1, 2), and no bias term.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(1, 2), bias=False)

In [23]:
# Display the layer's repr to confirm its configuration.
conv2d

Conv2d(1, 1, kernel_size=(1, 2), stride=(1, 1), bias=False)

In [24]:
# nn.Conv2d works on 4-D tensors laid out as (batch size, channels, height, width).
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)
lr = 0.03  # learning rate

In [25]:
# Ten steps of plain gradient descent on the sum of squared errors.
for i in range(10):
    Y_hat = conv2d(X)  # forward pass: the layer's prediction
    l = (Y_hat - Y) ** 2  # element-wise squared error
    loss = l.sum()  # scalar loss, reused for backward() and for logging
    conv2d.zero_grad()  # clear the gradient left over from the previous step
    loss.backward()  # compute d(loss)/d(weight)
    # Update the kernel in place under no_grad() rather than through `.data`,
    # so the step is not recorded by autograd.
    with torch.no_grad():
        conv2d.weight -= lr * conv2d.weight.grad
    print(f'epoch {i+1}, loss {loss:.3f}')

epoch 1, loss 3.439
epoch 2, loss 1.409
epoch 3, loss 0.577
epoch 4, loss 0.236
epoch 5, loss 0.097
epoch 6, loss 0.040
epoch 7, loss 0.016
epoch 8, loss 0.007
epoch 9, loss 0.003
epoch 10, loss 0.001


In [26]:
# Show the learned weight as a (1, 2) tensor for comparison with the hand-written kernel.
conv2d.weight.data.reshape((1, 2))

tensor([[ 0.9934, -0.9942]])

- 学习到的卷积核权重 `[[0.9934, -0.9942]]` 非常接近前面定义的 `kernel = torch.tensor([[1.0, -1.0]])`

In [27]:
# padding
def compute_conv2d(conv2d, X):
    """Apply a convolution layer to an input that has no batch/channel axes.

    :param conv2d: callable layer applied to the 4-D input (e.g. ``nn.Conv2d``).
    :param X: input tensor; two leading axes of size 1 are added before the call.
    :return: the layer's output reshaped to its shape without the first two axes.
    """
    # Prepend batch size 1 and channel count 1.
    batched = X.reshape(1, 1, *X.shape)
    out = conv2d(batched)
    # Drop the leading batch and channel axes again.
    spatial_shape = out.shape[2:]
    return out.reshape(spatial_shape)

In [48]:
# 3x3 kernel with a padding of 1 on every side (kernel_size=(3, 3), padding=(1, 1)).
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1)
conv2d

Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

In [30]:
# Random 8x8 input with entries drawn uniformly from [0, 1).
X = torch.rand(8, 8)
X

tensor([[0.7396, 0.8041, 0.0930, 0.6666, 0.3840, 0.0064, 0.5289, 0.5827],
        [0.9123, 0.0828, 0.5479, 0.8958, 0.7478, 0.2029, 0.0101, 0.6377],
        [0.1328, 0.8036, 0.8145, 0.1376, 0.7164, 0.4953, 0.3401, 0.3980],
        [0.5415, 0.6714, 0.7197, 0.1684, 0.2576, 0.5287, 0.8991, 0.2026],
        [0.8565, 0.0918, 0.8069, 0.4115, 0.8803, 0.1885, 0.3074, 0.9184],
        [0.3165, 0.3509, 0.5189, 0.3935, 0.1815, 0.0569, 0.7506, 0.9154],
        [0.9350, 0.9608, 0.2103, 0.7720, 0.9249, 0.8598, 0.9749, 0.7319],
        [0.9839, 0.4489, 0.8695, 0.9849, 0.6791, 0.3685, 0.8929, 0.7662]])

In [31]:
# With a 3x3 kernel and padding 1 the output keeps the 8x8 size of the input.
compute_conv2d(conv2d, X)

tensor([[-0.0943,  0.1016,  0.3790, -0.0425, -0.0115,  0.0035,  0.0859,  0.0667],
        [ 0.4600,  0.4985, -0.1226,  0.3465,  0.2424,  0.1764,  0.3253, -0.0366],
        [ 0.3854, -0.0013,  0.3502,  0.5027,  0.1907,  0.2340,  0.1127,  0.0825],
        [ 0.1323,  0.5019,  0.2893,  0.2906,  0.2233,  0.0676,  0.3268,  0.1912],
        [ 0.2779,  0.5621,  0.0941,  0.1532,  0.1509,  0.6839,  0.3906, -0.2625],
        [ 0.3582, -0.0478,  0.3393,  0.4011,  0.2877,  0.0278,  0.1298,  0.1177],
        [ 0.1119,  0.3170,  0.5071, -0.0131,  0.0157,  0.3968,  0.5809,  0.1533],
        [ 0.4059,  0.4821,  0.1945,  0.5828,  0.5937,  0.5340,  0.3785,  0.2926]],
       grad_fn=<ReshapeAliasBackward0>)

In [33]:
# Manual version of compute_conv2d, step by step.
# The 4-D input gets its own name instead of overwriting X: rebinding X here
# would prepend two more axes each time the cell is re-run and break conv2d.
X_4d = X.reshape((1, 1) + X.shape)  # add batch and channel axes
Y = conv2d(X_4d)

In [41]:
# Shape of Y without its first two (batch, channel) axes.
Y.shape[2:]

torch.Size([8, 8])

In [38]:
# Show the 4-D shape, the output reshaped to 2-D, and the raw 4-D output.
Y.shape,Y.reshape(Y.shape[2:]),Y

(torch.Size([1, 1, 8, 8]),
 tensor([[-0.0943,  0.1016,  0.3790, -0.0425, -0.0115,  0.0035,  0.0859,  0.0667],
         [ 0.4600,  0.4985, -0.1226,  0.3465,  0.2424,  0.1764,  0.3253, -0.0366],
         [ 0.3854, -0.0013,  0.3502,  0.5027,  0.1907,  0.2340,  0.1127,  0.0825],
         [ 0.1323,  0.5019,  0.2893,  0.2906,  0.2233,  0.0676,  0.3268,  0.1912],
         [ 0.2779,  0.5621,  0.0941,  0.1532,  0.1509,  0.6839,  0.3906, -0.2625],
         [ 0.3582, -0.0478,  0.3393,  0.4011,  0.2877,  0.0278,  0.1298,  0.1177],
         [ 0.1119,  0.3170,  0.5071, -0.0131,  0.0157,  0.3968,  0.5809,  0.1533],
         [ 0.4059,  0.4821,  0.1945,  0.5828,  0.5937,  0.5340,  0.3785,  0.2926]],
        grad_fn=<ReshapeAliasBackward0>),
 tensor([[[[-0.0943,  0.1016,  0.3790, -0.0425, -0.0115,  0.0035,  0.0859,
             0.0667],
           [ 0.4600,  0.4985, -0.1226,  0.3465,  0.2424,  0.1764,  0.3253,
            -0.0366],
           [ 0.3854, -0.0013,  0.3502,  0.5027,  0.1907,  0.2340,  0.112

In [47]:
# Kernel of height 5 and width 3; padding of 2 on each side vertically and
# 1 on each side horizontally, so the 8x8 input size is preserved.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=(5, 3), padding=(2, 1))
X = torch.rand(8, 8)
compute_conv2d(conv2d, X).shape

torch.Size([8, 8])

In [49]:
# Stride: a stride of 2 both vertically and horizontally halves 8x8 to 4x4.
conv2d = nn.Conv2d(in_channels=1, out_channels=1, kernel_size=3, padding=1, stride=2)
X = torch.rand(8, 8)
compute_conv2d(conv2d, X).shape

torch.Size([4, 4])

1. 彩色图像具有标准的RGB通道来代表红、绿和蓝
    - 每个RGB输入图像具有 $3\times h\times w$ 的形状。我们将这个大小为 $3$ 的轴称为通道（channel）维度
2. 输入X的通道数要和kernel的通道数保持一致，因为要对每个通道分别做互相关运算，再把各通道的结果矩阵相加

In [55]:
def corr2d_multi_in(X, kernel):
    """Multi-input-channel cross-correlation.

    Pairs each channel of ``X`` with the matching channel of ``kernel``
    (iterating over their first axis), cross-correlates every pair with
    ``d2l.corr2d``, and adds the per-channel results together.
    """
    per_channel = [d2l.corr2d(x_c, k_c) for x_c, k_c in zip(X, kernel)]
    return sum(per_channel)

In [56]:
# Two input channels of 3x3 each, and a matching two-channel 2x2 kernel.
X = torch.tensor([
    [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
    [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
])
K = torch.tensor([
    [[0.0, 1.0], [2.0, 3.0]],
    [[1.0, 2.0], [3.0, 4.0]],
])

corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

- 多输出通道
    - 用 $c_i$ 和 $c_o$ 分别表示输入和输出通道的数目，并让 $k_h$ 和 $k_w$ 分别为卷积核的高度和宽度
        - 完整卷积核的形状：$c_o\times c_i\times k_h\times k_w$
        - 每个输出通道对应的卷积核形状：$c_i\times k_h\times k_w$
        - 每个输出通道先获取所有输入通道，再以对应该输出通道的卷积核计算出结果。

In [58]:
def corr2d_multi_in_out(X,K):
    """Multi-input, multi-output-channel cross-correlation.

    Iterates over the first axis of ``K`` (one kernel per output channel),
    runs ``corr2d_multi_in`` on ``X`` with each of them, and stacks the
    results along a new leading axis.
    """
    outputs = []
    for k_out in K:
        outputs.append(corr2d_multi_in(X, k_out))
    return torch.stack(outputs, dim=0)

- torch.stack(tensors, dim=0, out=None) 沿着一个**新增**的维度把若干个形状相同的张量堆叠起来，dim 指定新增维度所在的位置；这与沿已有维度拼接（concatenate）的 torch.cat 不同。
- https://blog.csdn.net/realcoder/article/details/105846408#:~:text=torch.stack%20%28%29%20%E5%87%BD%E6%95%B0%20%EF%BC%9A%20torch.stack%20%28sequence%2C%20dim%3D0%29%201.%E5%87%BD%E6%95%B0,%E8%BF%94%E5%9B%9E%E7%9A%84%E7%BB%93%E6%9E%9C%E4%BC%9A%E6%96%B0%E5%A2%9E%E4%B8%80%E4%B8%AA%E7%BB%B4%E5%BA%A6%EF%BC%8C%E8%80%8C%20stack%EF%BC%88%EF%BC%89%E5%87%BD%E6%95%B0%20%E6%8C%87%E5%AE%9A%E7%9A%84dim%E5%8F%82%E6%95%B0%EF%BC%8C%E5%B0%B1%E6%98%AF%E6%96%B0%E5%A2%9E%E7%BB%B4%E5%BA%A6%E7%9A%84%20%EF%BC%88%20%E4%B8%8B%E6%A0%87%20%EF%BC%89%20%E4%BD%8D%E7%BD%AE%E3%80%82%202.

In [59]:
# Build a kernel with 3 output channels by stacking K, K + 1 and K + 2 along a new leading axis.
# Caution: this rebinds K, so running the cell a second time stacks the already-stacked tensor again.
K = torch.stack([K, K + 1, K + 2], dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [60]:
# One 2x2 output map per output channel, giving a result of shape (3, 2, 2).
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

- 卷积的本质是有效提取相邻像素间的相关特征；**$1\times 1$ 卷积层通常用于调整网络层的通道数量和控制模型复杂性。**

1. pooling 降低卷积层对位置的敏感性，同时降低对空间降采样表示的敏感性
    - max pooling, avg pooling

In [63]:
def pool2d(X, pool_size, mode='max'):
    """Slide a pooling window over a 2-D input with stride 1 and no padding.

    :param X: 2-D input tensor.
    :param pool_size: ``(p_h, p_w)``, the height and width of the pooling window.
    :param mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.
    :return: tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
        holding the max (or mean) of each window.
    :raises ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front; previously they silently produced an
    # all-zero result because neither branch in the loop matched.
    if mode not in ('max', 'avg'):
        raise ValueError(f"mode must be 'max' or 'avg', got {mode!r}")
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i: i + p_h, j: j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

In [66]:
# 3x3 test input; max pooling with a 2x2 window.
X = torch.tensor([
    [0.0, 1.0, 2.0],
    [3.0, 4.0, 5.0],
    [6.0, 7.0, 8.0],
])
pool2d(X, (2, 2), mode='max')

tensor([[4., 5.],
        [7., 8.]])

In [65]:
# Average pooling over the same input and 2x2 window.
pool2d(X, (2, 2), mode='avg')

tensor([[2., 3.],
        [5., 6.]])

In [68]:
# 4-D input (batch 1, channel 1, 4x4) holding the values 0..15.
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
# Built-in max pooling with a 3x3 window, left at its default stride.
# The layer is named max_pool rather than pool2d so that it does not shadow
# the hand-written pool2d function defined earlier in the notebook.
max_pool = nn.MaxPool2d(3)
max_pool(X)

tensor([[[[10.]]]])

In [69]:
# 3x3 window with padding 1 and stride 2 set explicitly.
# The layer is named max_pool rather than pool2d so that it does not shadow
# the hand-written pool2d function defined earlier in the notebook.
max_pool = nn.MaxPool2d(3, padding=1, stride=2)
max_pool(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [70]:
# Rectangular 2x3 window with separate (height, width) stride and padding.
# The layer is named max_pool rather than pool2d so that it does not shadow
# the hand-written pool2d function defined earlier in the notebook.
max_pool = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))
max_pool(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [74]:
# Concatenate along axis 0 (batch): the result has shape (2, 1, 4, 4).
X = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
X = torch.cat([X, X + 1], dim=0)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]],


        [[[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [73]:
# Concatenate along axis 1 (channel): the result has shape (1, 2, 4, 4).
X = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
X = torch.cat([X, X + 1], dim=1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [75]:
# Concatenate along axis 2 (height): the result has shape (1, 1, 8, 4).
X = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
X = torch.cat([X, X + 1], dim=2)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.],
          [ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [76]:
# Concatenate along axis 3 (width): the result has shape (1, 1, 4, 8).
X = torch.arange(16, dtype=torch.float32).view(1, 1, 4, 4)
X = torch.cat([X, X + 1], dim=3)
X

tensor([[[[ 0.,  1.,  2.,  3.,  1.,  2.,  3.,  4.],
          [ 4.,  5.,  6.,  7.,  5.,  6.,  7.,  8.],
          [ 8.,  9., 10., 11.,  9., 10., 11., 12.],
          [12., 13., 14., 15., 13., 14., 15., 16.]]]])