# CNN的基础概念与操作实现

In [2]:
import torch
import sys
sys.path.append("..")
from utils import *

In [4]:
# Sanity-check the hand-written 2-D kernel computation (corr2d) on a 3x3 input and a 2x2 kernel
X=torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
K=torch.tensor([[0,1],[2,3]])
corr2d(X,K)

tensor([[25., 31.],
        [43., 49.]])

In [7]:
# 2-D convolution layer definition
class Conv2D(nn.Module):
    """2-D cross-correlation layer with a learnable kernel and a scalar bias.

    Args:
        kernel_size: shape of the kernel, forwarded unchanged to ``torch.randn``.
    """

    def __init__(self, kernel_size):
        super().__init__()
        # Draw the initial values from a standard normal distribution
        # (kernel first, then bias) and register them as parameters.
        initial_kernel = torch.randn(kernel_size)
        initial_bias = torch.randn(1)
        self.weight = nn.Parameter(initial_kernel)
        self.bias = nn.Parameter(initial_bias)

    def forward(self, x):
        """Return ``corr2d(x, self.weight)`` shifted by the bias."""
        response = corr2d(x, self.weight)
        return response + self.bias

In [8]:
# Learn the kernel from data: start from a randomly initialised 1x2 kernel
conv2d=Conv2D(kernel_size=(1,2))

In [9]:
# Build the input: a 6x8 image of ones with a band of zeros in columns 2..5
X = torch.ones(6, 8)
X[:, 2:6] = 0.0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [12]:
# A [1, -1] kernel responds only where horizontally adjacent values differ:
# +1 at a 1 -> 0 transition, -1 at a 0 -> 1 transition, 0 elsewhere.
K=torch.tensor([[1,-1]])
# Y is the target edge map the learned kernel should reproduce
Y=corr2d(X,K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [14]:
# Learn the kernel from the (X, Y) pair with plain gradient descent
step=20
lr=0.01
for i in range(step):
    Y_hat = conv2d(X)
    # Sum of squared errors between the target and the prediction
    loss = ((Y - Y_hat) ** 2).sum()
    loss.backward()

    # Gradient-descent step. Updating under no_grad (instead of through
    # `.data`) keeps autograd's in-place bookkeeping intact while still
    # excluding the update itself from the graph.
    with torch.no_grad():
        conv2d.weight.sub_(lr * conv2d.weight.grad)
        conv2d.bias.sub_(lr * conv2d.bias.grad)

    # backward() accumulates into .grad, so reset it before the next step
    conv2d.weight.grad.zero_()
    conv2d.bias.grad.zero_()
    if (i + 1) % 5 == 0:
        print('Step %d, loss %.3f' % (i + 1, loss.item()))

Step 5, loss 0.547
Step 10, loss 0.127
Step 15, loss 0.032
Step 20, loss 0.009


# 多输入通道和多输出通道

## 多输入通道
当输入包含多个通道时，输入有多少个通道，卷积核也要有相同数量的通道：每个输入通道分别与对应通道的卷积核做互相关运算，再把各通道的结果按元素相加，得到最终的输出。

In [15]:
# Create the sample data
# X holds two 3x3 input channels -> X.shape == (2, 3, 3)
X = torch.tensor([
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
])
# K holds one 2x2 kernel per input channel -> K.shape == (2, 2, 2)
K = torch.tensor([
    [[0, 1], [2, 3]],
    [[1, 2], [3, 4]],
])

torch.Size([2, 2, 2])

In [18]:
def corr2d_multi_in(X, K):
    """Multi-input-channel cross-correlation.

    Each slice of ``X`` along dimension 0 (the channel dimension) is
    correlated with the slice of ``K`` at the same index, and the
    per-channel results are summed into a single output.
    """
    # Channel 0 seeds the accumulator; later channels are added in place.
    total = corr2d(X[0, :, :], K[0, :, :])
    for channel in range(1, X.shape[0]):
        per_channel = corr2d(X[channel, :, :], K[channel, :, :])
        total += per_channel
    return total
# Two input channels are collapsed into one 2x2 output map
corr2d_multi_in(X,K)

tensor([[ 56.,  72.],
        [104., 120.]])

## 多输出通道
在上面的实现中，各输入通道的互相关结果最后被相加，因此只得到一个输出通道。如果想得到多个输出通道，可以为每个输出通道各准备一组形状为 (c_i, k_h, k_w) 的卷积核，让每组卷积核分别与输入做多输入通道的互相关运算，再把各个结果沿通道维堆叠起来。

In [19]:
def corr2d_multi_in_out(X, K):
    """Multi-input, multi-output-channel cross-correlation.

    Iterates over dimension 0 of ``K``; each slice is applied to the whole
    input ``X`` via ``corr2d_multi_in`` and yields one output channel. The
    channels are then stacked along a new leading dimension.
    """
    output_channels = []
    for kernel_set in K:
        output_channels.append(corr2d_multi_in(X, kernel_set))
    return torch.stack(output_channels)


In [20]:
# Stack three kernel sets (K, K + 1, K + 2) along a new leading dimension,
# one set per output channel.
K = torch.stack([K, K + 1, K + 2])
K.shape

torch.Size([3, 2, 2, 2])

In [21]:
# One 2x2 output map per kernel set stacked in K
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

# 1x1卷积层

In [23]:
# Random input with 3 channels of 3x3 values
X = torch.rand((3, 3, 3))

# 1x1 kernel with 3 input channels and 2 output channels
K = torch.rand((2, 3, 1, 1))

In [24]:
def corr2d_multi_in_out_1x1(X, K):
    """Apply a 1x1 multi-channel cross-correlation as one matrix product.

    Args:
        X: input tensor of shape (c_i, h, w).
        K: kernel tensor whose first dimension is c_o and which holds
            c_o * c_i elements in total, e.g. shape (c_o, c_i, 1, 1).

    Returns:
        Tensor of shape (c_o, h, w).
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # reshape (unlike view) also accepts non-contiguous inputs such as
    # transposed or sliced tensors, copying only when it has to.
    X = X.reshape(c_i, h * w)
    K = K.reshape(c_o, c_i)
    # Matrix multiplication, as in a fully connected layer: every pixel's
    # channel vector is mapped from c_i to c_o values.
    Y = torch.mm(K, X)
    return Y.reshape(c_o, h, w)

In [25]:
# Cross-check: the matrix-multiplication version against the generic
# multi-input/multi-output implementation on the same X and K.
Y1 = corr2d_multi_in_out_1x1(X, K)
Y2 = corr2d_multi_in_out(X, K)

# True when the norm of the difference is below 1e-6
(Y1 - Y2).norm().item() < 1e-6

True

# 池化层

In [30]:
# Max pooling and average pooling
def pool2d(X, pool_size, mode="max"):
    """Slide a pooling window over a 2-D tensor (stride 1, no padding).

    Args:
        X: 2-D input tensor of shape (h, w).
        pool_size: ``(ph, pw)``, the window height and width.
        mode: ``"max"`` for max pooling or ``"avg"`` for average pooling.

    Returns:
        Tensor of shape (h - ph + 1, w - pw + 1) created by ``torch.zeros``
        (default float dtype).

    Raises:
        ValueError: if ``mode`` is neither ``"max"`` nor ``"avg"``.
    """
    # Fail loudly instead of silently returning an all-zero result.
    if mode not in ("max", "avg"):
        raise ValueError(
            "mode must be 'max' or 'avg', got {!r}".format(mode))
    ph, pw = pool_size
    Y = torch.zeros(size=(X.shape[0] - ph + 1, X.shape[1] - pw + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + ph, j:j + pw]
            if mode == "max":
                Y[i, j] = window.max()
            else:
                # mean() is undefined for integer/bool tensors, so promote
                # those to the output dtype first.
                if not (window.is_floating_point() or window.is_complex()):
                    window = window.to(Y.dtype)
                Y[i, j] = window.mean()
    return Y

In [31]:
# 2x2 pooling with the default mode ("max") over a 3x3 input
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
pool2d(X, (2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [32]:
# IPython help query (not plain Python): shows the documentation of nn.MaxPool2d
nn.MaxPool2d?

In [33]:
# Try the official pooling-layer API.
# NOTE: this rebinds the name `pool2d`, shadowing the hand-written pool2d function.
pool2d=nn.MaxPool2d(kernel_size=3)

In [36]:
# Values 0..15 arranged as a single 4x4 image with shape (1, 1, 4, 4)
X = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [37]:
# A 3x3 window fits the 4x4 input only once here, so the output is a single value
pool2d(X)

tensor([[[[10.]]]])

In [38]:
# Append X + 1 along dim 1, giving a tensor of shape (1, 2, 4, 4)
X = torch.cat((X, X + 1), dim=1)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]],

         [[ 1.,  2.,  3.,  4.],
          [ 5.,  6.,  7.,  8.],
          [ 9., 10., 11., 12.],
          [13., 14., 15., 16.]]]])

In [39]:
# 3x3 window with padding 1 and stride 2; the slices along dim 1 are pooled
# independently, so the output keeps both of them.
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])