In [1]:
import torch
from d2l import torch as d2l

In [7]:
def corr2d(X, K):
    """Compute the 2-D cross-correlation of X with the kernel K.

    The kernel slides over X one position at a time with no padding; each
    output entry is the sum of the element-wise product between the kernel
    and the window of X it currently covers.

    Args:
        X: 2-D tensor to scan.
        K: 2-D kernel tensor.

    Returns:
        A tensor created by torch.zeros (default dtype) of shape
        (X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1).
    """
    kernel_rows, kernel_cols = K.shape
    out_rows = X.shape[0] - kernel_rows + 1
    out_cols = X.shape[1] - kernel_cols + 1
    Y = torch.zeros(size=(out_rows, out_cols))
    for row in range(out_rows):
        # Rows of X covered by the kernel at this vertical position.
        row_band = X[row:row + kernel_rows]
        for col in range(out_cols):
            window = row_band[:, col:col + kernel_cols]
            Y[row, col] = (window * K).sum()
    return Y

In [5]:
# Iterating over X and K walks axis 0 (the channel axis) of both in lockstep.
def corr2d_multi_in(X, K):
    """Cross-correlate a multi-channel input with a multi-channel kernel.

    Each channel of X is paired with the same-index channel of K, passed
    through corr2d, and the per-channel results are added together.

    Args:
        X: input whose axis 0 indexes channels.
        K: kernel whose axis 0 indexes the matching channels.

    Returns:
        The sum of the per-channel corr2d results.
    """
    total = 0
    for channel_input, channel_kernel in zip(X, K):
        total = total + corr2d(channel_input, channel_kernel)
    return total

In [8]:
# Two-channel 3x3 input: X[0] and X[1] are the per-channel 2-D maps.
X = torch.tensor([[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
               [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]])
# Matching two-channel 2x2 kernel: K[c] is applied to X[c].
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])

# Sum of the per-channel cross-correlations.
corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

In [9]:
X.shape

torch.Size([2, 3, 3])

In [10]:
K.shape

torch.Size([2, 2, 2])

In [11]:
# Single-channel result: cross-correlate channel 0 of X with channel 0 of K.
corr2d(X[0], K[0])

tensor([[19., 25.],
        [37., 43.]])

In [13]:
# Check that the multi-channel result equals the sum of the per-channel results.
corr2d(X[0], K[0]) + corr2d(X[1], K[1])

tensor([[ 56.,  72.],
        [104., 120.]])

## 多输入多输出通道

In [27]:
# Multiple output channels
def corr2d_multi_in_out(X, K):
    """Cross-correlate a multi-channel input with a bank of kernels.

    Every entry along axis 0 of K is one multi-channel kernel. Each of them is
    applied to X with corr2d_multi_in, and the results are stacked along a new
    leading axis, one slice per output channel.

    Args:
        X: multi-channel input passed unchanged to corr2d_multi_in.
        K: kernels whose axis 0 indexes output channels.

    Returns:
        Tensor with one corr2d_multi_in result per entry of K along axis 0.
    """
    # torch.stack needs a real sequence (list or tuple) of tensors; a bare
    # generator expression such as `(f(k) for k in K)` is rejected, so the
    # results are collected into a list first.
    per_output_channel = []
    for kernel in K:
        per_output_channel.append(corr2d_multi_in(X, kernel))
    return torch.stack(per_output_channel, dim=0)

In [19]:
# Base kernel with 2 input channels, each 2x2.
K = torch.tensor([[[0.0, 1.0], [2.0, 3.0]], [[1.0, 2.0], [3.0, 4.0]]])
# Stack three shifted copies along a new leading axis to get 3 output channels.
K = torch.stack((K, K+1, K+2), dim=0)

In [21]:
K.shape

torch.Size([3, 2, 2, 2])

In [30]:
X.shape

torch.Size([2, 3, 3])

In [29]:
corr2d_multi_in_out(X, K).shape

torch.Size([3, 2, 2])

## 1x1 卷积层实现

In [31]:
def corr2d_multi_in_out_1x1(X, K):
    """Apply a 1x1 convolution as a single matrix multiplication.

    Args:
        X: input of shape (c_i, h, w).
        K: kernel whose axis 0 has size c_o and which holds exactly
            c_o * c_i elements (i.e. a 1x1 spatial window).

    Returns:
        Tensor of shape (c_o, h, w).
    """
    in_channels, height, width = X.shape
    out_channels = K.shape[0]
    # Flatten the spatial axes so that every pixel becomes one column.
    pixels = X.reshape((in_channels, height * width))
    weights = K.reshape((out_channels, in_channels))
    # Same matrix multiplication a fully connected layer performs.
    mixed = weights @ pixels
    return mixed.reshape((out_channels, height, width))

In [32]:
# Random input with 3 channels of size 3x3, and a kernel bank with 2 output
# channels, 3 input channels and a 1x1 spatial window.
# NOTE(review): no torch.manual_seed call is visible, so these draws differ
# between runs.
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))

In [37]:
corr2d_multi_in_out_1x1(X, K).shape

torch.Size([2, 3, 3])

In [38]:
corr2d_multi_in_out(X, K).shape

torch.Size([2, 3, 3])

In [36]:
# Error check: the 1x1 shortcut should agree with the general implementation.
# NOTE(review): this averages signed differences, so positive and negative
# errors can cancel; something like (a - b).abs().max() would be a stricter check.
(corr2d_multi_in_out_1x1(X, K) - corr2d_multi_in_out(X, K)).mean()

tensor(0.)

## 池化层实现
池化层的作用：

1. 降低卷积层对位置的敏感性
2. 降低对空间降采样表示的敏感性，保持一定的平移不变性

In [45]:
def pool2d(X, pool_size, mode='max'):
    """Pool a 2-D tensor with a sliding window (stride 1, no padding).

    Args:
        X: 2-D tensor to pool.
        pool_size: pair (p_h, p_w) giving the window height and width.
        mode: 'max' for max pooling or 'avg' for average pooling.

    Returns:
        A tensor created by torch.zeros (default dtype) of shape
        (X.shape[0] - p_h + 1, X.shape[1] - p_w + 1) holding the pooled values.

    Raises:
        ValueError: if mode is neither 'max' nor 'avg'.
    """
    # Reject unknown modes up front; otherwise neither branch in the loop would
    # run and the caller would silently get an all-zero tensor back.
    if mode not in ('max', 'avg'):
        raise ValueError(
            f"unsupported pooling mode {mode!r}; expected 'max' or 'avg'")
    use_max = mode == 'max'

    p_h, p_w = pool_size
    Y = torch.zeros(size=(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i:i + p_h, j:j + p_w]
            Y[i, j] = window.max() if use_max else window.mean()
    return Y

In [50]:
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])

In [51]:
pool2d(X, (2, 2), 'max')

tensor([[4., 5.],
        [7., 8.]])

In [52]:
pool2d(X, (2, 2), 'avg')

tensor([[2., 3.],
        [5., 6.]])

## 池化层的步幅与填充

In [65]:
# 4x4 grid of the values 0..15 (float32), reshaped to 4-D with two leading
# axes of size 1, as nn.MaxPool2d takes (batch, channel, height, width) input.
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4))
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [54]:
import torch.nn as nn

In [56]:
# 3x3 max-pooling window; per the nn.MaxPool2d docs the stride defaults to the
# kernel size when it is not given.
# NOTE(review): this rebinds the name `pool2d`, shadowing the hand-written
# pool2d function defined earlier, so the earlier pool2d(X, (2, 2), ...) cells
# no longer work if re-run after this one.
pool2d = nn.MaxPool2d(3)

In [57]:
pool2d(X)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


tensor([[[[10.]]]])

In [66]:
# Rectangular 2x3 window (height, width) with padding 1 on each axis and a
# stride of 2 vertically and 3 horizontally.
pool2d = nn.MaxPool2d((2, 3), padding=(1, 1), stride=(2, 3))
pool2d(X)

tensor([[[[ 1.,  3.],
          [ 9., 11.],
          [13., 15.]]]])

In [68]:
X.shape

torch.Size([1, 1, 4, 4])

In [69]:
# Append X + 1 along axis 1 so the input has two channels.
# NOTE(review): X is rebuilt from itself, so re-running this cell keeps adding
# channels; it must be executed exactly once after X is created.
X = torch.cat((X, X + 1), 1)
X.shape

torch.Size([1, 2, 4, 4])

In [70]:
# Pooling is applied to each channel separately, so the output keeps both channels.
pool2d = nn.MaxPool2d(3, padding=1, stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])

In [71]:
pool2d(X).shape

torch.Size([1, 2, 2, 2])