In [1]:
import torch
from torch import nn
import sys
sys.path.append('..')
import library.d2lzh_pytorch as d2l

# 多输出通道
设卷积核输入通道数和输出通道数分别为 $c_i$ 和 $c_o$，高和宽分别为 $k_h$ 和 $k_w$。如果希望得到含多个通道的输出，我们可以为每个输出通道分别创建形状为 $c_i \times k_h \times k_w$ 的核数组。将它们在输出通道维上连结，卷积核的形状即 $c_o \times c_i \times k_h \times k_w$。

In [2]:
def corr2d_multi_in(X, K):
    """Sum the per-channel results of ``d2l.corr2d`` over the leading axis.

    Slice ``i`` of ``X`` is paired with slice ``i`` of ``K``; the results of
    the individual ``d2l.corr2d`` calls are added together. The number of
    channels processed is taken from ``K.shape[0]``.

    Args:
        X: input, indexed here as ``X[channel, :, :]``.
        K: kernel, indexed here as ``K[channel, :, :]``.

    Returns:
        The accumulated sum of the per-channel ``d2l.corr2d`` outputs.
    """
    # NOTE(review): d2l.corr2d lives outside this notebook; presumably it is a
    # single-channel 2-D cross-correlation -- confirm in library/d2lzh_pytorch.
    # Channel 0 seeds the accumulator; remaining channels are added in place.
    total = d2l.corr2d(X[0, :, :], K[0, :, :])
    for channel in range(1, K.shape[0]):
        total += d2l.corr2d(X[channel, :, :], K[channel, :, :])
    return total

In [3]:
def corr2d_multi_in_out(X, K):
    """Run ``corr2d_multi_in`` once per slice of ``K`` and stack the results.

    Args:
        X: input passed unchanged to every ``corr2d_multi_in`` call.
        K: kernel; each slice along dimension 0 produces one output map.

    Returns:
        The per-slice results joined with ``torch.stack`` along a new
        leading dimension.
    """
    outputs = []
    # Iterating over K walks its dimension 0; every slice is applied to the
    # same input X.
    for kernel in K:
        outputs.append(corr2d_multi_in(X, kernel))
    return torch.stack(outputs)

In [4]:
# Base kernel: two 2x2 slices.
K = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
# Stack the base kernel shifted by 0, 1 and 2 along a new leading dimension.
K = torch.stack([K + shift for shift in range(3)])
K.shape

torch.Size([3, 2, 2, 2])

In [5]:
# Two 3x3 channels: channel 0 holds 0..8 in row-major order, channel 1 holds 1..9.
X = torch.stack([torch.arange(9).view(3, 3) + shift for shift in range(2)])

In [6]:
# Apply every slice of K (along its dimension 0) to X; the displayed result
# holds one 2x2 map per slice.
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

# 1 * 1 卷积层

In [7]:
def corr2d_multi_in_out_1x1(X, K):
    """Compute a 1x1 multi-channel cross-correlation as a matrix product.

    With a 1x1 window every output pixel is a weighted sum of the input
    channels at that same pixel, so the whole operation is a product of a
    ``(c_o, c_i)`` weight matrix with the input flattened to
    ``(c_i, h * w)``.

    Args:
        X: tensor of shape ``(c_i, h, w)``. May be non-contiguous.
        K: tensor whose dimension 0 is ``c_o`` and which holds exactly
            ``c_o * c_i`` elements (e.g. shape ``(c_o, c_i, 1, 1)``).

    Returns:
        Tensor of shape ``(c_o, h, w)``.

    Raises:
        RuntimeError: if ``K`` cannot be reshaped to ``(c_o, c_i)`` or the
            operands are otherwise incompatible with ``torch.mm``.
    """
    c_i, h, w = X.shape
    c_o = K.shape[0]
    # reshape instead of view: view raises on non-contiguous tensors (for
    # example the result of permute or slicing), reshape copies only when it
    # has to and is otherwise identical.
    X = X.reshape(c_i, h * w)
    K = K.reshape(c_o, c_i)
    Y = torch.mm(K, X)
    return Y.reshape(c_o, h, w)

In [8]:
# Seed the global RNG so the random input and kernel are identical on every
# Restart & Run All.
torch.manual_seed(0)
X = torch.rand(3, 3, 3)     # input: 3 channels of 3x3
K = torch.rand(2, 3, 1, 1)  # kernel: 2 output channels, 3 input channels, 1x1 window

In [9]:
# The matrix-product shortcut (Y1) and the general multi-channel routine (Y2)
# should agree up to floating-point round-off.
Y2 = corr2d_multi_in_out(X, K)
Y1 = corr2d_multi_in_out_1x1(X, K)
(Y2 - Y1).norm().item() < 1e-6

True

# 池化层

In [14]:
def pool2d(X, pool_size, mode='max'):
    """Pool a 2-D tensor with a sliding window (stride 1, no padding).

    Args:
        X: 2-D tensor; it is converted to float before pooling.
        pool_size: ``(p_h, p_w)`` height and width of the pooling window.
        mode: ``'max'`` for max pooling or ``'avg'`` for average pooling.

    Returns:
        Float tensor of shape ``(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)``
        where entry ``[i, j]`` is the max/mean of the window whose top-left
        corner is at ``(i, j)``.

    Raises:
        ValueError: if ``mode`` is neither ``'max'`` nor ``'avg'``.
    """
    # Reject unknown modes up front; previously they fell through both
    # branches and silently produced an all-zero result.
    if mode not in ('max', 'avg'):
        raise ValueError(
            "mode must be 'max' or 'avg', got {!r}".format(mode))
    X = X.float()
    p_h, p_w = pool_size
    Y = torch.zeros(X.shape[0] - p_h + 1, X.shape[1] - p_w + 1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i: i + p_h, j: j + p_w]
            Y[i, j] = window.max() if mode == 'max' else window.mean()
    return Y

In [15]:
# 3x3 integer grid holding 0..8 in row-major order.
X = torch.arange(9).view(3, 3)
# 2x2 window with pool2d's default mode ('max').
pool2d(X, pool_size=(2, 2))

tensor([[4., 5.],
        [7., 8.]])

In [16]:
# Same 2x2 window over X, but averaging each window instead of taking its max.
pool2d(X, (2, 2), mode='avg')

tensor([[2., 3.],
        [5., 6.]])

In [17]:
# Values 0..15 laid out as a 4x4 grid with two leading singleton dimensions,
# giving the 4-D input that is fed to nn.MaxPool2d below.
X = torch.arange(16, dtype=torch.float).reshape(1, 1, 4, 4)
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

# 默认情况下, MaxPool2d 实例里步幅和池化窗口形状相同

In [21]:
# Only the window size is given, so the stride defaults to the window size;
# on the 4x4 input that leaves a single 3x3 window.
p2d = nn.MaxPool2d(kernel_size=3)
p2d(X)

tensor([[[[10.]]]])

In [26]:
# Set stride and padding explicitly instead of relying on the default stride
# (which would equal the 3x3 window size).
p2d = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

In [27]:
# Apply the stride-2, padding-1 pooling layer to the 4x4 input; the displayed
# output is 2x2.
p2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])