In [10]:
import torch
from torch import nn
from d2l import torch as d2l

from tqdm import trange

### 二维互相关运算

In [8]:
def corr2d(X, K):
    h, w = K.shape
    Y = torch.zeros(X.shape[0] - h + 1, X.shape[1] - w + 1).cuda()

    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i:i + h, j:j + w] * K).sum()

    return Y

In [9]:
X = torch.arange(9.).reshape(3, -1).cuda()
K = torch.arange(4.).reshape(2, -1).cuda()
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]], device='cuda:0')

### 卷基层

In [24]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size)).cuda()
        self.bias = nn.Parameter(torch.zeros(1)).cuda()
    
    def forward(self, X):
        return corr2d(X, self.weight) + self.bias

In [20]:
# 边缘检测
X = torch.ones((6, 8)).cuda()
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]], device='cuda:0')

In [25]:
net = Conv2D((1, 2))
net.weight.data = torch.tensor([[1., -1.]]).cuda()
net(X)

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]], device='cuda:0',
       grad_fn=<AddBackward0>)

### 填充和步幅

In [33]:
# padding 填充
# stride 步幅
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=2, stride=3).cuda()
X = torch.rand(size=(1, 1, 8, 8)).cuda()
X.shape

torch.Size([1, 1, 8, 8])

In [29]:
# padding = 1, stride = 2
Y = conv2d(X)
Y.shape

torch.Size([1, 1, 4, 4])

In [32]:
# padding = 2, stride = 2
Y = conv2d(X)
Y.shape

torch.Size([1, 1, 5, 5])

In [34]:
# padding = 2, stride = 3
Y = conv2d(X)
Y.shape

torch.Size([1, 1, 4, 4])

### 多输入多输出通道

In [35]:
# 多输入单输出
def corr2d_multi_in(X, K):
    return sum(corr2d(x, k) for x, k in zip(X, K))

In [36]:
X = torch.arange(18.).reshape(2, 3, 3)
K = torch.arange(8.).reshape(2, 2, 2)
corr2d_multi_in(X, K)

tensor([[268., 296.],
        [352., 380.]], device='cuda:0')

In [37]:
# 多输入多输出
def corr2d_multi_in_out(X, K):
    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)

In [38]:
K = torch.stack((K, K+1, K+2), 0)
K.shape

torch.Size([3, 2, 2, 2])

In [39]:
corr2d_multi_in_out(X, K)

tensor([[[268., 296.],
         [352., 380.]],

        [[320., 356.],
         [428., 464.]],

        [[372., 416.],
         [504., 548.]]], device='cuda:0')

### 简易实现

In [None]:
# num_in 输入通道数量  num_out 输出通道数量
num_in, num_out = 3, 3
conv2d = nn.Conv2d(num_in, num_out, kernel_size=3, padding=2, stride=3).cuda()