In [10]:
import torch
from torch import nn
import traintools


In [11]:
# Worked example: a 3x3 input and a 2x2 kernel fed to the project helper
# traintools.corr2d (the recorded output matches a 2-D cross-correlation).
X = torch.arange(9.0).reshape(3, 3)  # rows: [0, 1, 2], [3, 4, 5], [6, 7, 8]
K = torch.arange(4.0).reshape(2, 2)  # rows: [0, 1], [2, 3]

traintools.corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

# 边缘检测

In [12]:
# 6x8 test image: ones in the two outer column pairs, zeros in columns 2..5.
# The 1 -> 0 and 0 -> 1 transitions between columns are the vertical edges.
X = torch.tensor([[1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0]] * 6)
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [13]:
# This 1x2 kernel detects edges by taking the difference between
# horizontally adjacent elements: equal neighbours give 0, unequal
# neighbours give a non-zero value.
# It can only detect vertical edges.
K = torch.tensor([[1.0, -1.0]])

Y = traintools.corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [21]:
# Single-input-channel, single-output-channel convolution with a 1x2 kernel
# and no bias term.
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)

# Add two leading singleton dimensions so the tensors use a
# (batch, channel, height, width) layout.
X = X.reshape(1, 1, 6, 8)
Y = Y.reshape(1, 1, 6, 7)
X, Y

(tensor([[[[1., 1., 0., 0., 0., 0., 1., 1.],
           [1., 1., 0., 0., 0., 0., 1., 1.],
           [1., 1., 0., 0., 0., 0., 1., 1.],
           [1., 1., 0., 0., 0., 0., 1., 1.],
           [1., 1., 0., 0., 0., 0., 1., 1.],
           [1., 1., 0., 0., 0., 0., 1., 1.]]]]),
 tensor([[[[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
           [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
           [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
           [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
           [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
           [ 0.,  1.,  0.,  0.,  0., -1.,  0.]]]]))

In [19]:

lr = 3e-2  # learning rate for the manual gradient-descent step

for i in range(10):
    Y_hat = conv2d(X)
    # Element-wise squared error; its sum is the scalar that is minimised.
    loss = (Y_hat - Y).pow(2)
    total_loss = loss.sum()
    conv2d.zero_grad()
    total_loss.backward()

    # Apply the update in place under no_grad so the step itself is not
    # recorded by autograd (preferred over writing through `.data`).
    with torch.no_grad():
        conv2d.weight.sub_(lr * conv2d.weight.grad)

    # Report every second epoch. The loss shown was computed before this
    # epoch's update; the weight shown is the value after it.
    if (i + 1) % 2 == 0:
        print(f'epoch {i+1}, loss {total_loss:.3f}')
        print(f'weight: {conv2d.weight.detach().reshape(-1)}')
        print(f'grad: {conv2d.weight.grad.reshape(-1)}')



epoch 2, loss 12.984
weight: tensor([ 0.2900, -0.3908])
grad: tensor([-8.5896, 16.1451])
epoch 4, loss 2.236
weight: tensor([ 0.6976, -0.7621])
grad: tensor([-2.6479,  7.4834])
epoch 6, loss 0.399
weight: tensor([ 0.8687, -0.9100])
grad: tensor([-0.5275,  3.6223])
epoch 8, loss 0.077
weight: tensor([ 0.9415, -0.9679])
grad: tensor([0.1404, 1.8402])
epoch 10, loss 0.017
weight: tensor([ 0.9730, -0.9899])
grad: tensor([0.2857, 0.9819])


In [20]:
# Display the learned kernel weights as a single 1x2 row.
conv2d.weight.data.reshape((1,2))

tensor([[ 0.9730, -0.9899]])

卷积层的输出有时被称为特征映射（feature map），因为它可以被视为输入映射到下一层的空间维度的转换器。

感受野（receptive field）：对于某一层的任意元素 $x$，其感受野是指在前向传播期间可能影响 $x$ 计算的所有元素（来自所有先前层）。

卷积核的高度和宽度通常取奇数（如 1、3、5、7），这样填充时可以在上下、左右两侧填充相同数量的行和列，保持对称。


In [29]:
import numpy as np


def corr2d_multi_in(X, K):
    """Combine per-channel ``traintools.corr2d`` results for a multi-channel input.

    Each channel of ``X`` is paired with the kernel channel at the same
    index, passed to ``traintools.corr2d``, and the per-channel results are
    summed.

    Args:
        X: input of shape (in_channels, height, width).
        K: kernel of shape (in_channels, kernel_height, kernel_width).

    Returns:
        The sum of the per-channel results (``0`` when there are no channels).

    Raises:
        ValueError: if ``X`` and ``K`` have a different number of channels.
    """
    # zip() would silently drop the extra channels of the longer argument,
    # so reject a channel-count mismatch explicitly.
    if len(X) != len(K):
        raise ValueError(
            f"channel mismatch: X has {len(X)} channels, K has {len(K)}"
        )
    return sum(traintools.corr2d(x, k) for x, k in zip(X, K))

# Two-channel 3x3 input; the second channel is the first channel plus one.
X = torch.tensor([
    [[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]],
    [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
])
# Matching two-channel 2x2 kernel.
K = torch.tensor([
    [[0.0, 1.0], [2.0, 3.0]],
    [[1.0, 2.0], [3.0, 4.0]],
])

corr2d_multi_in(X, K)

tensor([[ 56.,  72.],
        [104., 120.]])

In [30]:
def corr2d_multi_in_out(X, K):
    """Apply every output-channel kernel in ``K`` to the multi-channel input ``X``.

    Args:
        X: input of shape (in_channels, height, width).
        K: kernels of shape
            (out_channels, in_channels, kernel_height, kernel_width).

    Returns:
        A tensor holding one ``corr2d_multi_in`` result per entry along the
        first axis of ``K``, stacked along a new leading axis.
    """
    per_output = []
    for kernel in K:
        per_output.append(corr2d_multi_in(X, kernel))
    # torch.stack inserts the new axis at position 0 by default.
    return torch.stack(per_output)

In [31]:
# Build a kernel with 3 output channels by stacking K, K + 1 and K + 2
# along a new leading axis.
# Re-run guard: this cell rebinds K, so on a second run K is already the
# stacked 4-D tensor. In that case recover the original 3-D kernel from its
# first slice, so the cell gives the same result every time it is executed.
K_base = K if K.dim() == 3 else K[0]
K = torch.stack((K_base, K_base + 1, K_base + 2), dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [32]:
# Apply the stacked kernel K to X; the result holds one 2-D map per entry
# along K's first axis.
corr2d_multi_in_out(X, K)

tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

$1\times 1$ 卷积的唯一计算发生在通道维度上。
它通常用于调整通道数和控制模型复杂性。