In [2]:
import torch
from torch import nn
from d2l import torch as d2l




### 手动模拟卷积过程

In [3]:
def corr2d(X,K):
    h,w=K.shape
    Y=torch.zeros((X.shape[0]-h+1,X.shape[1]-w+1))
    #用循环模拟卷积过程:对应位置相乘,再求和,此处为padding=1
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i,j]=(X[i:i+h,j:j+w]*K).sum()
    return Y

In [4]:
X=torch.tensor([i for i in range(25)]).reshape([5,5])
K=torch.ones(9).reshape([3,3])
print(X,'\n',K,'\n',corr2d(X,K))

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]]) 
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) 
 tensor([[ 54.,  63.,  72.],
        [ 99., 108., 117.],
        [144., 153., 162.]])


定义卷积层

In [5]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super().__init__()
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))
    #卷积运算函数
    def corr2d(X,K):
        h,w=K.shape
        Y=torch.zeros((X.shape[0]-h+1,X.shape[1]-h+1))
        #用循环模拟卷积过程:对应位置相乘,再求和,此处为padding=1
        for i in range(Y.shape[0]):
            for j in range(Y.shape[1]):
                Y[i,j]=(X[i:i+h,j:j+w]*K).sum()
        return Y

    def forward(self, x):
        return corr2d(x, self.weight) + self.bias

In [6]:
X=torch.ones([6,8])
X[:,2:6]=0
print(X)

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])


In [7]:
K = torch.tensor([[1.0, -1.0]])
Y=corr2d(X,K)
print(Y) #仅能检测竖直方向的边缘

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])


In [8]:
print(Conv2D.corr2d(X,K.T)) #检测横向边缘

tensor([[0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0.]])


#### 卷积层的训练

In [9]:
conv2d=nn.Conv2d(1,1,kernel_size=(1,2),bias=False)
#batch,channal,y,x
X=X.reshape((1,1,6,8))
Y=Y.reshape((1,1,6,7))

In [10]:
for i in range(10):
    Y_hat=conv2d(X)#预测值
    loss=(Y_hat-Y)**2/2 #mse
    conv2d.zero_grad()
    loss.sum().backward()#矩阵不能求导数,只有值可以,所以用sum后的值求导
    lr=3e-2
    #优化
    conv2d.weight.data[:]-=lr*conv2d.weight.grad
    if (i+1)%2==0:
                print(f'epoch {i+1}, loss {loss.sum():.3f}')

epoch 2, loss 7.753
epoch 4, loss 3.503
epoch 6, loss 1.584
epoch 8, loss 0.716
epoch 10, loss 0.324


In [11]:
print(conv2d.weight.data) #卷积核的参数值

tensor([[[[ 0.8095, -0.8095]]]])


##### 填充和步幅

In [28]:
def comp_conv2d(conv2d, x):
    x=x.reshape((1, 1) + x.shape)#增加两个维度
    Y=conv2d(x)
    return Y.reshape(x.shape)

conv2d=nn.Conv2d(1,1,kernel_size=3,padding=1,bias=False)
x=torch.rand(size=(8,8))
comp_conv2d(conv2d,X).shape

torch.Size([1, 1, 8, 8])

##### 非正方形的卷积核

In [29]:
conv2d=nn.Conv2d(1,1,kernel_size=(5,3),padding=(2,1),bias=False)
#输出计算公式
X=torch.rand(size=(8,8))
comp_conv2d(conv2d,X).shape

torch.Size([1, 1, 8, 8])

步幅

In [37]:
import torch
from torch import nn

# ##### 填充和步幅
def comp_conv2d(conv2d, X):
    # 1. 增加批量和通道两个维度，将 2D 图像 X 转换为 4D 张量
    X = X.reshape((1, 1) + X.shape)  # 现在 X 的形状是 (1, 1, 8, 8)
    # 2. 通过卷积层
    Y = conv2d(X)  # Y 的形状是 (1, 1, H_out, W_out)
    # 3. 修正：返回时去掉批量和通道维度，只返回特征图的形状
    # Y.shape[2:] 会得到 (H_out, W_out)
    return Y.reshape(Y.shape[2:])
# 创建一个卷积层
# in_channels=1, out_channels=1, kernel_size=3, padding=1, stride=2
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)

# 创建一个 2D 的输入图像
X = torch.rand(size=(8, 8))

# 调用函数并查看输出形状
output_shape = comp_conv2d(conv2d, X).shape


输入图像 X 的形状: torch.Size([8, 8])
卷积层 conv2d 的参数: Conv2d(1, 1, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
输出特征图的形状: torch.Size([4, 4])


#### channal通道
每个通道都有一个卷积核,结果是所有通道卷积结果的和
$\begin{aligned}&\bullet\text{ 输入 }\mathbf{X}:c_i\times n_h\times n_w\\&\bullet\text{ 核 }\mathbf{W}:c_i\times k_h\times k_w\\&\bullet\text{ 输出 }\mathbf{Y}:m_h\times m_w\end{aligned}$

如果有多个三维卷积核，那么每个核生成一个输出通道
$\begin{aligned}&\bullet\text{ 输入 }\mathbf{X}:c_i\times n_h\times n_w\\&\bullet\text{ 核 }\mathbf{W}:c_o\times c_i\times k_h\times k_w\\&\bullet\text{ 输出 }\mathbf{Y}:c_o\times m_h\hat{\times}m_w\end{aligned}$