In [9]:
import torch
import torch.nn as nn


def corr2d(X,K):#@save
    '''二维互相关运算'''
    h,w=K.shape
    #经过卷积之后图片的大小
    Y=torch.zeros(X.shape[0]-h+1,X.shape[1]-w+1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i][j]=(X[i:i+h,j:j+w]*K).sum()
    return Y

In [10]:
X=torch.tensor([[0,1,2],[3,4,5],[6,7,8]])
K=torch.tensor([[0,1],[2,3]])
output=corr2d(X,K)
output

tensor([[19., 25.],
        [37., 43.]])

In [None]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size) -> None:
        super(Conv2D).__init__()
        self.weight=nn.Parameter(torch.rand(kernel_size))#torch.nn.Parameter()将一个不可训练的tensor转换成可以训练的类型parameter，并将这个parameter绑定到这个module里面。即在定义网络时这个tensor就是一个可以训练的参数了。使用这个函数的目的也是想让某些变量在学习的过程中不断的修改其值以达到最优化。
        self.bias=nn.Parameter(torch.zeros(1))

    def forward(self,x):
        return corr2d(x,self.weight)+self.bias

In [13]:
X=torch.zeros((6,8))
X[:,0:2]=1
X[:,6:8]=1
X


tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [15]:
K=torch.tensor([[1,-1]])
Y=corr2d(X,K)
Y# 检测到竖直的边缘

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [16]:
corr2d(X.T,K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [19]:
conv2d=nn.Conv2d(1,1,kernel_size=(1,2),bias=False)
X=X.reshape((1,1,6,8))
Y=Y.reshape((1,1,6,7))#第一维度是批量，第二维度是通道数
lr=3e-2

for i in range(30):
    Y_hat=conv2d(X)
    loss=(Y-Y_hat)**2
    conv2d.zero_grad()
    loss.sum().backward()
    conv2d.weight.data[:]-=lr*conv2d.weight.grad
    if (i+1)%2==0:
        print(f"epoch {i+1} ,loss {loss.sum():.3f}")


epoch 2 ,loss 1.283
epoch 4 ,loss 0.235
epoch 6 ,loss 0.047
epoch 8 ,loss 0.011
epoch 10 ,loss 0.003
epoch 12 ,loss 0.001
epoch 14 ,loss 0.000
epoch 16 ,loss 0.000
epoch 18 ,loss 0.000
epoch 20 ,loss 0.000
epoch 22 ,loss 0.000
epoch 24 ,loss 0.000
epoch 26 ,loss 0.000
epoch 28 ,loss 0.000
epoch 30 ,loss 0.000


In [21]:
conv2d.weight.data.reshape(1,2)

tensor([[ 1.0001, -0.9999]])

In [23]:
import torch
from torch import nn
def comp_conv2d(conv2d,X):
    X=X.reshape((1,1)+X.shape)
    Y=conv2d(X)

    return Y.reshape(Y.shape[2:])

conv2d=nn.Conv2d(1,1,kernel_size=3,padding=1)
X=torch.rand(size=(8,8))
comp_conv2d(conv2d,X).shape

torch.Size([8, 8])

In [25]:

# 默认情况下，填充为0，步幅为1，通常使用步幅一致，填充一致的；注意，在输入图像的边缘填充元素（通常填的元素是0）
conv2d=nn.Conv2d(1,1,kernel_size=(5,3),padding=(2,1))
X=torch.rand(size=(8,8))
comp_conv2d(conv2d,X).shape

torch.Size([8, 8])

In [26]:
conv2d=nn.Conv2d(1,1,kernel_size=(3,3),padding=1,stride=2)
comp_conv2d(conv2d,X).shape

torch.Size([4, 4])

In [27]:
conv2d=nn.Conv2d(1,1,kernel_size=(3,5),padding=(0,1),stride=(3,4))
comp_conv2d(conv2d,X).shape

torch.Size([2, 2])

In [35]:
import torch
def corr2d_muti_in(X,K):
    ans=[]
    for x,k in zip(X,K):
        ans.append(corr2d(x,k))
    return sum(ans)

X=torch.tensor([[[0,1,2],[3,4,5],[6,7,8]],[[1,2,3],[4,5,6],[7,8,9]]])
X.shape
K=torch.tensor([[[0,1],[2,3]],[[1,2],[3,4]]])
K.shape

torch.Size([2, 2, 2])

In [36]:
corr2d_muti_in(X,K)

tensor([[ 56.,  72.],
        [104., 120.]])

In [37]:
def corr2d_muti_in_out(X,K):
    ans=[]
    for k in K:
        ans.append(corr2d_muti_in(X,k))
    return torch.stack(ans,dim=0)

K=torch.stack((K,K+1,K+2),dim=0)
K.shape

torch.Size([3, 2, 2, 2])

In [39]:
ans=corr2d_muti_in_out(X,K)
ans.shape

torch.Size([3, 2, 2])

In [40]:
#卷积的本质是有效提取相邻像素之间的相关特征：在高度和宽度维度上，实现识别相邻元素之间相互作用的能力
def corr2d_muti_in_out_1x1(X,K):
    ci,h,w=X.shape
    co=K.shape[0]
    X=X.reshape((ci,h*w))
    K=K.reshape((co,ci))
    Y=torch.matmul(K,X)
    return Y.reshape(co,h,w)#注意，1*1卷积核不会改变图像的高和宽，只会改变图像的通道数目
X=torch.normal(0,1,(3,3,3))
print("X= ",X)
K=torch.normal(0,1,(2,3,1,1))
print("K =",K)
Y1=corr2d_muti_in_out_1x1(X,K)

X=  tensor([[[ 2.0955,  0.5396, -1.2539],
         [-0.4471, -0.5789,  2.5363],
         [-1.0253,  0.8725, -0.4782]],

        [[ 0.5406, -1.7492, -2.0460],
         [ 0.5558,  1.8927,  0.2620],
         [ 1.1124, -2.3521, -1.3561]],

        [[-0.2961,  1.2635,  2.7274],
         [-1.7963, -0.3104, -0.0049],
         [ 0.6914,  0.4982,  1.7750]]])
K = tensor([[[[-1.7657]],

         [[ 0.8298]],

         [[-1.3106]]],


        [[[ 0.4125]],

         [[ 0.8901]],

         [[-0.3528]]]])


In [41]:
Y2=corr2d_muti_in_out(X,K)
print(Y1)

tensor([[[-2.8634, -4.0601, -3.0581],
         [ 3.6048,  2.9995, -4.2545],
         [ 1.8273, -4.1452, -2.6072]],

        [[ 1.4501, -1.7800, -3.3006],
         [ 0.9440,  1.5553,  1.2812],
         [ 0.3232, -1.9094, -2.0306]]])


In [42]:
print(Y2)

tensor([[[-2.8634, -4.0601, -3.0581],
         [ 3.6048,  2.9995, -4.2545],
         [ 1.8273, -4.1452, -2.6072]],

        [[ 1.4501, -1.7800, -3.3006],
         [ 0.9440,  1.5553,  1.2812],
         [ 0.3232, -1.9094, -2.0306]]])


In [47]:
assert float(torch.abs(Y1-Y2).sum())<1e-7,"为假时的报错信息写在这里"

AssertionError: False

In [59]:
def pool2d(X,pool_size,mode="max"):
    p_h,p_w=pool_size
    Y=torch.zeros(X.shape[0]-p_h+1,X.shape[1]-p_w+1)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode=="max":
               # print("X[i:i+p_h][j:j+p_w]",X[i:i+p_h])#X[i:i+p_h][j:j+p_w]：这种嵌套切片容易出错，因为第一步切片操作返回的是中间结果，第二步在中间结果上继续切片，容易导致选择的范围与预期不符。
                Y[i,j]=X[i:i+p_h,j:j+p_w].max()
            else:
                Y[i,j]=X[i:i+p_h,j:j+p_w].mean()
    return Y

X=torch.tensor([[0,1,2],[3,4,5],[6,7,8]])
pool2d(X,(2,2),"max")
X

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])

In [60]:
pool2d(X,(2,2),"avg")

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [61]:
X=torch.tensor([[0.0,1.0,2.0],[3.0,4.0,5.0],[6.0,7.0,8.0]])
pool2d(X,(2,2),"avg")#注意，输入不能是长整型

tensor([[2., 3.],
        [5., 6.]])

In [71]:
X=torch.arange(16,dtype=torch.float32).reshape((1,1,4,4))
X

tensor([[[[ 0.,  1.,  2.,  3.],
          [ 4.,  5.,  6.,  7.],
          [ 8.,  9., 10., 11.],
          [12., 13., 14., 15.]]]])

In [63]:
pool2d=nn.MaxPool2d(3)
pool2d(X)

tensor([[[[10.]]]])

In [73]:
pool2d=nn.MaxPool2d(3,padding=1,stride=2)
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])

In [66]:
pool2d=nn.MaxPool2d((2,3),stride=(2,3),padding=(0,1))
pool2d(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [72]:
X=torch.cat((X,X+1),dim=1)#批量，通道，h，w
pool2d=nn.MaxPool2d(3,padding=1,stride=2)
print(X.shape)
print(pool2d(X).shape)

torch.Size([1, 2, 4, 4])
torch.Size([1, 2, 2, 2])


In [1]:
a=[12,1,2,5]
a.append(None)
a

[12, 1, 2, 5, None]