In [1]:
import torch
from torch import nn

In [3]:
def corr2d(X, K):
    h, w = K.shape
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y

In [4]:
X = torch.tensor([[0, 1, 2], [3, 4, 5], [6, 7, 8]])
K = torch.tensor([[0, 1], [2, 3]])
corr2d(X, K)

tensor([[19., 25.],
        [37., 43.]])

In [5]:
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        # 别忘了这一步
        super().__init__()
        # 别忘了用nn.Parameter包裹着
        self.weight = nn.Parameter(torch.rand(kernel_size))
        self.bias = nn.Parameter(torch.zeros(1))
    def forward(self, x):
        return corr2d(x, self.weight) + self.bias

In [6]:
X = torch.ones((6, 8))
X[:, 2:6] = 0
X

tensor([[1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.],
        [1., 1., 0., 0., 0., 0., 1., 1.]])

In [13]:
K = torch.tensor([[1, -1]])
Y = corr2d(X, K)
Y

tensor([[ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.],
        [ 0.,  1.,  0.,  0.,  0., -1.,  0.]])

In [12]:
corr2d(X.T, K)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [38]:
# 第一个1表示通道数，也就是当前层的深度为1，后面输入的图像的深度也必须跟此保持一致，为1
# 第二个1表示输出的深度，也就是所需卷积核的个数
# 第三个参数即卷积核的大小
conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
# 第一个1表示一个batch中样本的个数
# 第二个1表示通道数，也就是当前层的深度 1
# 第三个参数是图像的高度
# 第四个参数是图像的宽度
X = torch.ones((6, 8))
X[:, 2:6] = 0
X = X.reshape((1, 1, 6, 8))
# Y是输出，6=6-1+1，7=8-2+1
Y = Y.reshape((1, 1, 6, 7))
lr = 3e-2
for i in range(10):
    Y_hat = conv2d(X)
    l = (Y - Y_hat) ** 2
    conv2d.zero_grad()
    l.sum().backward()
    conv2d.weight.data[:] -= lr * conv2d.weight.grad
    if (i + 1) % 2 == 0:
        print(f"epoch{i + 1}，loss为{l.sum():.3f}")

epoch2，loss为10.718
epoch4，loss为3.592
epoch6，loss为1.337
epoch8，loss为0.525
epoch10，loss为0.211


In [39]:
# 细心的读者一定会发现，我们学习到的卷积核权重非常接近我们之前定义的卷积核K
conv2d.weight.data.reshape(1, 2)

tensor([[ 0.9434, -1.0377]])

In [3]:
import torch
from torch import nn
def comp_conv2d(conv2d, x):
    # 因为我们之后输入的CNN是框架自带的，所以得在x前面拼接两个维度（batch_size为1和通道数为1）
    x = x.reshape((1, 1) + x.shape)
    y = conv2d(x)
    return y
# 注意，这里的padding是一侧的，而公式里面的ph是上下两侧padding加起来的大小
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1)
x = torch.rand(size=(8, 8))
comp_conv2d(conv2d, x).shape

torch.Size([1, 1, 8, 8])

In [4]:
conv2d = nn.Conv2d(1, 1, kernel_size=(5, 3), padding=(2, 1))
comp_conv2d(conv2d, x).shape

torch.Size([1, 1, 8, 8])

In [5]:
conv2d = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=2)
comp_conv2d(conv2d, x).shape

torch.Size([1, 1, 4, 4])

In [6]:
conv2d = nn.Conv2d(1, 1, kernel_size=(3, 5), padding=(0, 1), stride=(3, 4))
comp_conv2d(conv2d, x).shape

torch.Size([1, 1, 2, 2])

In [1]:
import torch
from d2l import torch as d2l

### 多输入通道

In [9]:
def corr2d_multi_in(X, K):
    for x, k in zip(X, K):
        print(x, k)
    return sum(d2l.corr2d(x, k) for x, k in zip(X, K))
# 输入X为3X3X2，K为2X2X2，输入通道数为2， 输出通道数为1
X = torch.tensor([[[0, 1, 2], [3, 4, 5], [6, 7, 8]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])
K = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
corr2d_multi_in(X, K)

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]) tensor([[0, 1],
        [2, 3]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]) tensor([[1, 2],
        [3, 4]])


tensor([[ 56.,  72.],
        [104., 120.]])

### 多输出通道

In [13]:
# 首先看一下pytorch当中函数torch.stack的用法
# 将矩阵Z当中的每个元素加1、加2后得到一个新的矩阵后叠加到一起（在第0个维度上）
Z = torch.tensor([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])
Z = torch.stack((Z, Z + 1, Z + 2), 0)
print(Z, Z.shape)

tensor([[[[0, 1],
          [2, 3]],

         [[1, 2],
          [3, 4]]],


        [[[1, 2],
          [3, 4]],

         [[2, 3],
          [4, 5]]],


        [[[2, 3],
          [4, 5]],

         [[3, 4],
          [5, 6]]]]) torch.Size([3, 2, 2, 2])


In [15]:
# 迭代“K”的第0个维度，每次都对输入“X”执行互相关运算。
def corr2d_multi_in_out(X, K):
    # 像遍历这种多维矩阵，直接遍历都是从第0维度取矩阵
    return torch.stack([corr2d_multi_in(X, k) for k in K], 0)
K = torch.stack((K, K + 1, K + 2), 0)
corr2d_multi_in_out(X, K)

tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]) tensor([[0, 1],
        [2, 3]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]) tensor([[1, 2],
        [3, 4]])
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]) tensor([[1, 2],
        [3, 4]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]) tensor([[2, 3],
        [4, 5]])
tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]]) tensor([[2, 3],
        [4, 5]])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]) tensor([[3, 4],
        [5, 6]])


tensor([[[ 56.,  72.],
         [104., 120.]],

        [[ 76., 100.],
         [148., 172.]],

        [[ 96., 128.],
         [192., 224.]]])

### 用全连接层实现1X1卷积

In [17]:
def corr2d_multi_in_out_1x1(X, K):
    c_in, h, w = X.shape
    c_out = K.shape[0]
    X = X.reshape((c_in, h * w))
    # 因为是1X1的卷积核
    K = K.reshape((c_out, c_in))
    Y = torch.matmul(K, X)
    return Y.reshape((c_out, h, w))
X = torch.normal(0, 1, (3, 3, 3))
K = torch.normal(0, 1, (2, 3, 1, 1))
Y1 = corr2d_multi_in_out(X, K)
Y2 = corr2d_multi_in_out_1x1(X, K)
assert float(torch.abs(Y1 - Y2).sum()) < 1e-6

tensor([[ 1.7902,  0.0789, -1.3491],
        [-1.2572, -0.3948,  0.4476],
        [ 1.6304,  1.7899, -0.6143]]) tensor([[1.1534]])
tensor([[-1.5398,  0.1537, -0.0746],
        [ 0.0291, -0.9116, -1.2306],
        [ 0.1234,  1.2301,  1.4313]]) tensor([[-0.4764]])
tensor([[ 1.4248,  0.7758,  0.7644],
        [-1.2316,  2.6688, -1.7786],
        [ 1.3114,  0.3418, -1.0202]]) tensor([[-0.8763]])
tensor([[ 1.7902,  0.0789, -1.3491],
        [-1.2572, -0.3948,  0.4476],
        [ 1.6304,  1.7899, -0.6143]]) tensor([[-0.6258]])
tensor([[-1.5398,  0.1537, -0.0746],
        [ 0.0291, -0.9116, -1.2306],
        [ 0.1234,  1.2301,  1.4313]]) tensor([[0.1809]])
tensor([[ 1.4248,  0.7758,  0.7644],
        [-1.2316,  2.6688, -1.7786],
        [ 1.3114,  0.3418, -1.0202]]) tensor([[2.7515]])


# 池化层

In [1]:
import torch
from torch import nn
from d2l import torch as d2l

In [6]:
def pool2d(X, pool_size, mode='max'):
    ph, pw = pool_size
    Y = torch.zeros((X.shape[0] - ph + 1, X.shape[1] - pw + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + ph, j: j + pw].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + ph, j: j + pw].mean()
    return Y
X = torch.tensor([[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]])
pool2d(X, (2, 2), 'max')

tensor([[4., 5.],
        [7., 8.]])

In [7]:
pool2d(X, (2, 2), 'avg')

tensor([[2., 3.],
        [5., 6.]])

## 步幅和padding

In [8]:
X = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
# 使用Pytorch深度学习框架时，若未显示声明步幅，则步幅与Pooling窗口大小相同，均为3
pooling = nn.MaxPool2d(3)
pooling(X)

tensor([[[[10.]]]])

In [9]:
# 也可显示指明步幅和padding大小
pooling2 = nn.MaxPool2d(3, padding=1, stride=2)
pooling2(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [11]:
# 当然，我们可以设定一个任意大小的矩形汇聚窗口，并分别设定填充和步幅的高度和宽度。
pooling3 = nn.MaxPool2d((2, 3), padding=(0, 1), stride=(2, 3))
pooling3(X)

tensor([[[[ 5.,  7.],
          [13., 15.]]]])

In [23]:
# 在处理多通道输入数据时，汇聚层在每个输入通道上单独运算，而不是像卷积层一样在通道上对输入进行汇总。 这意味着汇聚层的输出通道数与输入通道数相同。下面，我们将在通道维度上连结张量X和X + 1，以构建具有2个通道的输入。
X = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
X = torch.cat((X, X + 1), 1)
X.shape

torch.Size([1, 2, 4, 4])

In [24]:
pooling4 = nn.MaxPool2d(3, padding=1, stride=2)
pooling4(X)

tensor([[[[ 5.,  7.],
          [13., 15.]],

         [[ 6.,  8.],
          [14., 16.]]]])

### 下面介绍一下torch.cat()和torch.stack()的区别(cat 和 stack的区别在于 cat会增加现有维度的值,可以理解为续接，stack会新加增加一个维度，可以理解为叠加)

#### 1.torch.cat()
#### torch.cat()函数可以将多个张量拼接成一个张量。torch.cat()有两个参数，第一个是要拼接的张量的列表或是元组；第二个参数是拼接的维度。

In [25]:
# 假设是时间步T1的输出
T1 = torch.tensor([[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]])
# 假设是时间步T2的输出
T2 = torch.tensor([[10, 20, 30],
          [40, 50, 60],
          [70, 80, 90]])
print("T1.shape: ", T1.shape, "T2.shape: ", T2.shape)
print(torch.cat((T1,T2),dim=0).shape)
print(torch.cat((T1,T2),dim=1).shape)

T1.shape:  torch.Size([3, 3]) T2.shape:  torch.Size([3, 3])
torch.Size([6, 3])
torch.Size([3, 6])


#### 2.torch.stack()
#### torch.stack()函数同样有张量列表和维度两个参数。stack与cat的区别在于，torch.stack()函数要求输入张量的大小完全相同，得到的张量的维度会比输入的张量的大小多1，并且多出的那个维度就是拼接的维度，那个维度的大小就是输入张量的个数。

In [29]:
print("T1.shape: ", T1.shape, "T2.shape: ", T2.shape)
print(torch.stack((T1,T2),dim=0).shape)
print(torch.stack((T1,T2),dim=1).shape)
print(torch.stack((T1,T2),dim=2).shape)

T1.shape:  torch.Size([3, 3]) T2.shape:  torch.Size([3, 3])
torch.Size([2, 3, 3])
torch.Size([3, 2, 3])
torch.Size([3, 3, 2])
