# 手写并验证向量内积实现PyTorch二维卷积

来自b站up主deep_thoughts 合集【PyTorch源码教程与前沿人工智能算法复现讲解】

P_24_手写并验证向量内积实现PyTorch二维卷积：
    
https://www.bilibili.com/video/BV1Qb4y1i7n5/?spm_id_from=pageDriver&vd_source=18e91d849da09d846f771c89a366ed40

torch.nn.Conv2d 官方文档：https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html#torch.nn.Conv2d

## 演示 torch.flatten 功能

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Draw a 2x3 tensor of random samples and display it.
a = torch.randn(2,3)
print(a)
# torch.flatten collapses the tensor into a single dimension (row by row,
# as the printed output below shows); `a` is rebound to the 1-D result.
a = torch.flatten(a)
print(a)

tensor([[-1.6046,  0.3836, -0.0580],
        [ 0.1897,  0.5488, -0.3683]])
tensor([-1.6046,  0.3836, -0.0580,  0.1897,  0.5488, -0.3683])


## 演示 numel 功能

In [3]:
# numel() reports the total number of elements held by the tensor.
print(a.numel())

6


## 演示 reshape 功能

In [4]:
# reshape(-1, 1) views the data as a single column; the -1 asks PyTorch to
# infer that dimension's size from the number of elements.
print(a.reshape(-1,1))

tensor([[-1.6046],
        [ 0.3836],
        [-0.0580],
        [ 0.1897],
        [ 0.5488],
        [-0.3683]])


## step2 用原始的矩阵运算来实现二维卷积，先不考虑batchsize和channel维度，flatten版本

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

input = torch.randn(5, 5)  # input feature map of the convolution
kernel = torch.randn(3, 3)  # convolution kernel
bias = torch.randn(1)  # convolution bias; one value because a single output channel is assumed

def matrix_multiplication_for_conv2d_flatten(input, kernel, bias=0, stride=1, padding=0):
    """Compute a single-channel 2-D convolution as one matrix product.

    Every kernel-sized window of ``input`` is flattened into one row of a
    "region matrix"; that matrix is multiplied by the kernel flattened into a
    column vector, and the resulting column is reshaped to the output map.
    Batch and channel dimensions are not handled here.

    Args:
        input: 2-D tensor ``(height, width)``, the input feature map.
        kernel: 2-D tensor ``(kernel_h, kernel_w)``.
        bias: Scalar or tensor broadcastable to the output; it is added to
            every output element. ``None`` means no bias.
        stride: Step of the sliding window, used for both dimensions.
        padding: Number of zeros added on each of the four borders.

    Returns:
        2-D tensor of shape ``(output_h, output_w)``.

    Raises:
        ValueError: If the kernel is larger than the (padded) input.
    """
    if bias is None:
        bias = 0
    if padding > 0:
        input = F.pad(input, (padding, padding, padding, padding))
    input_h, input_w = input.shape
    kernel_h, kernel_w = kernel.shape
    if kernel_h > input_h or kernel_w > input_w:
        raise ValueError(
            f"kernel size ({kernel_h}, {kernel_w}) exceeds padded input size "
            f"({input_h}, {input_w})"
        )

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output

    # One row per output position, each holding a flattened window.  Allocated
    # from `input` so dtype and device match; a default-dtype buffer would
    # make the matmul below fail for e.g. float64 operands.
    region_matrix = input.new_zeros(output_h * output_w, kernel.numel())
    kernel_matrix = kernel.reshape((kernel.numel(), 1))  # kernel as a column vector
    row_index = 0
    for i in range(0, input_h - kernel_h + 1, stride):  # slide over the height
        for j in range(0, input_w - kernel_w + 1, stride):  # slide over the width
            region = input[i:i + kernel_h, j:j + kernel_w]  # window currently under the kernel
            region_matrix[row_index] = torch.flatten(region)
            row_index += 1

    # Rows were filled in row-major order of the output, so a plain reshape
    # restores the 2-D layout.
    output_matrix = region_matrix @ kernel_matrix
    return output_matrix.reshape((output_h, output_w)) + bias

In [18]:
# Convolution computed by the matrix-product implementation (flattened-input version).
mat_mul_conv_output_flatten = matrix_multiplication_for_conv2d_flatten(
    input, kernel, bias=bias, stride=2, padding=1
)

# Reference result from the PyTorch API.  F.conv2d works on 4-D operands, so
# two leading singleton dimensions are added here and indexed away afterwards.
pytorch_api_conv_output = F.conv2d(
    input.reshape(1, 1, *input.shape),
    kernel.reshape(1, 1, *kernel.shape),
    bias=bias,
    stride=2,
    padding=1,
)[0, 0]
flag = torch.allclose(mat_mul_conv_output_flatten, pytorch_api_conv_output)
print(flag)
print(mat_mul_conv_output_flatten)
print(pytorch_api_conv_output)

True
tensor([[-1.1560,  1.1963,  0.8068],
        [ 2.4367, -4.7094,  0.0379],
        [-0.3445,  2.3627,  1.7416]])
tensor([[-1.1560,  1.1963,  0.8068],
        [ 2.4367, -4.7094,  0.0379],
        [-0.3445,  2.3627,  1.7416]])


## 验证结果：flatten 版本卷积的输出与 PyTorch 官方卷积一致，实现正确

## step3 用原始的矩阵运算来实现二维卷积，考虑batchsize维度和channel维度

In [19]:
def matrix_multiplication_for_conv2d_full(input, kernel, bias=0, stride=1, padding=0):
    """Compute a batched, multi-channel 2-D convolution with explicit loops.

    For every sample and output channel, each kernel-sized window of every
    input channel is multiplied element-wise with the matching kernel slice
    and the sums are accumulated into the output position; the per-channel
    bias is added once at the end.

    Args:
        input: 4-D tensor ``(batch, in_channel, height, width)``.
        kernel: 4-D tensor ``(out_channel, in_channel, kernel_h, kernel_w)``.
        bias: 1-D tensor with one value per output channel, a plain number
            applied to every output channel, or ``None`` for no bias.
        stride: Step of the sliding window, used for both spatial dimensions.
        padding: Number of zeros added on each border of the two spatial
            dimensions.

    Returns:
        4-D tensor ``(batch, out_channel, output_h, output_w)`` allocated with
        ``input``'s dtype and device.

    Raises:
        ValueError: If the channel counts of ``input`` and ``kernel`` differ,
            or the kernel is larger than the (padded) input.
    """
    if padding > 0:
        # F.pad lists pads starting from the last dimension, so four values
        # cover width and height; batch and channel stay unpadded.
        input = F.pad(input, (padding, padding, padding, padding))
    bs, in_channel, input_h, input_w = input.shape
    out_channel, kernel_in_channel, kernel_h, kernel_w = kernel.shape
    if kernel_in_channel != in_channel:
        raise ValueError(
            f"input has {in_channel} channels but kernel expects {kernel_in_channel}"
        )
    if kernel_h > input_h or kernel_w > input_w:
        raise ValueError(
            f"kernel size ({kernel_h}, {kernel_w}) exceeds padded input size "
            f"({input_h}, {input_w})"
        )

    # Normalise the bias to one value per output channel.  The declared
    # default (0) is a plain number and cannot be indexed with bias[oc].
    if bias is None:
        bias = input.new_zeros(out_channel)
    elif not torch.is_tensor(bias):
        bias = input.new_full((out_channel,), bias)

    output_h = (input_h - kernel_h) // stride + 1  # height of the convolution output
    output_w = (input_w - kernel_w) // stride + 1  # width of the convolution output
    # Allocate from `input` so the accumulator keeps its dtype/device instead
    # of silently falling back to the default float32 CPU tensor.
    output = input.new_zeros(bs, out_channel, output_h, output_w)

    for ind in range(bs):
        for oc in range(out_channel):
            for ic in range(in_channel):
                # enumerate() yields the output index alongside the input offset.
                for out_i, i in enumerate(range(0, input_h - kernel_h + 1, stride)):  # slide over the height
                    for out_j, j in enumerate(range(0, input_w - kernel_w + 1, stride)):  # slide over the width
                        region = input[ind, ic, i:i + kernel_h, j:j + kernel_w]  # window currently under the kernel
                        # Element-wise product summed up, accumulated over input channels.
                        output[ind, oc, out_i, out_j] += torch.sum(region * kernel[oc, ic])
            output[ind, oc] += bias[oc]

    return output

# Random test data (created in this order so the RNG stream is unchanged).
input = torch.randn(2, 2, 5, 5)  # batch size x in_channel x in_h x in_w
kernel = torch.randn(3, 2, 3, 3)  # out_channel x in_channel x kernel_h x kernel_w
bias = torch.randn(3)  # one bias value per output channel

# Check that matrix_multiplication_for_conv2d_full agrees with the official
# PyTorch API when both receive the same stride and padding.
pytorch_conv2d_api_output = F.conv2d(
    input, kernel, bias=bias, stride=2, padding=1
)
mm_conv2d_full_output = matrix_multiplication_for_conv2d_full(
    input, kernel, bias=bias, stride=2, padding=1
)
flag = torch.allclose(pytorch_conv2d_api_output, mm_conv2d_full_output)
print(flag)

True
