In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [58]:
in_channels, out_channels = 1, 2
kernel_size = 3
batch_size = 2
bias = False
input_size = [batch_size, in_channels, 4, 4]
conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias)

In [59]:
input_feature_map = torch.randn(input_size)
output_feature_map = conv_layer(input_feature_map)
print(output_feature_map)
print("---"*30)
output_feature_map1 = F.conv2d(input_feature_map, conv_layer.weight)
print(output_feature_map1)

tensor([[[[ 0.0322, -0.1890],
          [-0.0992,  0.4416]],

         [[ 0.1671, -0.4136],
          [ 0.1992,  0.1659]]],


        [[[-0.2014, -0.3555],
          [-1.0872, -0.1500]],

         [[ 0.8817, -0.2720],
          [-1.3173, -0.2607]]]], grad_fn=<ConvolutionBackward0>)
------------------------------------------------------------------------------------------
tensor([[[[ 0.0322, -0.1890],
          [-0.0992,  0.4416]],

         [[ 0.1671, -0.4136],
          [ 0.1992,  0.1659]]],


        [[[-0.2014, -0.3555],
          [-1.0872, -0.1500]],

         [[ 0.8817, -0.2720],
          [-1.3173, -0.2607]]]], grad_fn=<ConvolutionBackward0>)


In [60]:
inputs = input_feature_map
kernel = conv_layer.weight.data

In [66]:
inputs.shape, kernel.shape

(torch.Size([2, 1, 4, 4]), torch.Size([2, 1, 3, 3]))

In [79]:
from math import floor
#step1 原始的矩阵运算实现二维卷积
def matrix_multiplication_for_conv2d(inputs, kernel, stride=1, bias=None, padding=0):
    if padding > 0:
        inputs = F.pad(inputs, (padding, padding, padding, padding))
    
    bs, in_channels, input_h, input_w = inputs.shape
    out_channels, _, kernel_h, kernel_w = kernel.shape
    
    
    output_w = floor((input_w - kernel_w) / stride) + 1  #上取整
    output_h = floor((input_h - kernel_h) / stride) + 1
    
    output = torch.zeros((bs, out_channels, output_h, output_w))
    
    for i in range(0, input_h-kernel_h+1, stride):
        for j in range(0, input_w-kernel_w+1, stride):
            for c_out in range(out_channels):
                for c_in in range(in_channels):
                    output[:, c_out, i//stride, j//stride] += torch.sum(torch.mul(inputs[:, c_in, i:i+kernel_h, j:j+kernel_w], kernel[c_out, c_in, :, :]), \
                                                       dim=(1, 2))
                if bias is not None:
                    output[:, c_out, i//stride, j//stride] += bias[c_out]
    return output


In [80]:
output_feature_map1 = F.conv2d(input_feature_map, conv_layer.weight, padding=1)
print(output_feature_map1)
print("---"*30)
matrix_multiplication_for_conv2d(inputs, kernel, padding=1)

tensor([[[[ 0.3310,  0.0360, -0.2374, -0.1558],
          [ 0.2562,  0.0322, -0.1890, -0.1495],
          [ 0.0475, -0.0992,  0.4416,  0.3913],
          [-0.3610,  0.2912,  0.4568,  0.2339]],

         [[-0.0803, -0.5396, -0.2961, -0.1268],
          [ 0.0072,  0.1671, -0.4136, -0.5437],
          [-0.1491,  0.1992,  0.1659,  0.2691],
          [-0.0036, -0.1429, -0.3807,  0.1655]]],


        [[[ 0.6435, -0.1779, -0.1162, -0.3956],
          [-0.4974, -0.2014, -0.3555, -0.5743],
          [-0.2876, -1.0872, -0.1500,  0.3378],
          [-0.0538, -0.0606,  0.0077,  0.8676]],

         [[ 0.7068,  0.1829, -0.1004, -0.0776],
          [-0.1209,  0.8817, -0.2720, -0.5528],
          [-0.1386, -1.3173, -0.2607, -0.1748],
          [ 0.5444,  0.3498, -0.4395,  0.1488]]]],
       grad_fn=<ConvolutionBackward0>)
------------------------------------------------------------------------------------------


tensor([[[[ 0.3310,  0.0360, -0.2374, -0.1558],
          [ 0.2562,  0.0322, -0.1890, -0.1495],
          [ 0.0475, -0.0992,  0.4416,  0.3913],
          [-0.3610,  0.2912,  0.4568,  0.2339]],

         [[-0.0803, -0.5396, -0.2961, -0.1268],
          [ 0.0072,  0.1671, -0.4136, -0.5437],
          [-0.1491,  0.1992,  0.1659,  0.2691],
          [-0.0036, -0.1429, -0.3807,  0.1655]]],


        [[[ 0.6435, -0.1779, -0.1162, -0.3956],
          [-0.4974, -0.2014, -0.3555, -0.5743],
          [-0.2876, -1.0872, -0.1500,  0.3378],
          [-0.0538, -0.0606,  0.0077,  0.8676]],

         [[ 0.7068,  0.1829, -0.1004, -0.0776],
          [-0.1209,  0.8817, -0.2720, -0.5528],
          [-0.1386, -1.3173, -0.2607, -0.1748],
          [ 0.5444,  0.3498, -0.4395,  0.1488]]]])

In [74]:
bias_conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=True)
bias_output_feature_map = bias_conv_layer(input_feature_map)
print(bias_output_feature_map)
print("---"*30)
matrix_multiplication_for_conv2d(inputs, bias_conv_layer.weight.data, bias=bias_conv_layer.bias.data)

tensor([[[[ 0.0435,  0.0365],
          [-0.2333, -0.1044]],

         [[-0.1303,  0.4713],
          [ 0.2422,  0.2818]]],


        [[[ 0.3067,  0.1343],
          [-0.2522,  0.5407]],

         [[ 0.3796,  0.5556],
          [ 0.8235,  0.5901]]]], grad_fn=<ConvolutionBackward0>)
------------------------------------------------------------------------------------------


tensor([[[[ 0.0435,  0.0365],
          [-0.2333, -0.1044]],

         [[-0.1303,  0.4713],
          [ 0.2422,  0.2818]]],


        [[[ 0.3067,  0.1343],
          [-0.2522,  0.5407]],

         [[ 0.3796,  0.5556],
          [ 0.8235,  0.5901]]]])

In [87]:
#step2 向量内积实现
def flatten_multiplication_for_conv2d(inputs, kernel,stride=1,bias=0,padding=0):
    input_h, input_w = inputs.shape
    kernel_h, kernel_w = kernel.shape
    
    output_w = floor((input_w - kernel_w) / stride) + 1
    output_h = floor((input_h - kernel_h) / stride) + 1
    output = torch.zeros(output_h, output_w)
    
    region_matrix = torch.zeros(output.numel(), kernel.numel())  #存储输入拉平后
    kernel_matrix = torch.reshape(kernel, (-1,1))  #kernel转成列向量，其实是矩阵
    row_index = 0
    for i in range(0, input_h-kernel_h+1, stride):
        for j in range(0, input_w-kernel_w+1, stride):
            region = inputs[i:i+kernel_h, j:j+kernel_w]
            region_vector = torch.flatten(region) #取出被核滑动的输入区域，转成行向量
            region_matrix[i//stride * output_w + j//stride,:] = region_vector
            #region_matrix[row_index] = region_vactor
            #row_index += 1
    output_matrix = region_matrix @ kernel_matrix
    output = output_matrix.reshape(output_h, output_w)
    
    return output

In [94]:
inputs = torch.randn(5, 5)
kernel = torch.randn(3, 3)
output_feature_map = flatten_multiplication_for_conv2d(inputs, kernel, stride=2)
print(output_feature_map)
print("---"*30)
output_feature_map1 = F.conv2d(inputs.reshape((1,1,inputs.shape[0], inputs.shape[1])), kernel.reshape((1,1,kernel.shape[0], kernel.shape[1])), stride=2)
print(output_feature_map1)
torch.allclose(output_feature_map, output_feature_map1)

tensor([[ 3.0903, -1.2699],
        [ 2.1137,  0.9855]])
------------------------------------------------------------------------------------------
tensor([[[[ 3.0903, -1.2699],
          [ 2.1137,  0.9855]]]])


True

In [124]:
in_channels, out_channels, kernel_size = 3, 2, 3
batch_size = 4
input_size = [batch_size, in_channels, 5, 5]
bias = True
conv_layer = nn.Conv2d(in_channels, out_channels, kernel_size, bias=bias)
inputs = torch.randn(input_size)
output = conv_layer(inputs)
kernel = conv_layer.weight.data
bias = conv_layer.bias.data
print(inputs.shape, output.shape, kernel.shape, bias.shape)
print(output)

torch.Size([4, 3, 5, 5]) torch.Size([4, 2, 3, 3]) torch.Size([2, 3, 3, 3]) torch.Size([2])
tensor([[[[-0.1456,  0.8013, -1.0964],
          [-0.0609, -0.0595, -0.2931],
          [-1.0771,  0.2870, -0.9871]],

         [[ 0.1348, -0.8107,  0.1260],
          [-0.5127, -0.6665, -0.2516],
          [-1.1275, -0.0873,  0.9869]]],


        [[[-0.4831,  0.7683,  0.1324],
          [ 0.8327,  0.2712, -0.3787],
          [-0.0966, -0.2022, -0.6484]],

         [[-0.0431, -0.4704,  0.3328],
          [-0.1602,  0.2798,  0.0251],
          [ 0.1935,  0.0363, -0.5138]]],


        [[[ 0.2664,  0.5296,  0.2496],
          [ 1.3215,  0.8688,  0.8170],
          [ 0.3469,  0.4899,  0.5967]],

         [[ 0.0561, -0.3504, -0.1132],
          [-0.0435, -0.6647,  0.0925],
          [-0.0901,  0.3814, -0.1853]]],


        [[[ 0.4767,  0.6284,  0.0255],
          [-0.0665, -0.3060, -0.7124],
          [-0.0923,  0.1403,  0.5062]],

         [[-0.0350,  0.7030,  0.3673],
          [ 0.0914, -0.4134, -0

In [126]:
#step3 考虑bs、channels的向量内积实现
def complete_flatten_multiplication_for_conv2d(inputs, kernel, stride=1, bias=None, padding=0):
    if padding > 0:
        inputs = F.pad(inputs, (padding, padding, padding, padding, 0, 0, 0, 0)) #f.pad函数先height,width后bs,channel
    
    bs, in_channels, input_h, input_w = inputs.shape
    out_channels, _, kernel_h, kernel_w = kernel.shape
    if bias is None:
        bias = torch.zeros(out_channels)
    
    output_w = floor((input_w - kernel_w) / stride) + 1  #上取整
    output_h = floor((input_h - kernel_h) / stride) + 1
    output = torch.zeros((bs, out_channels, output_h, output_w))
    
    region_matrix = torch.zeros((bs,in_channels,output_h*output_w, kernel_h*kernel_w))  #存储输入拉平后
    kernel_matrix = torch.reshape(kernel, (out_channels, in_channels,kernel_h*kernel_w,1))  #kernel转成列向量，其实是矩阵
    out_matrix = torch.zeros((bs, out_channels, output_h*output_w, 1))
  
    for c_in in range(in_channels):
        row_index = 0
        for i in range(0, input_h-kernel_h+1, stride):
            for j in range(0, input_w-kernel_w+1, stride):
                region = inputs[:, c_in, i:i+kernel_h, j:j+kernel_w].reshape((bs, -1))
                region_matrix[:, c_in, row_index, :] = region
                row_index += 1
    print(region_matrix.shape)
    print(kernel_matrix.shape)
    for c_out in range(out_channels):       
        for c_in in range(in_channels):
            out_matrix[:, c_out, :, :] += torch.bmm(region_matrix[:, c_in], kernel_matrix[c_out, c_in].unsqueeze(0).tile(bs, 1, 1))
        out_matrix[:, c_out, :, :] += bias[c_out]
    output = out_matrix.reshape((bs, out_channels, output_h, output_w))
    
    return output

complete_flatten_multiplication_for_conv2d(inputs, kernel, bias=bias)

torch.Size([4, 3, 9, 9])
torch.Size([2, 3, 9, 1])


tensor([[[[-0.1456,  0.8013, -1.0964],
          [-0.0609, -0.0595, -0.2931],
          [-1.0771,  0.2870, -0.9871]],

         [[ 0.1348, -0.8107,  0.1260],
          [-0.5127, -0.6665, -0.2516],
          [-1.1275, -0.0873,  0.9869]]],


        [[[-0.4831,  0.7683,  0.1324],
          [ 0.8327,  0.2712, -0.3787],
          [-0.0966, -0.2022, -0.6484]],

         [[-0.0431, -0.4704,  0.3328],
          [-0.1602,  0.2798,  0.0251],
          [ 0.1935,  0.0363, -0.5138]]],


        [[[ 0.2664,  0.5296,  0.2496],
          [ 1.3215,  0.8688,  0.8170],
          [ 0.3469,  0.4899,  0.5967]],

         [[ 0.0561, -0.3504, -0.1132],
          [-0.0435, -0.6647,  0.0925],
          [-0.0901,  0.3814, -0.1853]]],


        [[[ 0.4767,  0.6284,  0.0255],
          [-0.0665, -0.3060, -0.7124],
          [-0.0923,  0.1403,  0.5062]],

         [[-0.0350,  0.7030,  0.3673],
          [ 0.0914, -0.4134, -0.3617],
          [ 0.4065,  0.5488, -0.4041]]]])