In [1]:
import torch

In [2]:
# Draw the demo tensors from a dedicated fixed-seed generator so that
# "Restart Kernel -> Run All" reproduces exactly the same numbers, without
# touching the global RNG state.
rng = torch.Generator().manual_seed(0)
# Input indexed as (batch=1, channels=3, height=10, width=12).
inp = torch.randn(1, 3, 10, 12, generator=rng)
# Kernel bank indexed as (out_channels=2, in_channels=3, kernel_h=4, kernel_w=5).
w = torch.randn(2, 3, 4, 5, generator=rng)

In [3]:
class Handmade_conv2d_implementation():
    """Explicit-loop 2-D convolution (cross-correlation) over 4-D tensors.

    Intended to produce the same result as ``torch.nn.functional.conv2d``
    for the supported arguments (there is no ``groups`` parameter).

    Args:
        weights: kernel tensor indexed as
            ``(out_channels, in_channels, kernel_h, kernel_w)``.
        bias: optional tensor indexed by output channel; added once to every
            output position of that channel. ``None`` disables it.
        stride: step between neighbouring windows; an int or an ``(h, w)`` pair.
        padding: zero padding added on both sides of each spatial axis;
            an int or an ``(h, w)`` pair. Non-positive int padding is ignored.
        dilation: spacing between kernel taps; an int or an ``(h, w)`` pair.
    """

    def __init__(self, weights, bias=None, stride=1, padding=0, dilation=1):
        self.weights = weights
        self.bias = bias
        self.stride = stride
        self.padding = padding
        self.dilation = dilation

    @staticmethod
    def _pair(value):
        """Return ``value`` as an ``(h, w)`` pair, duplicating a scalar."""
        try:
            first, second = value
        except TypeError:
            # Scalars are not iterable: use the same value for both axes.
            return value, value
        return first, second

    def __call__(self, inp):
        """Convolve ``inp`` with the stored weights.

        Args:
            inp: tensor indexed as ``(batch, in_channels, height, width)``.

        Returns:
            Tensor indexed as ``(batch, out_channels, out_height, out_width)``,
            allocated on ``inp``'s device with the promoted dtype of ``inp``
            and the weights.
        """
        stride_h, stride_w = self._pair(self.stride)
        pad_h, pad_w = self._pair(self.padding)
        dil_h, dil_w = self._pair(self.dilation)

        if pad_h > 0 or pad_w > 0:
            # F.pad lists the last dimension first: (left, right, top, bottom).
            inp = torch.nn.functional.pad(inp, (pad_w, pad_w, pad_h, pad_h))

        # Extent of input covered by one (dilated) kernel along each axis.
        span_h = dil_h * (self.weights.shape[2] - 1) + 1
        span_w = dil_w * (self.weights.shape[3] - 1) + 1

        out_height = (inp.shape[2] - span_h) // stride_h + 1
        out_width = (inp.shape[3] - span_w) // stride_w + 1

        # Match the operands' dtype/device instead of the global defaults so
        # that e.g. float64 inputs are not silently down-cast.
        ans = torch.zeros(
            (inp.shape[0], self.weights.shape[0], out_height, out_width),
            dtype=torch.promote_types(inp.dtype, self.weights.dtype),
            device=inp.device,
        )
        for batch in range(ans.shape[0]):
            for out_channel in range(ans.shape[1]):
                for in_channel in range(inp.shape[1]):
                    # The kernel slice does not depend on the spatial position.
                    kernel = self.weights[out_channel, in_channel, :, :]
                    for h in range(ans.shape[2]):
                        h_in = h * stride_h
                        for w in range(ans.shape[3]):
                            w_in = w * stride_w

                            # Strided slicing picks exactly the dilated taps.
                            region = inp[
                                batch,
                                in_channel,
                                h_in:h_in + span_h:dil_h,
                                w_in:w_in + span_w:dil_w,
                            ]

                            ans[batch, out_channel, h, w] += torch.sum(region * kernel)
                if self.bias is not None:
                    # Must read the instance attribute; a bare ``bias`` name is
                    # not defined inside this method.
                    ans[batch, out_channel, :, :] += self.bias[out_channel]

        return ans

In [4]:
# Build the loop-based layer from the random kernel bank `w`, using a
# non-trivial stride / padding / dilation combination (no bias).
custom_conv2d_layer = Handmade_conv2d_implementation(
    weights=w,
    stride=3,
    padding=2,
    dilation=2,
)
# Apply it to the random input; `out` is compared with PyTorch's result below.
out = custom_conv2d_layer(inp)

In [5]:
# Largest element-wise deviation between PyTorch's built-in conv2d and the
# hand-written result `out`, using the same stride / padding / dilation.
print(
    torch.max(
        torch.abs(
            torch.nn.functional.conv2d(inp, w, stride=3, padding=2, dilation=2) - out
        )
    )
)

tensor(3.8147e-06)
