In [1]:
import torch
from torch import nn
import numpy as np

In [None]:
class myConv1d():
    """Hand-written 1-D cross-correlation layer for a single unbatched sample.

    Maps an input of shape ``(in_channels, length)`` to an output of shape
    ``(out_channels, (length - kernel_size) // stride + 1)``. No padding or
    dilation is applied.

    Attributes:
        parameters: kernel tensor of shape
            ``(out_channels, in_channels, kernel_size)``, initialised from a
            standard normal distribution and tracked by autograd.
        bias: per-output-channel offset of shape ``(out_channels,)``,
            initialised the same way.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, stride: int = 1) -> None:
        """Store the layer configuration and draw random weights.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of channels produced by the layer.
            kernel_size: width of the sliding window.
            stride: step between consecutive windows (must be >= 1).

        Raises:
            ValueError: if ``stride`` is smaller than 1.
        """
        if stride < 1:
            raise ValueError(f'stride must be a positive integer, got {stride!r}')

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride

        # init parameters
        self.parameters = torch.randn((out_channels, in_channels, kernel_size), requires_grad=True)

        # init bias
        self.bias = torch.randn((out_channels, ), requires_grad=True)

    def __call__(self, input: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``input`` of shape ``(in_channels, length)``.

        Returns:
            Tensor of shape ``(out_channels, (length - kernel_size) // stride + 1)``.

        Raises:
            AssertionError: if the channel count does not match or the input
                is shorter than the kernel.
        """
        assert input.shape[0] == self.in_channels, 'in channels not match!'
        assert input.shape[1] >= self.kernel_size, 'input to less!'

        # Every sliding window, honouring the stride:
        # (in_channels, out_length, kernel_size).
        windows = input.unfold(1, self.kernel_size, self.stride)

        # Contract the channel and kernel axes in one shot instead of looping
        # over every (out, in, position) triple in Python.
        calculated = torch.einsum('oik,ilk->ol', self.parameters, windows)
        return calculated + self.bias.reshape((self.out_channels, 1))

# layer init parameters
IN_CHANNEL, OUT_CHANNEL = 1, 2
KERNEL_SIZE = 3
HEIGHT = WIDTH = 3
# input tensor: in_channels * length, filled with 0, 1, 2, ...
LENGTH = 4
t1 = torch.arange(IN_CHANNEL * LENGTH, dtype=torch.float).reshape(IN_CHANNEL, LENGTH)
t1

In [None]:
# Reference PyTorch layer and the hand-written layer, built with the same configuration.
l1d = nn.Conv1d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE, dtype=torch.float)
l1d_my = myConv1d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)
# To compare the two forward passes, copy the reference weights first:
# l1d_my.parameters = l1d._parameters['weight'].data
# l1d_my.bias = l1d._parameters['bias'].data
# l1d_my(t1), l1d(t1)

# Back-propagate the summed output, then show the accumulated gradients.
torch.sum(l1d_my(t1)).backward()
(l1d_my.parameters.grad, l1d_my.bias.grad)

In [None]:
class myConv2d():
    """Hand-written 2-D cross-correlation layer for a single unbatched sample.

    Maps an input of shape ``(in_channels, d0, d1)`` to an output of shape
    ``(out_channels, (d0 - k0) // s0 + 1, (d1 - k1) // s1 + 1)`` where
    ``(k0, k1)`` is the kernel size and ``(s0, s1)`` the stride. No padding or
    dilation is applied.

    Attributes:
        parameters: kernel tensor of shape ``(out_channels, in_channels, k0, k1)``,
            drawn from a standard normal distribution and tracked by autograd.
        bias: tensor of shape ``(out_channels,)`` when the layer was built with
            ``bias=True``, otherwise ``None``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: tuple[int, int], stride: int = 1, bias: bool = True) -> None:
        """Store the layer configuration and draw random weights.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of channels produced by the layer.
            kernel_size: ``(k0, k1)`` window size along the two spatial axes.
            stride: one positive int shared by both axes, or a pair of
                positive ints (one per axis).
            bias: whether a learnable per-channel offset is added.

        Raises:
            ValueError: if ``stride`` is not a positive int or a pair of them.
        """
        self._normalize_stride(stride)  # fail early on an unusable stride

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.use_bias = bias

        # init parameters
        self.parameters = torch.randn((self.out_channels, self.in_channels, *kernel_size), requires_grad=True)

        # init bias (kept as None when disabled so the attribute always exists)
        self.bias = torch.randn((self.out_channels, ), requires_grad=True) if bias else None

    @staticmethod
    def _normalize_stride(stride) -> tuple:
        """Return ``stride`` as a validated pair of per-axis steps."""
        steps = (stride, stride) if isinstance(stride, int) else tuple(stride)
        if len(steps) != 2 or any(step < 1 for step in steps):
            raise ValueError(f'stride must be a positive int or two positive ints, got {stride!r}')
        return steps

    def __call__(self, input: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``input`` of shape ``(in_channels, d0, d1)``.

        Raises:
            AssertionError: if the channel count does not match or the input
                is smaller than the kernel along either axis.
        """
        assert input.shape[0] == self.in_channels, 'in channels not match!'
        assert input.shape[1] >= self.kernel_size[0] and input.shape[2] >= self.kernel_size[1], 'input to less!'

        step0, step1 = self._normalize_stride(self.stride)

        # All sliding windows: (in_channels, out0, out1, k0, k1).
        windows = input.unfold(1, self.kernel_size[0], step0).unfold(2, self.kernel_size[1], step1)

        # Contract input channels and both kernel axes at once.
        calculated = torch.einsum('oiab,ixyab->oxy', self.parameters, windows)
        if self.use_bias:
            calculated = calculated + self.bias.reshape((self.out_channels, 1, 1))
        return calculated

    def zero_grad(self):
        """Reset accumulated gradients in place.

        Tensors whose ``.grad`` is still ``None`` (no backward pass has run
        yet) are skipped instead of raising ``AttributeError``.
        """
        tracked = [self.parameters]
        if self.use_bias:
            tracked.append(self.bias)
        for tensor in tracked:
            if tensor.grad is not None:
                tensor.grad.zero_()

# layer init parameters
IN_CHANNEL = OUT_CHANNEL = 2
KERNEL_SIZE = (3, 3)
# input tensor: in_channels * width * height, filled with 0, 1, 2, ...
HEIGHT = WIDTH = 4
t2 = torch.arange(IN_CHANNEL * WIDTH * HEIGHT, dtype=torch.float).reshape(IN_CHANNEL, WIDTH, HEIGHT)
t2

In [None]:
# Reference PyTorch layer and the hand-written layer, built with the same configuration.
l2d = nn.Conv2d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)
l2d_my = myConv2d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)
# To compare the two forward passes, copy the reference weights first:
# l2d_my.parameters = l2d._parameters['weight'].data
# l2d_my.bias = l2d._parameters['bias'].data
# l2d(t2), l2d_my(t2)

# Back-propagate the summed output, then show the kernel gradient.
torch.sum(l2d_my(t2)).backward()
l2d_my.parameters.grad

In [None]:
def corr2d(X: torch.Tensor, K: torch.Tensor):
    """Slide the 2-D kernel ``K`` over the 2-D tensor ``X`` (no padding, stride 1).

    Each output element is the sum of the element-wise product between ``K``
    and the equally sized patch of ``X`` anchored at that position.

    Returns:
        Tensor of shape ``(X.shape[0] - K.shape[0] + 1, X.shape[1] - K.shape[1] + 1)``.
    """
    k_rows, k_cols = K.shape
    out_rows = X.shape[0] - k_rows + 1
    out_cols = X.shape[1] - k_cols + 1
    Y = torch.zeros((out_rows, out_cols))
    for r in range(out_rows):
        for c in range(out_cols):
            patch = X[r:r + k_rows, c:c + k_cols]
            Y[r, c] = torch.sum(patch * K)
    return Y

# Synthetic image: ones everywhere except a band of zeros in columns 2..5.
X = torch.ones((6, 8))
X[:, 2:6] = 0
K = torch.tensor([[1.0, -1.0]]) # <---target
# Ground truth produced by the target kernel.
Y = corr2d(X, K)
# conv2d = nn.Conv2d(1, 1, kernel_size=(1, 2), bias=False)
conv2d = myConv2d(1, 1, kernel_size=K.shape, bias=False)

# Add the leading channel axis expected by the layer.
X = X.reshape((1, 6, 8))
Y = Y.reshape((1, 6, 7))
lr = 3e-2  # learn rate

# Plain gradient descent on the summed squared error.
for i in range(20):
    Y_hat = conv2d(X)
    l = (Y_hat - Y).pow(2)
    l.sum().backward()
    with torch.no_grad():
        conv2d.parameters.sub_(lr * conv2d.parameters.grad)
    conv2d.zero_grad()
    if i % 2 == 1:
        print(f'epoch {i+1}, loss {l.sum():.3f}')
conv2d.parameters

In [None]:
class myConv3d():
    """Hand-written 3-D cross-correlation layer for a single unbatched sample.

    Maps an input of shape ``(in_channels, d0, d1, d2)`` to an output of shape
    ``(out_channels, o0, o1, o2)`` with ``o_n = (d_n - k_n) // s_n + 1`` for
    kernel size ``(k0, k1, k2)`` and stride ``(s0, s1, s2)``. No padding or
    dilation is applied.

    Attributes:
        parameters: kernel tensor of shape
            ``(out_channels, in_channels, k0, k1, k2)``, drawn from a standard
            normal distribution and tracked by autograd.
        bias: tensor of shape ``(out_channels,)`` when the layer was built with
            ``bias=True``, otherwise ``None``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: tuple[int, int, int], stride: int = 1, bias: bool = True) -> None:
        """Store the layer configuration and draw random weights.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of channels produced by the layer.
            kernel_size: window size along the three spatial axes.
            stride: one positive int shared by all axes, or three positive
                ints (one per axis).
            bias: whether a learnable per-channel offset is added.

        Raises:
            ValueError: if ``stride`` is not a positive int or three of them.
        """
        self._normalize_stride(stride)  # fail early on an unusable stride

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.use_bias = bias

        # init parameters
        self.parameters = torch.randn((self.out_channels, self.in_channels, *kernel_size), requires_grad=True)

        # init bias (kept as None when disabled so the attribute always exists)
        self.bias = torch.randn((self.out_channels, ), requires_grad=True) if bias else None

    @staticmethod
    def _normalize_stride(stride) -> tuple:
        """Return ``stride`` as a validated triple of per-axis steps."""
        steps = (stride,) * 3 if isinstance(stride, int) else tuple(stride)
        if len(steps) != 3 or any(step < 1 for step in steps):
            raise ValueError(f'stride must be a positive int or three positive ints, got {stride!r}')
        return steps

    # calculate conv3d use kernel Y on X
    @staticmethod
    def conv3d(X: torch.Tensor, Y: torch.Tensor, output: torch.Tensor, stride=1):
        """Accumulate the cross-correlation of ``X`` with kernel ``Y`` into ``output``.

        Args:
            X: single-channel input with three axes.
            Y: kernel with three axes, no larger than ``X`` along any axis.
            output: tensor that is updated in place (``+=``); its shape must
                match the number of window positions along each axis.
            stride: int or triple of ints; step between window positions.
        """
        windows = X
        steps = myConv3d._normalize_stride(stride)
        # Each unfold replaces one spatial axis by "window position" and
        # appends the within-window axis at the end, giving
        # (o0, o1, o2, k0, k1, k2) after the loop.
        for axis, (size, step) in enumerate(zip(Y.shape, steps)):
            windows = windows.unfold(axis, size, step)
        output += (windows * Y).sum(dim=(-3, -2, -1))

    def __call__(self, input: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``input`` of shape ``(in_channels, d0, d1, d2)``.

        Raises:
            AssertionError: if the channel count does not match or the input
                is smaller than the kernel along any axis.
        """
        assert input.shape[0] == self.in_channels, 'in channels not match!'
        assert input.shape[1] >= self.kernel_size[0] and input.shape[2] >= self.kernel_size[1] and input.shape[3] >= self.kernel_size[2], 'input to less!'

        steps = self._normalize_stride(self.stride)
        out_shape = [
            (input.shape[axis + 1] - size) // step + 1
            for axis, (size, step) in enumerate(zip(self.kernel_size, steps))
        ]
        calculated = torch.zeros(self.out_channels, *out_shape)
        for i_out in range(self.out_channels):
            for i_in in range(self.in_channels):
                # calculated[i_out] is a view, so the helper's in-place add
                # lands in the result buffer.
                self.conv3d(input[i_in], self.parameters[i_out, i_in], calculated[i_out], steps)
        if self.use_bias:
            calculated += self.bias.reshape((self.out_channels, 1, 1, 1))
        return calculated

    def zero_grad(self):
        """Reset accumulated gradients in place.

        Tensors whose ``.grad`` is still ``None`` (no backward pass has run
        yet) are skipped instead of raising ``AttributeError``.
        """
        tracked = [self.parameters]
        if self.use_bias:
            tracked.append(self.bias)
        for tensor in tracked:
            if tensor.grad is not None:
                tensor.grad.zero_()

# layer init parameters
IN_CHANNEL, OUT_CHANNEL = 3, 2
KERNEL_SIZE = (2, 2, 1)
# input tensor: in_channels * depth * width * height, filled with 0, 1, 2, ...
DEPTH, WIDTH, HEIGHT = 4, 3, 2
t3 = torch.arange(IN_CHANNEL * DEPTH * WIDTH * HEIGHT, dtype=torch.float).reshape(IN_CHANNEL, DEPTH, WIDTH, HEIGHT)
t3

In [None]:
# Reference PyTorch layer first, then the hand-written one.
l3d = nn.Conv3d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)
l3d_my = myConv3d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)

# compare: reuse the reference weights so both layers compute the same function
l3d_my.parameters, l3d_my.bias = l3d.weight.data, l3d.bias.data
(torch.sum(l3d(t3)), torch.sum(l3d_my(t3)))

# backward
# NOTE: tensors obtained through `.data` do not require grad, so re-enable
# gradient tracking on them before uncommenting the lines below.
# l3d_my(t3).sum().backward()
# l3d_my.parameters.grad, l3d_my.bias.grad

In [None]:
def corr3d(X: torch.Tensor, K: torch.Tensor):
    """Slide the 3-D kernel ``K`` over the 3-D tensor ``X`` (no padding, stride 1).

    Each output element is the sum of the element-wise product between ``K``
    and the equally sized sub-volume of ``X`` anchored at that position.

    Returns:
        Tensor whose size along every axis is ``X.shape[n] - K.shape[n] + 1``.
    """
    k0, k1, k2 = K.shape
    n0 = X.shape[0] - k0 + 1
    n1 = X.shape[1] - k1 + 1
    n2 = X.shape[2] - k2 + 1
    Y = torch.zeros((n0, n1, n2))
    for a in range(n0):
        for b in range(n1):
            for c in range(n2):
                block = X[a:a + k0, b:b + k1, c:c + k2]
                Y[a, b, c] = torch.sum(block * K)
    return Y

# Synthetic volume: ones everywhere except a block of zeros.
X = torch.ones((6, 8, 7))
X[:, 2:6, 3:5] = 0
K = torch.tensor([[[1.0, -1.0], [-1.0, 1.0]]]) # <---target
# Ground truth produced by the target kernel.
Y = corr3d(X, K)
# Learnable layer with a kernel of the same shape as the target and no bias.
conv3d = myConv3d(1, 1, kernel_size=K.shape, bias=False)

# Add the leading channel axis expected by the layer.
X = X.reshape((1, 6, 8, 7))
Y = Y.reshape((1, 6, 7, 6))
lr = 7e-4  # learn rate

# Plain gradient descent on the summed squared error.
for i in range(100):
    Y_hat = conv3d(X)
    l = (Y_hat - Y).pow(2)
    l.sum().backward()
    with torch.no_grad():
        conv3d.parameters.sub_(lr * conv3d.parameters.grad)
    conv3d.zero_grad()
    if i % 10 == 9:
        print(f'epoch {i+1}, loss {l.sum():.3f}')
conv3d.parameters

In [None]:
class myConv4d():
    """Hand-written 4-D cross-correlation layer for a single unbatched sample.

    Maps an input of shape ``(in_channels, d0, d1, d2, d3)`` to an output of
    shape ``(out_channels, o0, o1, o2, o3)`` with
    ``o_n = (d_n - k_n) // s_n + 1`` for kernel size ``(k0, k1, k2, k3)`` and
    stride ``(s0, s1, s2, s3)``. No padding or dilation is applied.

    Attributes:
        parameters: kernel tensor of shape
            ``(out_channels, in_channels, k0, k1, k2, k3)``, drawn from a
            standard normal distribution and tracked by autograd.
        bias: tensor of shape ``(out_channels,)`` when the layer was built with
            ``bias=True``, otherwise ``None``.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: tuple[int, int, int, int], stride: int = 1, bias: bool = True) -> None:
        """Store the layer configuration and draw random weights.

        Args:
            in_channels: number of channels expected in the input.
            out_channels: number of channels produced by the layer.
            kernel_size: window size along the four spatial axes.
            stride: one positive int shared by all axes, or four positive
                ints (one per axis).
            bias: whether a learnable per-channel offset is added.

        Raises:
            ValueError: if ``stride`` is not a positive int or four of them.
        """
        self._normalize_stride(stride)  # fail early on an unusable stride

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.use_bias = bias

        # init parameters
        self.parameters = torch.randn((self.out_channels, self.in_channels, *kernel_size), requires_grad=True)

        # init bias (kept as None when disabled so the attribute always exists)
        self.bias = torch.randn((self.out_channels, ), requires_grad=True) if bias else None

    @staticmethod
    def _normalize_stride(stride) -> tuple:
        """Return ``stride`` as a validated 4-tuple of per-axis steps."""
        steps = (stride,) * 4 if isinstance(stride, int) else tuple(stride)
        if len(steps) != 4 or any(step < 1 for step in steps):
            raise ValueError(f'stride must be a positive int or four positive ints, got {stride!r}')
        return steps

    # calculate conv4d use kernel Y on X
    @staticmethod
    def conv4d(X: torch.Tensor, Y: torch.Tensor, output: torch.Tensor, stride=1):
        """Accumulate the cross-correlation of ``X`` with kernel ``Y`` into ``output``.

        Args:
            X: single-channel input with four axes.
            Y: kernel with four axes, no larger than ``X`` along any axis.
            output: tensor that is updated in place (``+=``); its shape must
                match the number of window positions along each axis.
            stride: int or 4-tuple of ints; step between window positions.
        """
        windows = X
        steps = myConv4d._normalize_stride(stride)
        # Each unfold replaces one spatial axis by "window position" and
        # appends the within-window axis at the end, giving
        # (o0, o1, o2, o3, k0, k1, k2, k3) after the loop.
        for axis, (size, step) in enumerate(zip(Y.shape, steps)):
            windows = windows.unfold(axis, size, step)
        output += (windows * Y).sum(dim=(-4, -3, -2, -1))

    def __call__(self, input: torch.Tensor) -> torch.Tensor:
        """Apply the layer to ``input`` of shape ``(in_channels, d0, d1, d2, d3)``.

        Raises:
            AssertionError: if the channel count does not match or the input
                is smaller than the kernel along any axis.
        """
        assert input.shape[0] == self.in_channels, 'in channels not match!'
        assert (
            input.shape[1] >= self.kernel_size[0]
            and input.shape[2] >= self.kernel_size[1]
            and input.shape[3] >= self.kernel_size[2]
            and input.shape[4] >= self.kernel_size[3]
        ), 'input to less!'

        steps = self._normalize_stride(self.stride)
        out_shape = [
            (input.shape[axis + 1] - size) // step + 1
            for axis, (size, step) in enumerate(zip(self.kernel_size, steps))
        ]
        calculated = torch.zeros(self.out_channels, *out_shape)
        for i_out in range(self.out_channels):
            for i_in in range(self.in_channels):
                # calculated[i_out] is a view, so the helper's in-place add
                # lands in the result buffer.
                self.conv4d(input[i_in], self.parameters[i_out, i_in], calculated[i_out], steps)
        if self.use_bias:
            calculated += self.bias.reshape((self.out_channels, 1, 1, 1, 1))
        return calculated

    def zero_grad(self):
        """Reset accumulated gradients in place.

        Tensors whose ``.grad`` is still ``None`` (no backward pass has run
        yet) are skipped instead of raising ``AttributeError``.
        """
        tracked = [self.parameters]
        if self.use_bias:
            tracked.append(self.bias)
        for tensor in tracked:
            if tensor.grad is not None:
                tensor.grad.zero_()

# layer init parameters
IN_CHANNEL = OUT_CHANNEL = 2
KERNEL_SIZE = (2, 2, 1, 3)
# input tensor: in_channels * dim1 * dim2 * dim3 * dim4, filled with 0, 1, 2, ...
DIM1, DIM2, DIM3, DIM4 = 4, 3, 2, 4
t4 = torch.arange(IN_CHANNEL * DIM1 * DIM2 * DIM3 * DIM4, dtype=torch.float).reshape(IN_CHANNEL, DIM1, DIM2, DIM3, DIM4)

In [23]:
l4d_my = myConv4d(IN_CHANNEL, OUT_CHANNEL, KERNEL_SIZE)

# forward pass only: show the summed output (no backward is run here)
torch.sum(l4d_my(t4))

tensor(-21986.5996, grad_fn=<SumBackward0>)