# Convolution 

## 1D convolution 

In [3]:
def convolve(signal, kernel):
    """Slide ``kernel`` over a zero-padded ``signal`` and return the weighted sums.

    The kernel is applied as given (it is not reversed), so for an asymmetric
    kernel this computes a cross-correlation. The signal is padded with
    ``len(kernel) // 2`` zeros on each side and one value is produced per
    input sample.

    Args:
        signal: Sequence of numbers (list, tuple, 1-D array, ...).
        kernel: Sequence of weights.

    Returns:
        A list with ``len(signal)`` elements; element ``n`` is
        ``sum(kernel[j] * padded_signal[n + j])`` over all kernel positions.
    """
    kernel_size = len(kernel)
    padding = kernel_size // 2  # assume zero padding

    # Copy into a plain list so tuples and arrays are accepted as well;
    # `list + tuple` raises and `list + ndarray` broadcasts instead of
    # concatenating.
    zeros = [0] * padding
    padded_signal = zeros + list(signal) + zeros

    # The builtin sum() is used directly rather than a local accumulator
    # that would shadow it.
    return [
        sum(weight * padded_signal[start + offset]
            for offset, weight in enumerate(kernel))
        for start in range(len(signal))
    ]


In [4]:
# Example: apply the 3-tap kernel [1, 0, -1] to a short ramp signal and
# print the resulting list (one value per input sample).
signal = [1, 2, 3, 4, 5, 6]
kernel = [1, 0, -1]
output = convolve(signal, kernel)
print(output)


[-2, -2, -2, -2, -2, 5]


## 2D convolution 

In [7]:
import numpy as np

def convolution(image, kernel):
    """Apply a stride-1, zero-padded 'same' filter to a multi-channel image.

    Each slice ``kernel[:, :, k]`` is a 2-D filter that produces output
    channel ``k``. The same 2-D filter is applied to every input channel and
    the per-channel responses are summed, so the number of input channels
    does not have to match the kernel's last dimension. The kernel is not
    flipped (cross-correlation).

    Args:
        image: Array of shape (height, width, channels).
        kernel: Array of shape (kernel_height, kernel_width, out_channels).

    Returns:
        Float array of shape (height, width, out_channels).
    """
    image_height, image_width, _ = image.shape
    kernel_height, kernel_width, kernel_channels = kernel.shape

    # 'same' output needs kernel_size - 1 padded rows/columns in total.
    # For odd kernels this splits evenly; for even kernels the extra
    # row/column goes to the bottom/right so that every window is full
    # size (a symmetric (k - 1) // 2 split would truncate edge windows).
    pad_top = (kernel_height - 1) // 2
    pad_bottom = kernel_height - 1 - pad_top
    pad_left = (kernel_width - 1) // 2
    pad_right = kernel_width - 1 - pad_left
    padded_image = np.pad(
        image,
        ((pad_top, pad_bottom), (pad_left, pad_right), (0, 0)),
        'constant',
    )

    # Every input channel is weighted by the same 2-D filter, so the
    # channels can be summed once up front instead of inside the loops.
    channel_sum = padded_image.sum(axis=2)

    output = np.zeros((image_height, image_width, kernel_channels))
    for i in range(image_height):
        for j in range(image_width):
            window = channel_sum[i:i + kernel_height, j:j + kernel_width]
            # (kh, kw, 1) * (kh, kw, K) summed over the spatial axes -> (K,)
            output[i, j, :] = (window[:, :, None] * kernel).sum(axis=(0, 1))

    return output


In [8]:
# Shape check on random data: a 32x32 image with 10 channels and a 3x3
# kernel whose last axis has length 3.
image = np.random.randn(32,32,10)
kernel = np.random.randn(3,3,3)
out = convolution(image, kernel)
# Last expression of the cell: displays the shape of the result.
out.shape

(32, 32, 3)

In [10]:
# create an example image and kernel
# create an example image and kernel
# image has shape (3, 2, 2); kernel has shape (2, 2, 2)
image = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]], [[9, 10], [11, 12]]])
kernel = np.array([[[1, 0], [0, -1]], [[0, 1], [-1, 0]]])

# The result is stored but not displayed by this cell.
output = convolution(image, kernel)

In [9]:
import numpy as np

def convolution(image, kernel, padding=0, stride=1):
    """Filter a multi-channel image with configurable zero padding and stride.

    Output channel ``f`` is produced by the 2-D slice ``kernel[:, :, f]``,
    which is broadcast over every input channel of the current window; the
    products are summed over height, width and channels. The kernel is not
    flipped.

    Args:
        image: Array of shape (height, width, channels).
        kernel: Array of shape (kernel_height, kernel_width, out_channels).
        padding: Number of zero rows/columns added on every spatial side.
        stride: Step between consecutive windows along both spatial axes.

    Returns:
        Float array of shape (out_height, out_width, out_channels) where
        ``out = (in - kernel + 2 * padding) // stride + 1`` per spatial axis.
    """
    in_rows, in_cols, _ = image.shape
    k_rows, k_cols, n_filters = kernel.shape

    # Spatial size of the result.
    out_rows = (in_rows - k_rows + 2 * padding) // stride + 1
    out_cols = (in_cols - k_cols + 2 * padding) // stride + 1

    # Zero-pad height and width only; the channel axis is left untouched.
    border = (padding, padding)
    padded = np.pad(image, (border, border, (0, 0)), mode='constant')

    output = np.zeros((out_rows, out_cols, n_filters))

    # Visit every output position; the window depends only on (row, col),
    # so it is sliced once and reused for all filters.
    for row, col in np.ndindex(out_rows, out_cols):
        top = row * stride
        left = col * stride
        window = padded[top:top + k_rows, left:left + k_cols, :]
        for f in range(n_filters):
            output[row, col, f] = np.sum(kernel[:, :, f:f + 1] * window)

    return output

In [11]:

# Shape check on random data: a 32x5 image with 10 channels, a 3x3 kernel
# whose last axis has length 3, padding of 2 and stride of 2.
image = np.random.randn(32,5,10)
kernel = np.random.randn(3,3,3)
padding = 2
stride = 2

result = convolution(image, kernel, padding, stride)
print("Result of convolution:")
# Only the shape of the result is printed, not its values.
print(result.shape)


Result of convolution:
(17, 4, 3)


In [None]:
import numpy as np

def convolution(image, kernel, padding=0, stride=1, groups=1):
    """Filter a multi-channel image with zero padding, stride and channel groups.

    The input channels and the kernel's last axis are each split into
    ``groups`` contiguous chunks. Output channel ``k`` of group ``g`` is
    produced by the 2-D slice ``kernel_group[:, :, k]``, broadcast over the
    input channels of the same group and summed over height, width and
    those channels. With ``groups=1`` every filter sees every input channel.
    The kernel is not flipped.

    Args:
        image: Array of shape (height, width, channels).
        kernel: Array of shape (kernel_height, kernel_width, out_channels).
        padding: Number of zero rows/columns added on every spatial side.
        stride: Step between consecutive windows along both spatial axes.
        groups: Number of channel groups; must divide both the image
            channel count and the kernel's last dimension.

    Returns:
        Float array of shape (out_height, out_width, out_channels) where
        ``out = (in - kernel + 2 * padding) // stride + 1`` per spatial axis.

    Raises:
        ValueError: If ``groups`` is smaller than 1 or does not evenly
            divide the image channels and the kernel channels.
    """
    # Get dimensions of image and kernel
    image_height, image_width, image_channels = image.shape
    kernel_height, kernel_width, kernel_channels = kernel.shape

    # Without these checks, leftover channels would be silently skipped and
    # their output slots left at zero.
    if groups < 1:
        raise ValueError(f"groups must be a positive integer, got {groups}")
    if image_channels % groups or kernel_channels % groups:
        raise ValueError(
            f"groups={groups} must divide both the image channels "
            f"({image_channels}) and the kernel channels ({kernel_channels})"
        )

    # Calculate the output dimensions
    output_height = (image_height - kernel_height + 2 * padding) // stride + 1
    output_width = (image_width - kernel_width + 2 * padding) // stride + 1
    output_channels = kernel_channels

    # Pad the image (spatial axes only)
    padded_image = np.pad(image, ((padding, padding), (padding, padding), (0, 0)), mode='constant')

    # Initialize the output matrix
    output = np.zeros((output_height, output_width, output_channels))

    in_per_group = image_channels // groups
    out_per_group = output_channels // groups

    # Perform the convolution
    for g in range(groups):
        # Divide input image and kernel into groups
        image_group = padded_image[:, :, g * in_per_group:(g + 1) * in_per_group]
        kernel_group = kernel[:, :, g * out_per_group:(g + 1) * out_per_group]

        for i in range(output_height):
            h_start = i * stride
            h_end = h_start + kernel_height
            for j in range(output_width):
                w_start = j * stride
                w_end = w_start + kernel_width
                window = image_group[h_start:h_end, w_start:w_end, :]
                for k in range(out_per_group):
                    # Keep a trailing length-1 axis on the filter so it
                    # broadcasts over the channel axis; a bare (kh, kw)
                    # slice would be aligned with (kw, channels) instead.
                    output[i, j, g * out_per_group + k] = np.sum(
                        kernel_group[:, :, k:k + 1] * window)

    return output


## 3D Conv

In [1]:
import torch
import torch.nn as nn
from einops.layers.torch import Rearrange

In [2]:
class Conv3DLayer(nn.Module):
    """Two-stage 3-D convolution over frames, blended with the input by a learned gate.

    The input is laid out as ``(batch * n_frames, channels, height, width)``.
    It is regrouped to ``(batch, channels, n_frames, height, width)``, passed
    through two GroupNorm -> SiLU -> Conv3d stages whose kernel is 3 along
    the frame axis and 1 along height and width, and folded back to the
    input layout. The result ``h`` is mixed with the input ``x`` as
    ``alpha * x + (1 - alpha) * h``.

    Because ``x`` and ``h`` are combined elementwise, their shapes have to be
    compatible. Both stages use ``nn.GroupNorm`` with 32 groups.
    """

    def __init__(self, in_dim, out_dim, n_frames):
        """Build the rearrange layers, the two conv stages and the gate.

        Args:
            in_dim: Channel count of the input.
            out_dim: Channel count produced by both conv stages.
            n_frames: Number of frames folded into the leading batch axis.
        """
        super().__init__()

        # Split the leading axis into (batch, frames) and back again.
        self.to_3d = Rearrange('(b t) c h w -> b c t h w', t=n_frames)
        self.to_2d = Rearrange('b c t h w -> (b t) c h w')

        self.block1 = self._norm_act_conv(in_dim, out_dim)
        self.block2 = self._norm_act_conv(out_dim, out_dim)

        # Gate starts at 1, so (1 - alpha) weights the conv branch by zero
        # until alpha is updated.
        self.alpha = nn.Parameter(torch.ones(1))

    @staticmethod
    def _norm_act_conv(channels_in, channels_out):
        """Return a GroupNorm -> SiLU -> Conv3d stage mapping channels_in to channels_out."""
        return nn.Sequential(
            nn.GroupNorm(32, channels_in),
            nn.SiLU(),
            # Kernel 3 with padding 1 on the frame axis keeps the frame
            # count; height and width use kernel 1 with no padding.
            nn.Conv3d(channels_in, channels_out, kernel_size=(3, 1, 1),
                      stride=1, padding=(1, 0, 0)),
        )

    def forward(self, x):
        """Return the gated blend of ``x`` and its convolved version."""
        frames = self.to_3d(x)
        mixed = self.to_2d(self.block2(self.block1(frames)))

        # Keep the gate inside [0, 1]; done in place and outside autograd.
        with torch.no_grad():
            self.alpha.clamp_(0, 1)

        gate = self.alpha
        return gate * x + (1 - gate) * mixed