In [1]:
import numpy as np

def conv2d(image, kernel, stride=1, padding=0):
    """Naive single-channel 2-D convolution with zero padding.

    The kernel is reversed along both axes before the sliding dot product,
    so this is a true convolution rather than a cross-correlation.

    Args:
        image: 2-D array-like of shape (H, W).
        kernel: 2-D array-like of shape (kH, kW).
        stride: step between successive window positions on both axes.
        padding: zeros added on every side of the image; values <= 0 add none.

    Returns:
        np.ndarray (default float dtype of ``np.zeros``) of shape
        ((H + 2*padding - kH)//stride + 1, (W + 2*padding - kW)//stride + 1).
    """
    img = np.array(image)
    ker = np.array(kernel)

    rows, cols = img.shape
    k_rows, k_cols = ker.shape

    # Zero-pad all four borders only when a positive padding is requested.
    padded = np.pad(img, padding, mode='constant') if padding > 0 else img

    n_rows = (rows + 2 * padding - k_rows) // stride + 1
    n_cols = (cols + 2 * padding - k_cols) // stride + 1
    result = np.zeros((n_rows, n_cols))

    # Reverse both axes of the kernel (convolution, not correlation).
    flipped = ker[::-1, ::-1]

    # Walk the top-left corner of every window, row-major.
    for r, top in enumerate(range(0, n_rows * stride, stride)):
        for c, left in enumerate(range(0, n_cols * stride, stride)):
            patch = padded[top:top + k_rows, left:left + k_cols]
            result[r, c] = np.sum(patch * flipped)

    return result

In [2]:
def conv2d_multi_channel(image, kernels, stride=1, padding=0):
    """Naive multi-channel 2-D convolution with zero padding.

    Every kernel slice is reversed along both spatial axes (true convolution)
    and the per-input-channel results are summed into each output channel.

    Args:
        image: array-like of shape (C_in, H, W).
        kernels: array-like of shape (C_out, C_in, kH, kW).
        stride: step between successive window positions on both spatial axes.
        padding: zeros added on every side of the spatial axes; values <= 0
            add none.

    Returns:
        np.ndarray (default float dtype of ``np.zeros``) of shape
        (C_out, (H + 2*padding - kH)//stride + 1, (W + 2*padding - kW)//stride + 1).

    Raises:
        AssertionError: if the kernels' input-channel count differs from the
            image's channel count.
    """
    # Accept nested lists as well as arrays, like the single-channel version.
    image = np.asarray(image)
    kernels = np.asarray(kernels)

    C_in, H, W = image.shape
    C_out, Cin_k, kH, kW = kernels.shape
    # Without this check, surplus kernel input channels were silently ignored.
    assert Cin_k == C_in, "kernels second dim must equal image channels"

    if padding > 0:
        image_padded = np.pad(image, ((0, 0), (padding, padding), (padding, padding)), mode='constant')
    else:
        image_padded = image

    out_H = (H + 2*padding - kH)//stride + 1
    out_W = (W + 2*padding - kW)//stride + 1
    output = np.zeros((C_out, out_H, out_W))

    # Flip every kernel once up front instead of once per (co, ci) pair.
    kernels_flipped = kernels[..., ::-1, ::-1]

    # Accumulate each input channel's contribution into each output channel.
    for co in range(C_out):
        for ci in range(C_in):
            kernel_flipped = kernels_flipped[co, ci]
            for i in range(out_H):
                for j in range(out_W):
                    region = image_padded[ci, i*stride : i*stride+kH, j*stride : j*stride+kW]
                    output[co, i, j] += np.sum(region * kernel_flipped)
    return output

In [3]:
import numpy as np
from numpy.lib.stride_tricks import sliding_window_view

def conv2d_vec(image, kernel, stride=1, padding=0):
    """
    Vectorized single-channel 2-D convolution (no Python loops).

    image:   (H, W) array-like
    kernel:  (kH, kW) array-like -- flipped on both axes (true convolution)
    stride:  positive int, step between windows on both axes
    padding: zeros added on every side; values <= 0 add none
    returns: (out_H, out_W), where out_H = (H + 2*padding - kH) // stride + 1
             and out_W likewise

    Raises ValueError if stride < 1.
    """
    # A negative step would make the slice below reverse the output silently.
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    image = np.asarray(image)
    kernel = np.asarray(kernel)

    kH, kW = kernel.shape
    # pad image on spatial axes
    if padding > 0:
        image = np.pad(image, ((padding, padding), (padding, padding)), mode='constant')

    # every unit-step kH x kW patch, then keep every `stride`-th one:
    # (out_H, out_W, kH, kW)
    windows = sliding_window_view(image, (kH, kW))[::stride, ::stride]

    # flip kernel for convolution (vs cross-correlation)
    kflipped = kernel[::-1, ::-1]

    # contract the two patch axes against the kernel -> (out_H, out_W)
    return np.einsum('ijpq,pq->ij', windows, kflipped)

In [4]:
def conv2d_multi_vec(image, kernels, stride=1, padding=0):
    """
    Vectorized multi-channel 2-D convolution (no Python loops).

    image:   (C_in, H, W) array-like
    kernels: (C_out, C_in, kH, kW) array-like -- flipped on the two spatial axes
    stride:  positive int, step between windows on both spatial axes
    padding: zeros added on every side of the spatial axes; values <= 0 add none
    returns: (C_out, out_H, out_W), where out_H = (H + 2*padding - kH) // stride + 1
             and out_W likewise

    Raises ValueError if stride < 1 and AssertionError on a channel mismatch.
    """
    # A negative step would make the slice below reverse the output silently.
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")

    image = np.asarray(image)
    kernels = np.asarray(kernels)

    C_in, _, _ = image.shape
    _, Cin_k, kH, kW = kernels.shape
    assert Cin_k == C_in, "kernels second dim must equal image channels"

    # pad spatial dims only
    if padding > 0:
        image = np.pad(image, ((0,0), (padding, padding), (padding, padding)), mode='constant')

    # unit-step windows over the spatial axes, per channel:
    # (C_in, Hp-kH+1, Wp-kW+1, kH, kW), with Hp/Wp the padded extents
    windows = sliding_window_view(image, (kH, kW), axis=(1,2))
    # keep every `stride`-th window -> (C_in, out_H, out_W, kH, kW)
    windows = windows[:, ::stride, ::stride, :, :]

    # flip kernels spatially for convolution
    kflipped = kernels[..., ::-1, ::-1]  # (C_out, C_in, kH, kW)

    # Contract over (C_in, kH, kW) -> (C_out, out_H, out_W)
    # windows indices: c,i,j,p,q ; kernels: o,c,p,q -> out: o,i,j
    return np.einsum('cijpq,ocpq->oij', windows, kflipped)

# PyTorch versions using `Tensor.as_strided`

In [5]:
import torch
import torch.nn.functional as F

def conv2d_single_as(image: torch.Tensor, kernel: torch.Tensor, stride=1, padding=0, flip=False):
    """
    Single-channel 2-D sliding-window product built on `Tensor.as_strided`.

    image:   (H, W)
    kernel:  (kH, kW)
    stride:  positive int, step between windows on both axes
    padding: amount passed to `F.pad` for all four sides
    flip:    if True the kernel is reversed on both axes (true convolution);
             otherwise it is applied as given (cross-correlation)
    returns: (OH, OW), where OH = (H + 2*padding - kH) // stride + 1
             and OW likewise

    Raises ValueError if stride < 1.
    """
    assert image.dim() == 2 and kernel.dim() == 2
    if stride < 1:
        raise ValueError(f"stride must be a positive integer, got {stride}")
    kH, kW = kernel.shape

    # F.pad's 4-tuple is (left, right, top, bottom) on the last two dims;
    # the two leading singleton dims are added for the call and dropped again.
    x = F.pad(image.unsqueeze(0).unsqueeze(0), (padding, padding, padding, padding)).squeeze(0).squeeze(0)

    H, W = x.shape
    sH, sW = x.stride()
    # Unit-step windows as a view: the last two axes reuse the row/column
    # strides, so win[i, j, p, q] aliases x[i + p, j + q].
    win = x.as_strided(size=(H - kH + 1, W - kW + 1, kH, kW),
                       stride=(sH, sW, sH, sW))
    # apply the stride by keeping every `stride`-th window -> (OH, OW, kH, kW)
    win = win[::stride, ::stride]

    K = kernel.flip(0, 1) if flip else kernel
    return torch.einsum('ijpq,pq->ij', win, K)

In [6]:
def conv2d_multi_as(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor | None = None,
                    stride=1, padding=0, flip=False):
    """
    x:      (C_in, H, W)
    weight: (C_out, C_in, kH, kW)
    returns: (C_out, OH, OW)
    """
    assert x.dim() == 3 and weight.dim() == 4
    C_in, H0, W0 = x.shape
    C_out, Cin_w, kH, kW = weight.shape
    assert Cin_w == C_in

    # pad spatial only
    xp = F.pad(x.unsqueeze(0), (padding, padding, padding, padding)).squeeze(0)
    C, H, W = xp.shape

    OH = (H - kH) // stride + 1
    OW = (W - kW) // stride + 1

    sC, sH, sW = xp.stride()
    # (C_in, H-kH+1, W-kW+1, kH, kW)
    win = xp.as_strided(size=(C, H - kH + 1, W - kW + 1, kH, kW),
                        stride=(sC, sH, sW, sH, sW))
    win = win[:, ::stride, ::stride]                      # (C_in, OH, OW, kH, kW)

    K = weight.flip(-2, -1) if flip else weight           # (C_out, C_in, kH, kW)

    # contract over (C_in, kH, kW) → (C_out, OH, OW)
    out = torch.einsum('cijpq,ocpq->oij', win, K)
    if bias is not None:
        out = out + bias.view(-1, 1, 1)
    return out

In [10]:
# Scratch check: indexing with None (like unsqueeze(0)) prepends a size-1 batch axis.
torch.rand(15, 4, 19)[None].shape

torch.Size([1, 15, 4, 19])

In [7]:
def conv2d_batched_as(x: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor | None = None,
                      stride=1, padding=0, flip=False):
    """
    x:      (N, C_in, H, W)
    weight: (C_out, C_in, kH, kW)
    returns: (N, C_out, OH, OW)
    """
    assert x.dim() == 4 and weight.dim() == 4
    N, C_in, H0, W0 = x.shape
    C_out, Cin_w, kH, kW = weight.shape
    assert Cin_w == C_in

    # pad spatial dims
    xp = F.pad(x, (padding, padding, padding, padding))

    Np, C, H, W = xp.shape
    OH = (H - kH) // stride + 1
    OW = (W - kW) // stride + 1

    sN, sC, sH, sW = xp.stride()
    # (N, C_in, H-kH+1, W-kW+1, kH, kW)
    win = xp.as_strided(size=(N, C, H - kH + 1, W - kW + 1, kH, kW),
                        stride=(sN, sC, sH, sW, sH, sW))
    win = win[:, :, ::stride, ::stride]                   # (N, C_in, OH, OW, kH, kW)

    K = weight.flip(-2, -1) if flip else weight

    # contract over (C_in, kH, kW): 'ncijpq,ocpq->noij'
    out = torch.einsum('ncijpq,ocpq->noij', win, K)
    if bias is not None:
        out = out + bias.view(1, -1, 1, 1)
    return out