In [1]:
import numpy as np
from milligrad import Tensor

# Conv1D
# Problem sizes: batch, input channels, input width
B, C_in, W_in = 16, 3, 100
# Kernel width and number of output channels
K, C_out = 10, 5
# Output width of a stride-1 convolution (the 2*0 term looks like 2*padding with padding = 0)
W_out = W_in - K + 1 + 2*0

x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)

# Create a view of the input tensor with the sliding window dimensions:
# strided[b, i, w, k] aliases x[b, i, w + k], no data is copied.
# Reusing the last stride for the new axis makes each step along k advance
# one element along the width. The windows overlap in memory, so the view is
# created read-only to rule out accidental writes through aliased elements.
strided = np.lib.stride_tricks.as_strided(
    x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],),
    writeable=False,
)

# Contract input channels (i) and kernel taps (k): (b,i,w,k) x (i,k,o) -> (b,o,w)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=True)

print("biwk,iko->bow", "or", f"{strided.shape},{kernel.shape}->{out.shape}")

biwk,iko->bow or (16, 3, 91, 10),(3, 10, 5)->(16, 5, 91)


In [2]:
# Problem sizes used by the Conv1D timing cells that follow.
B = 128      # batch size
C_in = 3     # input channels
W_in = 100   # input width
K = 3        # kernel width
C_out = 32   # output channels
# Stride-1 output width; the 2 * 0 term looks like 2 * padding with padding = 0.
W_out = (W_in - K + 1) + 2 * 0

In [3]:
%%timeit -n 10 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=False)


7.38 ms ± 1.07 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [4]:
%%timeit -n 10 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=True)

3.75 ms ± 1.01 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


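`optimize=True` is faster than `optimize=False` here, and its advantage grows with the input size: roughly 2x in the timings above, and roughly 2.5x for the 10x wider input below.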

In [5]:
# Problem sizes for the second round of Conv1D timings.
B = 128       # batch size
C_in = 3      # input channels
W_in = 1000   # input width, 10 times larger
K = 3         # kernel width
C_out = 32    # output channels
# Stride-1 output width; the 2 * 0 term looks like 2 * padding with padding = 0.
W_out = (W_in - K + 1) + 2 * 0

In [6]:
%%timeit -n 10 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=False)


70.9 ms ± 3.28 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [7]:
%%timeit -n 10 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=True)


28.5 ms ± 1.78 ms per loop (mean ± std. dev. of 10 runs, 10 loops each)


In [8]:
# Conv2D
# Problem sizes: batch, input channels, input height, input width
B, C_in, H_in, W_in = 32, 3, 90, 90
# Square kernel side length and number of output channels
K, C_out = 10, 4
# Stride-1 output sizes (the 2*0 terms look like 2*padding with padding = 0)
H_out, W_out = H_in - K + 1 + 2*0, W_in - K + 1 + 2*0

x = np.random.randn(B, C_in, H_in, W_in)
kernel = np.random.randn(C_in, K, K, C_out)

# Create a view of the input tensor with the sliding window dimensions:
# strided[b, i, h, w, k, l] aliases x[b, i, h + k, w + l], no data is copied.
*orig_strides, s2, s3 = x.strides
strided = np.lib.stride_tricks.as_strided(
    x,
    shape=(B, C_in, H_out, W_out, K, K),
    # s2 and s3 appear twice: once to move the window, once to walk inside it
    strides=(*orig_strides, s2, s3, s2, s3),
    # windows overlap in memory, so keep the view read-only
    writeable=False,
)

# Contract input channels (i) and both kernel axes (k, l). The output subscripts
# must be "bohw" so the result is (B, C_out, H_out, W_out); writing "bowh" would
# swap height and width, which goes unnoticed in the shape when H_in == W_in.
out = np.einsum("bihwkl,iklo->bohw", strided, kernel, optimize=True)
print("bihwkl,iklo->bohw", "or", f"{strided.shape},{kernel.shape}->{out.shape}")

bihwkl,iklo->bowh or (32, 3, 81, 81, 10, 10),(3, 10, 10, 4)->(32, 4, 81, 81)
