In [1]:
import numpy as np
from milligrad import Tensor

# Conv1D: NumPy reference implementation (sliding-window view + einsum)
B, C_in, W_in = 16, 3, 100
K, C_out = 10, 5
W_out = W_in - K + 1 + 2*0  # output width; the trailing 2*0 is 2*padding with padding = 0

x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)

# create a view of the input tensor with the sliding window dimensions
# (no data is copied: strided[b, i, w, k] aliases x[b, i, w + k])
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],), # configures array traversal: adding a stride to the last dimension to slide the window
    writeable=False, # the windows overlap in memory, so keep the view read-only to rule out accidental writes into x
)

print(np.version.full_version)
print("biwk,iko->bow", strided.shape, kernel.shape, strided.strides)


# contract over input channels (i) and kernel taps (k); result is laid out as (B, C_out, W_out)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=True)


print(out.shape)


# Wrap the NumPy arrays as milligrad tensors and run the library's conv1d on them.
x_t, kernel_t = Tensor(x), Tensor(kernel)
out_t = x_t.conv1d(kernel_t, padding=0)

# The library result must agree with the einsum reference computed above.
np.testing.assert_allclose(
    out,
    out_t.data,
    atol=1e-3,
)

1.26.4
biwk,iko->bow (16, 3, 91, 10) (3, 10, 5) (2400, 800, 8, 8)
(16, 5, 91)
(16, 3, 100) (3, 10, 5) 91 0
1.26.4
biwk,iko->bow (16, 3, 91, 10) (3, 10, 5) (2400, 800, 8, 8)


In [2]:
# Re-run milligrad's conv1d on the tensors built in the first cell (same arguments, padding=0).
out_t = x_t.conv1d(kernel_t, padding=0)

(16, 3, 100) (3, 10, 5) 91 0
1.26.4
biwk,iko->bow (16, 3, 91, 10) (3, 10, 5) (2400, 800, 8, 8)


In [3]:
# Problem size shared by the timing cells below.
B = 128       # batch size
C_in = 3      # input channels
W_in = 1000   # input width
K = 3         # kernel width
C_out = 32    # output channels
# Output width of the convolution; the trailing 2 * 0 is 2 * padding with padding = 0.
W_out = (W_in - K + 1) + 2 * 0

In [4]:
%%timeit -n 100 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=False)


59.4 ms ± 314 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [5]:
%%timeit -n 100 -r 10
x = np.random.randn(B, C_in, W_in)
kernel = np.random.randn(C_in, K, C_out)
strided = np.lib.stride_tricks.as_strided(x,
    shape=(B, C_in, W_out, K),
    strides=x.strides + (x.strides[-1],) # configures array traversal: adding a stride to the last dimension to slide the window
)
out = np.einsum("biwk,iko->bow", strided, kernel, optimize=True)

17.4 ms ± 2.12 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [6]:
%%timeit -n 100 -r 10

x_t = Tensor.randn(B, C_in, W_in)
kernel_t = Tensor.randn(C_in, K, C_out)
out_t = x_t.conv1d(kernel_t)

(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 8000, 8, 8)
(128, 3, 1000) (3, 3, 32) 998 0
1.26.4
biwk,iko->bow (128, 3, 998, 3) (3, 3, 32) (24000, 80

KeyboardInterrupt: 

In [None]:
%%timeit -n 100 -r 10

with Tensor.no_grad():
    x_t = Tensor.randn(B, C_in, W_in)
    kernel_t = Tensor.randn(C_in, K, C_out)
    out_t = x_t.conv1d(kernel_t)

81.3 ms ± 2.76 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [None]:
# Set the class-level `_no_grad` attribute directly, so it stays set for every cell run after
# this one (unlike the scoped `with Tensor.no_grad():` block used in the previous cell).
# NOTE(review): presumably this turns off gradient tracking globally; confirm against milligrad.
# Nothing in this notebook resets it, so restart the kernel before running code that needs gradients.
Tensor._no_grad = True

In [None]:
%%timeit -n 100 -r 10

x_t = Tensor.randn(B, C_in, W_in)
kernel_t = Tensor.randn(C_in, K, C_out)
out_t = x_t.conv1d(kernel_t)

83.4 ms ± 3.79 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [None]:
# Conv2D: NumPy reference implementation (2D sliding-window view + einsum)
B, C_in, H_in, W_in = 32, 3, 90, 90
K, C_out = 10, 4
# output height/width; the trailing 2*0 terms are 2*padding with padding = 0
H_out, W_out = H_in - K + 1 + 2*0, W_in - K + 1 + 2*0

x = np.random.randn(B, C_in, H_in, W_in)
kernel = np.random.randn(C_in, K, K, C_out)

# create a view of the input tensor with the sliding window dimensions
# (no data is copied: strided[b, i, h, w, k, l] aliases x[b, i, h + k, w + l])
*orig_strides, s2, s3 = x.strides
strided = np.lib.stride_tricks.as_strided(
    x,
    shape=(B, C_in, H_out, W_out, K, K),
    strides=(*orig_strides, s2, s3, s2, s3), # configures array traversal: twice s2 and s3 to select a 2D sliding window
    writeable=False, # the windows overlap in memory, so keep the view read-only
)

# Contract over input channels (i) and both kernel axes (k, l).
# The output subscripts are "bohw" so the result is laid out as (B, C_out, H_out, W_out),
# matching the (h, w) order of the window view. Writing "bowh" would silently transpose the
# two spatial axes, which the shape cannot reveal here because H_out == W_out.
out = np.einsum("bihwkl,iklo->bohw", strided, kernel)

In [None]:
# Display the shape of the Conv2D result from the previous cell.
# H_in == W_in there, so the two trailing (spatial) sizes are equal and the shape alone
# cannot tell which of them is height and which is width.
out.shape

(32, 4, 81, 81)