In [1]:
import tensorwrap as tw
from tensorwrap import nn

# Array Tests:
1. (10, 10) Pure Matmul: 30.9 µs ± 342 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
2. (1e3, 1e4) Pure Matmul: 1.9 ms ± 5.05 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)

Shapes larger than (1e3, 1e4) run out of memory.

In [2]:
# Pure Matmul: time `a @ a` on a (10, 10) array produced by tw.randn.
# The array is built once, outside the timed statement, so %timeit covers only
# the `@` expression.
# NOTE(review): if tensorwrap evaluates lazily/asynchronously, this may time
# dispatch rather than the finished computation — confirm.
a = tw.randn((10, 10))
%timeit a @ a

30.9 µs ± 342 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [3]:
# Pure Matmul: same measurement on a (1000, 10000) array.
# `a` is not square here, so it is multiplied by `a.T` (presumably the
# transpose, as in NumPy — TODO confirm) to keep the inner dimensions compatible.
a = tw.randn((int(1e3), int(1e4)))  # int(...) converts the float literals 1e3/1e4 to integer dims
%timeit a @ a.T

1.9 ms ± 5.05 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Transformation Tests:
1. (1e2, 1e3) Jit matmul:

First `%timeit` run: 136 µs ± 40.1 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)

Second `%timeit` run: 94.5 µs ± 2.28 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)

In [2]:
# Matmul wrapped in tw.function. The summary notes label this case "Jit matmul",
# so tw.function is presumably a JIT-compiling transformation — TODO confirm.
@tw.function
def matmul():
    # The (100, 1000) input is created inside the wrapped function, so every
    # timed call covers tw.randn as well as the matrix product.
    a = tw.randn((int(1e2), int(1e3)))
    return a @ a.T
# Timed right after definition; the following cell repeats the measurement.
%timeit matmul()

136 µs ± 40.1 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
# Repeat the timing of the already-defined `matmul` for comparison with the previous run.
%timeit matmul()

94.5 µs ± 2.28 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [2]:
# Pure Autodiff: tw.grad applied directly, with no tw.function wrapper.
@tw.grad
def grad_fn(x):
    # Objective passed to tw.grad: tw.sum over the product x @ x.T.
    return tw.sum(x @ x.T)

# (100, 1000) input built outside the timed statement.
a = tw.randn((100, 1000))
%timeit grad_fn(a)

2.38 ms ± 541 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [2]:
# Chained Autodiff: tw.grad is applied first, then tw.function wraps the result.
# NOTE: this cell rebinds both `grad_fn` and `a`, replacing any earlier
# definitions with the same names in the running kernel.
@tw.function
@tw.grad
def grad_fn(x):
    # Same objective as the un-wrapped variant: tw.sum over x @ x.T.
    return tw.sum(x @ x.T)

# (100, 1000) input built outside the timed statement.
a = tw.randn((100, 1000))
%timeit grad_fn(a)

103 µs ± 1.6 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [3]:
# Repeat the timing of the current `grad_fn` on the same `a` to check run-to-run stability.
%timeit grad_fn(a)

102 µs ± 946 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
