In [1]:
import tvm
from tvm import te
import numpy as np

n = 100

def tvm_vector_add(dtype):
    A = te.placeholder((n,), dtype=dtype)
    B = te.placeholder((n,), dtype=dtype)
    C = te.compute(A.shape, lambda i: A[i] + B[i])
    print('expression dtype:', A.dtype, B.dtype, C.dtype)
    s = te.create_schedule(C.op)
    return tvm.build(s, [A, B, C])

In [2]:
mod = tvm_vector_add('float32')
mod = tvm_vector_add('int32')

expression dtype: float32 float32 float32
expression dtype: int32 int32 int32


In [3]:
def get_abc(shape, constructor=None):
    """Return random a, b and empty c with the same shape.
    """
    np.random.seed(0)
    a = np.random.normal(size=shape).astype(np.float32)
    b = np.random.normal(size=shape).astype(np.float32)
    c = np.empty_like(a)
    if constructor:
        a, b, c = [constructor(x) for x in (a, b, c)]
    return a, b, c

In [4]:
# try other date type
def test_mod(mod, dtype):
    a, b, c = get_abc(n, lambda x: tvm.nd.array(x.astype(dtype)))
    print('tensor dtype:', a.dtype, b.dtype, c.dtype)
    mod(a, b, c)
    np.testing.assert_equal(c.asnumpy(), a.asnumpy() + b.asnumpy())

for dtype in ['float16', 'float64', 'int8','int16', 'int64']:
    mod = tvm_vector_add(dtype)
    test_mod(mod, dtype)

expression dtype: float16 float16 float16
tensor dtype: float16 float16 float16
expression dtype: float64 float64 float64
tensor dtype: float64 float64 float64
expression dtype: int8 int8 int8
tensor dtype: int8 int8 int8
expression dtype: int16 int16 int16
tensor dtype: int16 int16 int16
expression dtype: int64 int64 int64
tensor dtype: int64 int64 int64


In [5]:
def tvm_vector_add_2(dtype):
    A = te.placeholder((n,))
    B = te.placeholder((n,))
    C = te.compute(A.shape,
                    lambda i: A[i].astype(dtype) + B[i].astype(dtype))
    print('expression dtype:', A.dtype, B.dtype, C.dtype)
    s = te.create_schedule(C.op)
    return tvm.build(s, [A, B, C])

In [6]:
def test_mod_2(mod, dtype):
    a, b, c = get_abc(n)
    # by default `get_abc` returns NumPy ndarray in float32
    a_tvm, b_tvm = tvm.nd.array(a), tvm.nd.array(b)
    c_tvm = tvm.nd.array(c.astype(dtype))
    print('tensor dtype:', a_tvm.dtype, b_tvm.dtype, c_tvm.dtype)
    mod(a_tvm, b_tvm, c_tvm)
    np.testing.assert_equal(c_tvm.asnumpy(), a.astype(dtype) + b.astype(dtype))

mod = tvm_vector_add_2('int32')
test_mod_2(mod, 'int32')

expression dtype: float32 float32 int32
tensor dtype: float32 float32 int32


In [7]:
n = te.var(name='n')
type(n), n.dtype

(tvm.tir.expr.Var, 'int32')

In [8]:
A = te.placeholder((n,), name='A')
B = te.placeholder((n,), name='B')
C = te.compute((n), lambda i: A[i]+B[i], name='C' )
s = te.create_schedule(C.op)
tvm.lower(s, [A,B,C], simple_mode=True)

IRModuleNode( {GlobalVar(main): PrimFunc([A, B, C]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, n) {
    C[(i*stride)] = (A[(i*stride)] + B[(i*stride)])
  }
}
})

In [9]:
def test_mod(mod, n):
    a, b, c = get_abc(n, tvm.nd.array)
    mod(a, b, c)
    print('c.shape', c.shape)
    np.testing.assert_equal(c.asnumpy(), a.asnumpy()+b.asnumpy())

mod = tvm.build(s, [A,B,C])
test_mod(mod, 5)
test_mod(mod, 1000)

c.shape (5,)
c.shape (1000,)


In [10]:
# Multi-dimensional Shapes
def tvm_vector_add(ndim):
    A = te.placeholder([te.var() for _ in range(ndim)])
    B = te.placeholder(A.shape)
    C = te.compute(A.shape, lambda *i : A[i] + B[i], )
    s = te.create_schedule(C.op)
    return tvm.build(s, [A,B,C])

In [11]:
mod = tvm_vector_add(3)
test_mod(mod, (2,3,4))

c.shape (2, 3, 4)


In [12]:
n = te.var(name='n')
m = te.var(name='m')
A = te.placeholder((m,n), name='A')
B = te.compute((n,m), lambda i,j : A[j,i], name='B')
s = te.create_schedule(B.op)
tvm.lower(s, [A,B], simple_mode=True)

IRModuleNode( {GlobalVar(main): PrimFunc([A, B]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, n) {
    for (j, 0, m) {
      B[((i*stride) + (j*stride))] = A[((j*stride) + (i*stride))]
    }
  }
}
})

In [13]:
A.shape, B.shape

([m, n], [n, m])

In [14]:
a = tvm.nd.array(np.arange(12, dtype='float32',).reshape((3,4)))
b = tvm.nd.array(np.empty((4,3), dtype='float32'))
mod = tvm.build(s, [A,B])
mod(a, b)
a , b

(<tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 <tvm.nd.NDArray shape=(4, 3), cpu(0)>
 array([[ 0.,  4.,  8.],
        [ 1.,  5.,  9.],
        [ 2.,  6., 10.],
        [ 3.,  7., 11.]], dtype=float32))

In [15]:
C = te.compute((m*n,), lambda i : A[i//n][i%n], name='C')
s = te.create_schedule(C.op)
tvm.lower(s, [A,C], simple_mode=True)

IRModuleNode( {GlobalVar(main): PrimFunc([A, C]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, (m*n)) {
    C[i] = A[((floordiv(i, n)*stride) + (floormod(i, n)*stride))]
  }
}
})

In [16]:
mod = tvm.build(s, [A,C])
c = tvm.nd.array(np.empty((3*4,), dtype="float32"))
mod(a,c)
a, c

(<tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 <tvm.nd.NDArray shape=(12,), cpu(0)>
 array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11.],
       dtype=float32))

In [17]:
p, q = te.var(name='p'), te.var(name='q') # A[0][1] B [0][1]
D = te.compute((p,q), lambda i,j : A[(i*q+j)//n][(i*q+j) % n], name='D')
s = te.create_schedule(D.op)
tvm.lower(s,[A,D])

IRModuleNode( {GlobalVar(main): PrimFunc([A, D]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, p) {
    for (j, 0, q) {
      D[((i*stride) + (j*stride))] = A[((floordiv(((i*q) + j), n)*stride) + (floormod(((i*q) + j), n)*stride))]
    }
  }
}
})

In [18]:
d = tvm.nd.array(np.empty((5,4), dtype='float32'))
mod = tvm.build(s, [A,D])
mod(a, d)
a, d

(<tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 <tvm.nd.NDArray shape=(5, 4), cpu(0)>
 array([[ 0.0000000e+00,  1.0000000e+00,  2.0000000e+00,  3.0000000e+00],
        [ 4.0000000e+00,  5.0000000e+00,  6.0000000e+00,  7.0000000e+00],
        [ 8.0000000e+00,  9.0000000e+00,  1.0000000e+01,  1.1000000e+01],
        [-3.4082704e+35,  1.7824502e+04,  1.3592595e-43,  0.0000000e+00],
        [-4.8796550e+30,  4.5916347e-41, -4.8026899e+30,  4.5916347e-41]],
       dtype=float32))

In [19]:
A.shape, B.shape, C.shape, D.shape

([m, n], [n, m], [(m*n)], [p, q])

In [20]:
bi, bj, si, sj = [te.var(name) for name in ('bi', 'bj', 'si', 'sj')]
E = te.compute(((m-bi)//si,(n-bj)//sj), lambda i,j : A[i*si+bi][j*sj+bj], name='E')
s = te.create_schedule(E.op)
mod = tvm.build(s, [A, E, bi, si, bj, sj])

In [21]:
e = tvm.nd.array(np.empty((1,3), dtype='float32'))
mod(a, e, 1, 2, 1, 1)
a,e

(<tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 <tvm.nd.NDArray shape=(1, 3), cpu(0)>
 array([[5., 6., 7.]], dtype=float32))

# Reduction Operation

In [36]:
a = np.arange(12).astype('float32')
a.resize((3,4))
a, a.sum(axis=1)

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 array([ 6., 22., 38.], dtype=float32))

In [25]:
def sum_from_scratch(a, b):
    n = len(b)
    for i in range(n):
        b[i] = np.sum(a[i,:])
b = np.empty(3, dtype='float32')
sum_from_scratch(a, b)
a, b

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([ 6., 22., 38.], dtype=float32))

In [34]:
m, n = te.var('n'), te.var("m")
A = te.placeholder((m, n), name='A')
j = te.reduce_axis((0, n), name='j')
B = te.compute((m,), lambda i: te.sum(A[i, j], axis=j),name='B')
s = te.create_schedule(B.op)
tvm.lower(s, [A,B])

IRModuleNode( {GlobalVar(main): PrimFunc([a, b]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, n) {
    b[(i*stride)] = 0f
    for (j, 0, m) {
      b[(i*stride)] = (b[(i*stride)] + a[((i*stride) + (j*stride))])
    }
  }
}
})

In [39]:
mod = tvm.build(s, [A,B])
c = tvm.nd.array(np.empty((3,), dtype='float32'))
mod(tvm.nd.array(a), c)
a , c

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 <tvm.nd.NDArray shape=(3,), cpu(0)>
 array([ 6., 22., 38.], dtype=float32))

# if_then_else

In [44]:
m, n = te.var('m'), te.var('n')
A = te.placeholder((m,n), dtype='float32', name='A')
B = te.compute((m,n), lambda i,j : te.if_then_else(i >= j, A[i][j], 0.0))
s = te.create_schedule(B.op)
tvm.lower(s,[A, B])

IRModuleNode( {GlobalVar(main): PrimFunc([A, compute]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, m) {
    for (j, 0, n) {
      compute[((i*stride) + (j*stride))] = tir.if_then_else((i <= j), A[((i*stride) + (j*stride))], 0f)
    }
  }
}
})

In [45]:
a = np.arange(12, dtype='float32')
a.resize((3,4))
b = np.empty_like(a)
mod = tvm.build(s, [A,B])
mod(tvm.nd.array(a),tvm.nd.array(b))
a, b

(array([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]], dtype=float32),
 array([[-6.848942e+30,  4.591635e-41,  0.000000e+00,  0.000000e+00],
        [ 0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00],
        [ 0.000000e+00,  0.000000e+00,  0.000000e+00,  0.000000e+00]],
       dtype=float32))