In [1]:
import tvm
from tvm import te
import numpy as np

# Broadcast

In [11]:
# Broadcast add for 2-D tensors
def get_broadcast_abc(shape1, shape2):
    """Define a 2-D element-wise add ``c = a + b`` with broadcasting.

    Per dimension, the two shapes must either be equal or one of them must
    be 1. A dimension of size 1 is always read at index 0, so that operand is
    repeated along that axis.

    shape1, shape2 : length-2 shapes of the placeholders ``a`` and ``b``
    Return the float32 placeholders A, B and the compute op C. The output
    size of each axis is taken from shape1 when shape2's size is 1, and from
    shape2 otherwise.

    Raises AssertionError when a shape is not 2-D or the shapes do not
    satisfy the broadcast rule above.
    """
    # Validation
    both_2d = len(shape1) == 2 and len(shape2) == 2
    assert both_2d, "Must to be 2-D"
    for d1, d2 in zip(shape1, shape2):
        assert d1 == d2 or d1 == 1 or d2 == 1, "Broadcast shape error"

    A = te.placeholder(shape1, dtype='float32', name='a')
    B = te.placeholder(shape2, dtype='float32', name='b')
    out_shape = tuple(d1 if d2 == 1 else d2 for d1, d2 in zip(shape1, shape2))

    def _pick(dim, idx):
        # A size-1 dimension is broadcast: always read its single element.
        return 0 if dim == 1 else idx

    # The parameter names i, j are kept: te.compute derives axis names from them.
    def _add(i, j):
        return A[_pick(shape1[0], i)][_pick(shape1[1], j)] + \
            B[_pick(shape2[0], i)][_pick(shape2[1], j)]

    C = te.compute(out_shape, _add, name='c')
    return A, B, C

In [13]:
# Symbolic dimensions m and n, so the definition is not tied to fixed sizes.
m, n = [te.var(name) for name in ('m', 'n')]
# The first operand has a single column; get_broadcast_abc reads it at
# column 0 for every j, i.e. it is broadcast across the n columns.
shape1 = (m, 1)
shape2 = (m, n)
A, B, C = get_broadcast_abc(shape1, shape2)

In [15]:
# Create a schedule for C's op and display the lowered IR (simple_mode=True).
s = te.create_schedule(C.op)
tvm.lower(s, [A,B,C], simple_mode=True)

IRModuleNode( {GlobalVar(main): PrimFunc([a, b, c]) attrs={"global_symbol": "main", "tir.noalias": (bool)1} {
  for (i, 0, m) {
    for (j, 0, n) {
      c[((i*stride) + (j*stride))] = (a[(i*stride)] + b[((i*stride) + (j*stride))])
    }
  }
}
})

In [37]:
# Concrete inputs: a is a 3x1 column, b is a 1x4 row, c is a 3x4 output buffer.
# NOTE(review): the cell above declares A as (m, 1) and B as (m, n), but b
# here is (1, 4) rather than (3, 4). Execution counts are non-sequential, so
# confirm that this cell still runs as shown after Restart Kernel -> Run All.
a = tvm.nd.array(np.arange(3, dtype='float32').reshape(3,1))
b = tvm.nd.array(np.arange(4, dtype='float32').reshape((1,4)))
# np.empty does not initialize c; it is passed to mod as the third argument.
c = tvm.nd.array(np.empty((3,4), dtype='float32'))
mod = tvm.build(s, [A, B, C])
mod(a, b, c)
a, b, c

(<tvm.nd.NDArray shape=(3, 1), cpu(0)>
 array([[0.],
        [1.],
        [2.]], dtype=float32),
 <tvm.nd.NDArray shape=(1, 4), cpu(0)>
 array([[0., 1., 2., 3.]], dtype=float32),
 <tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[0., 1., 2., 3.],
        [1., 2., 3., 4.],
        [2., 3., 4., 5.]], dtype=float32))

# Matrix Multiplication

In [38]:
def tvm_matrix_multi():
    """Define a matrix multiplication with symbolic shapes.

    Creates float32 placeholders ``a`` of shape (m, l) and ``b`` of shape
    (l, n), and a compute op ``c`` of shape (m, n) where
    c[i, j] = sum over k in [0, l) of a[i, k] * b[k, j].

    Return the placeholders A, B and the compute op C.
    """
    rows = te.var('m')
    cols = te.var('n')
    inner = te.var('l')
    A = te.placeholder((rows, inner), dtype='float32', name='a')
    B = te.placeholder((inner, cols), dtype='float32', name='b')
    # reduction over the shared inner dimension
    k = te.reduce_axis((0, inner), name='k')

    # The parameter names i, j are kept: te.compute derives axis names from them.
    def _dot(i, j):
        return te.sum(A[i, k] * B[k, j], axis=k)

    C = te.compute((rows, cols), _dot, name='c')
    return A, B, C

In [39]:
# Build the symbolic matrix-multiplication definition (rebinds A, B, C).
A, B, C = tvm_matrix_multi()

In [40]:
# Create a schedule for the matmul op and build it into a module (rebinds s, mod).
s = te.create_schedule(C.op)
mod = tvm.build(s, [A,B,C])

In [41]:
# Save to the d2ltvm package.
def get_abc(shape, constructor=None):
    """Create reproducible random inputs a, b and an uninitialized output c.

    shape : shape shared by all three arrays
    constructor : optional callable applied to each of a, b, c before they
        are returned (for example ``tvm.nd.array``)

    The global NumPy random generator is re-seeded with 0 on every call, so
    a and b are the same on each call for a given shape. All arrays are
    float32; c comes from ``np.empty_like`` and its contents are arbitrary.
    Return the tuple (a, b, c).
    """
    np.random.seed(0)
    # a is drawn first, then b, so the values do not depend on evaluation tricks.
    a, b = (np.random.normal(size=shape).astype(np.float32) for _ in range(2))
    c = np.empty_like(a)
    if not constructor:
        return a, b, c
    return tuple(constructor(arr) for arr in (a, b, c))

In [44]:
# Random 2x2 inputs, each wrapped with tvm.nd.array. c serves as the output
# buffer; its initial contents are whatever np.empty_like produced.
a, b, c = get_abc((2,2), tvm.nd.array)
a, b, c

(<tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[1.7640524, 0.4001572],
        [0.978738 , 2.2408931]], dtype=float32),
 <tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[ 1.867558  , -0.9772779 ],
        [ 0.95008844, -0.1513572 ]], dtype=float32),
 <tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[1.7640524, 0.4001572],
        [0.978738 , 2.2408931]], dtype=float32))

In [45]:
# Call the built module on a, b, c, then display the three arrays.
mod(a, b, c)
a, b, c

(<tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[1.7640524, 0.4001572],
        [0.978738 , 2.2408931]], dtype=float32),
 <tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[ 1.867558  , -0.9772779 ],
        [ 0.95008844, -0.1513572 ]], dtype=float32),
 <tvm.nd.NDArray shape=(2, 2), cpu(0)>
 array([[ 3.6746547, -1.784536 ],
        [ 3.9568968, -1.2956743]], dtype=float32))

# Convolution
## padding

In [59]:
def padding(X, ph, pw, val=0):
    """Pad the last two dimensions of X with a constant value.

    X : tensor with at least two dimensions
    ph, pw : padding added on each side of the second-to-last and the last
        dimension respectively
    val : value written into the padded border, default 0

    Return a compute op named 'pd' whose last two dimensions are
    nh + 2*ph and nw + 2*pw; any leading dimensions are unchanged.
    Raises AssertionError when X has fewer than two dimensions.
    """
    assert len(X.shape) >= 2 , "shape error"
    nh, nw = X.shape[-2], X.shape[-1]
    padded_shape = (*X.shape[0:-2], nh + 2 * ph, nw + 2 * pw)

    def _fill(*idx):
        row, col = idx[-2], idx[-1]
        # True for every position in the border around the original data.
        in_border = te.any(row < ph, col < pw, row >= nh + ph, col >= nw + pw)
        # Inside the border-free region, shift back into X's own coordinates.
        source = X[idx[:-2] + (row - ph, col - pw)]
        return te.if_then_else(in_border, val, source)

    return te.compute(padded_shape, _fill, name='pd')

In [68]:
# Pad a 3x4 tensor by one row on each side (ph=1, pw=0), giving a 5x4 result.
A = te.placeholder((3,4), name='a')
B = padding(A, 1, 0)
s = te.create_schedule(B.op)
mod = tvm.build(s, [A, B])
# Input of ones, so the padded rows stand out in the displayed output.
a = tvm.nd.array(np.ones((3,4), dtype='float32'))
# Output buffer with 3 + 2*ph rows; np.empty leaves it uninitialized.
b = tvm.nd.array(np.empty((3+2, 4),dtype='float32'))
mod(a, b)
a, b

(<tvm.nd.NDArray shape=(3, 4), cpu(0)>
 array([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=float32),
 <tvm.nd.NDArray shape=(5, 4), cpu(0)>
 array([[0., 0., 0., 0.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [0., 0., 0., 0.]], dtype=float32))

## convolution

In [67]:
# Save to the d2ltvm package.
def conv_out_size(n, k, p, s):
    """Return the convolution output size along one dimension.

    n : input size (width or height)
    k : kernel size
    p : padding applied on each side
    s : stride

    The result is floor((n - k + 2*p) / s) + 1.
    """
    padded_span = n - k + 2 * p
    full_steps = padded_span // s
    return full_steps + 1

# Save to the d2ltvm package.
def conv(oc, ic, nh, nw, kh, kw, ph=0, pw=0, sh=1, sw=1):
    """Define a 2-D convolution over a multi-channel input.

    oc, ic : number of output and input channels
    nh, nw : input height and width
    kh, kw : kernel height and width
    ph, pw : height and width padding sizes, default 0
    sh, sw : height and width strides, default 1

    Return X, K, Y, PaddedX: the input placeholder of shape (ic, nh, nw),
    the kernel placeholder of shape (oc, ic, kh, kw), the output compute op
    of shape (oc, oh, ow), and the padded input. PaddedX is X itself when
    both ph and pw are 0.
    """
    # input and kernel placeholders
    X = te.placeholder((ic, nh, nw), name='X')
    K = te.placeholder((oc, ic, kh, kw), name='K')
    # pad only when padding was requested in at least one direction
    if ph != 0 or pw != 0:
        PaddedX = padding(X, ph, pw)
    else:
        PaddedX = X
    # output height and width
    oh = conv_out_size(nh, kh, ph, sh)
    ow = conv_out_size(nw, kw, pw, sw)
    # reduction axes: input channel, kernel row, kernel column
    red_c = te.reduce_axis((0, ic), name='ric')
    red_h = te.reduce_axis((0, kh), name='rkh')
    red_w = te.reduce_axis((0, kw), name='rkw')

    # The parameter names c, i, j are kept: te.compute derives axis names from them.
    def _window_sum(c, i, j):
        return te.sum(
            PaddedX[red_c, i*sh+red_h, j*sw+red_w] * K[c, red_c, red_h, red_w],
            axis=[red_c, red_h, red_w])

    Y = te.compute((oc, oh, ow), _window_sum, name='Y')
    return X, K, Y, PaddedX