# [2.5. Exercises for TensorIR](https://mlc.ai/chapter_tensor_program/tensorir_exercises.html)

Solutions for the proposed exercises:

In [79]:
import numpy as np
import tvm
from tvm.ir.module import IRModule
from tvm.script import tir as T

In [80]:
# init data
# The dtype is pinned to int64 because the TensorIR buffers in this notebook are
# declared "int64"; NumPy's default integer is platform dependent.
a = np.arange(16, dtype=np.int64).reshape(4, 4)
b = np.arange(16, 0, -1, dtype=np.int64).reshape(4, 4)

In [81]:
# numpy version
# Reference result: the element-wise sum of the two 4x4 inputs.
c_np = np.add(a, b)
c_np

array([[16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16]])

In [82]:
# low-level numpy version
def lnumpy_add(a: np.ndarray, b: np.ndarray, c: np.ndarray):
  """Element-wise add written as explicit loops: c[i, j] = a[i, j] + b[i, j].

  The loop bounds come from the shape of `c`, so any 2-D size works (the
  original 4x4 case included). The result is written into `c` in place and
  nothing is returned.

  Args:
    a: first 2-D operand, indexable over the full extent of `c`.
    b: second 2-D operand, indexable over the full extent of `c`.
    c: 2-D output buffer that receives the sum.
  """
  rows, cols = c.shape
  for i in range(rows):
    for j in range(cols):
      c[i, j] = a[i, j] + b[i, j]
# Uninitialised output buffer is fine here: the loop version writes every entry.
c_lnumpy = np.empty(shape=(4, 4), dtype=np.int64)
lnumpy_add(a=a, b=b, c=c_lnumpy)
c_lnumpy

array([[16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16],
       [16, 16, 16, 16]])

In [83]:
# TensorIR version
@tvm.script.ir_module
class MyAdd:
  # Element-wise C = A + B over 4x4 int64 buffers.
  @T.prim_func
  def add(A: T.Buffer((4, 4), "int64"),
          B: T.Buffer((4, 4), "int64"),
          C: T.Buffer((4, 4), "int64")):
    T.func_attr({"global_symbol": "add"})
    for row, col in T.grid(4, 4):
      with T.block("C"):
        # Both block axes are spatial ("S"): each (vi, vj) writes its own output cell.
        vi, vj = T.axis.remap("SS", [row, col])
        C[vi, vj] = A[vi, vj] + B[vi, vj]

# Compile the module for the host CPU and wrap the NumPy inputs as TVM arrays.
rt_lib = tvm.build(MyAdd, target="llvm")
a_tvm, b_tvm = (tvm.nd.array(arr) for arr in (a, b))
# The kernel writes every output cell, so the buffer can start uninitialised.
c_tvm = tvm.nd.array(np.empty(shape=(4, 4), dtype=np.int64))
rt_lib["add"](a_tvm, b_tvm, c_tvm)
# The TVM result must agree with the NumPy reference.
np.testing.assert_allclose(actual=c_tvm.numpy(), desired=c_np, rtol=1e-5)

# 2.5.1.2. Exercise 1: Broadcast Add

In [84]:
# init data
# The dtype is pinned to int64 because the TensorIR buffers in this notebook are
# declared "int64"; NumPy's default integer is platform dependent.
a = np.arange(16, dtype=np.int64).reshape(4, 4)
# b is a length-4 vector that gets broadcast across the rows of a.
b = np.arange(4, 0, -1, dtype=np.int64)

In [85]:
# numpy version
# Reference result: NumPy broadcasts the length-4 vector b across every row of a.
c_np = np.add(a, b)
c_np

array([[ 4,  4,  4,  4],
       [ 8,  8,  8,  8],
       [12, 12, 12, 12],
       [16, 16, 16, 16]])

## Low-level NumPy

In [86]:
# low-level numpy version
def lnumpy_add_bc(a: np.ndarray, b: np.ndarray, c: np.ndarray):
  """Broadcast add written as explicit loops: c[i, j] = a[i, j] + b[j].

  `b` is a 1-D vector indexed by the column only, so the same vector is added
  to every row of `a`. The loop bounds come from the shape of `c`, so any 2-D
  size works (the original 4x4 case included). The result is written into `c`
  in place and nothing is returned.

  Args:
    a: 2-D operand, indexable over the full extent of `c`.
    b: 1-D operand with one entry per column of `c`.
    c: 2-D output buffer that receives the sum.
  """
  rows, cols = c.shape
  for i in range(rows):
    for j in range(cols):
      c[i, j] = a[i, j] + b[j]
# Uninitialised output buffer is fine here: the loop version writes every entry.
c_lnumpy = np.empty(shape=(4, 4), dtype=np.int64)
lnumpy_add_bc(a=a, b=b, c=c_lnumpy)
c_lnumpy

array([[ 4,  4,  4,  4],
       [ 8,  8,  8,  8],
       [12, 12, 12, 12],
       [16, 16, 16, 16]])

## TensorIR

In [87]:
# TensorIR version
# NOTE: this reuses the name MyAdd from the element-wise example and shadows it.
@tvm.script.ir_module
class MyAdd:
  # Broadcast add: C[i, j] = A[i, j] + B[j] for a 4x4 A and a length-4 B.
  @T.prim_func
  def add(A: T.Buffer((4, 4), "int64"),
          B: T.Buffer((4,), "int64"),  # 1-D shape spelled as a real tuple; `(4)` is just the int 4
          C: T.Buffer((4, 4), "int64")):
    T.func_attr({"global_symbol": "add"})
    for i, j in T.grid(4, 4):
      with T.block("C"):
        vi = T.axis.spatial(4, i)
        vj = T.axis.spatial(4, j)
        # B has no row axis: the same B[vj] is added to every row.
        C[vi, vj] = A[vi, vj] + B[vj]

# Compile the module for the host CPU and wrap the NumPy inputs as TVM arrays.
rt_lib = tvm.build(MyAdd, target="llvm")
a_tvm, b_tvm = (tvm.nd.array(arr) for arr in (a, b))
# The kernel writes every output cell, so the buffer can start uninitialised.
c_tvm = tvm.nd.array(np.empty(shape=(4, 4), dtype=np.int64))
rt_lib["add"](a_tvm, b_tvm, c_tvm)
# The TVM result must agree with the NumPy broadcast reference.
np.testing.assert_allclose(actual=c_tvm.numpy(), desired=c_np, rtol=1e-5)

# 2.5.1.3. Exercise 2: 2D Convolution

In [88]:
# Problem size: batch, input channels, height, width, output channels, kernel size.
N, CI, H, W, CO, K = 1, 1, 8, 8, 2, 3
# Output extent used throughout this exercise (input extent minus kernel size plus one).
OUT_H, OUT_W = H - K + 1, W - K + 1
# The dtype is pinned to int64 because the TensorIR buffers below are declared
# "int64"; NumPy's default integer is platform dependent.
data = np.arange(N*CI*H*W, dtype=np.int64).reshape(N, CI, H, W)
weight = np.arange(CO*CI*K*K, dtype=np.int64).reshape(CO, CI, K, K)

In [89]:
# Spot-check one element: with the arange fill, entry (3, 3) of the 8x8 plane is 3*8 + 3 = 27.
data[0, 0, 3, 3]

27

In [90]:
# Show the kernels: two 3x3 filters holding the values 0..8 and 9..17.
weight

array([[[[ 0,  1,  2],
         [ 3,  4,  5],
         [ 6,  7,  8]]],


       [[[ 9, 10, 11],
         [12, 13, 14],
         [15, 16, 17]]]])

In [112]:
# torch version
import torch

# conv2d is run in float64 so the integer-valued sums stay exact; the legacy
# torch.Tensor(ndarray) constructor would silently cast to float32, which is
# only exact for integers below 2**24.
data_torch = torch.as_tensor(data, dtype=torch.float64)
weight_torch = torch.as_tensor(weight, dtype=torch.float64)
conv_torch = torch.nn.functional.conv2d(data_torch, weight_torch)
# Round before casting so a tiny floating-point error cannot truncate to the wrong integer.
conv_torch = np.rint(conv_torch.numpy()).astype(np.int64)
conv_torch.shape

(1, 2, 6, 6)

## Low-level NumPy

In [106]:
# low-level numpy version
def lnumpy_conv2d(data: np.ndarray, weight: np.ndarray, H, W, K, CO):
  """Reference 2D convolution (stride 1, no padding) written as explicit loops.

  Computes, for batch element 0 only:
    out[co, oh, ow] = sum over ci, kh, kw of
                      data[0, ci, oh + kh, ow + kw] * weight[co, ci, kh, kw]

  The output extent is derived from the arguments rather than from notebook
  globals, and rows/columns are indexed in (H, W) order, so non-square inputs
  work as well.

  Args:
    data: input indexed as (batch, in_channel, row, col); only batch 0 is read.
    weight: kernels indexed as (out_channel, in_channel, kernel_row, kernel_col).
    H: number of rows of `data`.
    W: number of columns of `data`.
    K: kernel size (the kernel is K x K).
    CO: number of output channels.

  Returns:
    Integer array of shape (CO, H - K + 1, W - K + 1).
  """
  out_h, out_w = H - K + 1, W - K + 1
  C = np.zeros([CO, out_h, out_w], dtype=int)
  # Shape trace kept from the original cell.
  print(data.shape)
  print(weight.shape)
  in_channels = data.shape[1]
  for co in range(CO):
    for oh in range(out_h):
      for ow in range(out_w):
        for ci in range(in_channels):
          for kh in range(K):
            for kw in range(K):
              C[co, oh, ow] += data[0, ci, oh + kh, ow + kw] * weight[co, ci, kh, kw]
  return C

In [111]:
npconf = lnumpy_conv2d(data, weight, H, W, K, CO)
# lnumpy_conv2d returns no batch axis, so compare against batch 0 of the torch reference.
np.testing.assert_allclose(npconf, conv_torch[0], rtol=1e-5)
npconf.shape

(1, 1, 8, 8)
(2, 1, 3, 3)


(2, 6, 6)

## TensorIR

__TODO:__ generalize the loop nest so that every hard-coded index `0` is replaced by the corresponding loop variable.

In [113]:
@tvm.script.ir_module
class MyConv:
   # 2D convolution, stride 1, no padding:
   #   C[n, co, oh, ow] = sum over ci, kh, kw of A[n, ci, oh + kh, ow + kw] * B[co, ci, kh, kw]
   @T.prim_func
   def conv(A: T.Buffer((N, CI, H, W),         "int64"),
            B: T.Buffer((CO, CI, K, K),        "int64"),
            C: T.Buffer((N, CO, OUT_H, OUT_W), "int64")):
      T.func_attr({"global_symbol": "conv", "tir.noalias": True})
      for n, co, oh, ow, ci, kh, kw in T.grid(N, CO, OUT_H, OUT_W, CI, K, K):
         with T.block("C"):
            # Spatial axes: one per output coordinate, in the same order as C's shape.
            vn  = T.axis.spatial(N, n)
            vco = T.axis.spatial(CO, co)
            voh = T.axis.spatial(OUT_H, oh)
            vow = T.axis.spatial(OUT_W, ow)
            # Reduction axes: input channel and kernel window are summed over,
            # so they must not be declared spatial.
            vci = T.axis.reduce(CI, ci)
            vkh = T.axis.reduce(K, kh)
            vkw = T.axis.reduce(K, kw)
            # Initialise the accumulator inside the block so the result does not
            # depend on the caller passing a zero-filled output buffer.
            with T.init():
               C[vn, vco, voh, vow] = T.int64(0)
            C[vn, vco, voh, vow] = C[vn, vco, voh, vow] + A[vn, vci, voh + vkh, vow + vkw] * B[vco, vci, vkh, vkw]

In [122]:
# Compile the module for the host CPU and wrap the NumPy inputs as TVM arrays.
rt_lib = tvm.build(MyConv, target="llvm")
data_tvm, weight_tvm = (tvm.nd.array(arr) for arr in (data, weight))
# Zero-filled output buffer: the kernel accumulates partial products into it.
conv_tvm = tvm.nd.array(np.zeros(shape=(N, CO, OUT_H, OUT_W), dtype=np.int64))
rt_lib["conv"](data_tvm, weight_tvm, conv_tvm)
# The TVM result must agree with the torch reference.
np.testing.assert_allclose(actual=conv_tvm.numpy(), desired=conv_torch, rtol=1e-5)