In [1]:
import numpy as np
import tvm
from tvm import relax
from tvm.ir.module import IRModule
from tvm.script import relax as R
from tvm.script import tir as T

[09:57:38] /home/ningzhangcaltech/Github_Repo/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.0.0git with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`
[09:57:38] /home/ningzhangcaltech/Github_Repo/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.0.0git with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`
[09:57:38] /home/ningzhangcaltech/Github_Repo/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.0.0git with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`


In [2]:
import torch
import torch.nn as nn
from torch import fx
from torch.nn import functional as F

## 通过 Builder 构造 IRModule

In [5]:
# Build TensorIR from tensor expressions (TE).

from tvm import te

# te.placeholder only declares a symbolic input tensor; no actual memory is allocated.
A = te.placeholder((128, 128), dtype="float32", name="A")
B = te.placeholder((128, 128), dtype="float32", name="B")

# Inspect one placeholder: its Python type on the first line, its shape on the second.
print(type(A), A.shape, sep="\n")

<class 'tvm.te.tensor.Tensor'>
[128, 128]


In [6]:
def te_matmul(A: te.Tensor, B: te.Tensor) -> te.Tensor:
    """Describe a matrix multiplication of ``A`` and ``B`` as a tensor expression.

    Args:
        A: Left operand; its second dimension is the reduction extent.
        B: Right operand; its first dimension must equal ``A.shape[1]``.

    Returns:
        A TE tensor named "matmul" of shape ``(A.shape[0], B.shape[1])``.

    Raises:
        AssertionError: If the inner dimensions of ``A`` and ``B`` differ.
    """
    rows, inner = A.shape[0], A.shape[1]
    assert inner == B.shape[0]
    cols = B.shape[1]

    # Reduction axis running over the shared inner dimension.
    k = te.reduce_axis((0, inner), name="k")

    def _dot(i, j):
        # Output element (i, j): sum over k of A[i, k] * B[k, j].
        return te.sum(A[i, k] * B[k, j], axis=k)

    return te.compute((rows, cols), _dot, name="matmul")

# Apply the TE matmul description to the two placeholders.
C = te_matmul(A, B)

# Build a TensorIR function whose parameters are A, B and the result C, then display it.
matmul_prim_func = te.create_prim_func([A, B, C])
matmul_prim_func.show()

In [9]:
def te_relu(A: te.Tensor) -> te.Tensor:
    """Describe an element-wise ReLU over ``A`` as a tensor expression.

    Args:
        A: Input tensor; the varargs index function lets it have any number
            of dimensions.

    Returns:
        A TE tensor named "relu" with the same shape as ``A``, where each
        element is ``max(A[...], 0)``.
    """

    def _clamp_at_zero(*indices):
        # Read the element at the given indices and clamp it from below at 0.
        return te.max(A(*indices), 0)

    return te.compute(A.shape, _clamp_at_zero, name="relu")

### First case: 1-D input of shape (10,)

X1 = te.placeholder((10,), name="X1", dtype="float32")
Y1 = te_relu(X1)
te.create_prim_func([X1, Y1]).show()

### Second case: 2-D input of shape (10, 20)

# The placeholder carries its own name ("X2") so the two generated functions
# are easy to tell apart when their TensorIR is displayed.
X2 = te.placeholder((10, 20), name="X2", dtype="float32")
Y2 = te_relu(X2)
te.create_prim_func([X2, Y2]).show()

In [11]:
### Operator fusion: chain matmul and relu in a single TE description.

C = te_matmul(A, B)
D = te_relu(C)

# Only A, B and D are parameters here, so the matmul result becomes a
# temporary that is allocated inside the generated function.
fused_prim_func = te.create_prim_func([A, B, D])
fused_prim_func.show()

# Passing C as well turns the intermediate matmul result into a parameter,
# so the intermediate result is stored too.
fused_with_intermediate = te.create_prim_func([A, B, C, D])
fused_with_intermediate.show()

## 使用 block builder 构造 IR module

In [18]:
# Relax variables for the two function parameters: 128x128 float32 tensors.
# NOTE: this rebinds A and B, which earlier cells bound to te placeholders.
A = relax.Var("A", relax.TensorStructInfo((128, 128), "float32"))
B = relax.Var("B", relax.TensorStructInfo((128, 128), "float32"))

bb = relax.BlockBuilder()

with bb.function("main"):
    with bb.dataflow():
        # Every emit call generates a variable inside the dataflow block.
        C = bb.emit_te(te_matmul, A, B)
        D = bb.emit_te(te_relu, C)
        # emit_output binds D to an output variable that can be referenced
        # outside the dataflow block. It is deliberately not named `R`, which
        # would shadow the `tvm.script.relax` alias imported at the top.
        func_out = bb.emit_output(D)
    # Marks the function's return value; may be called only once per function.
    bb.emit_func_output(func_out, params=[A, B])

# C is reported as a DataflowVar, which also satisfies the relax.Var check below.
print(type(C))
assert isinstance(C, relax.Var)

MyModule = bb.get()
MyModule.show()

<class 'tvm.relax.expr.DataflowVar'>


在幕后，bb.emit_te 做了以下事情：

1. 为 A 和 B 创建一个输入 te.placeholder。

2. 通过 te_matmul 函数运行它们。

3. 调用 te.create_prim_func 来创建一个 TensorIR 函数。

4. 通过 call_tir 生成对该 TensorIR 函数的调用。

通过 bb.emit_output 创建每个 dataflow block 的输出变量。

函数输出由 bb.emit_func_output 标记。 我们只能在每个函数作用域内调用一次 emit_func_output。