## 1. 建立並保存 ONNX 模型檔案
以下是一個使用 TensorFlow 建立鳶尾花（Iris）分類模型並將其導出為 ONNX 格式的範例。該模型使用簡單的全連接層來進行分類，並轉換為 ONNX 格式，方便在 TVM 或其他 ONNX 支持的推理引擎上運行。

### 1.1 安裝必要的套件
如果尚未安裝 tensorflow 和 tf2onnx，可以使用以下命令安裝：



In [1]:
!pip install tensorflow tf2onnx

[0m

### 1.2 建立並訓練 TensorFlow 模型
以下程式碼將建立一個簡單的神經網絡來分類鳶尾花數據集，並將其導出為 ONNX 格式。

In [2]:
import tensorflow as tf
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable. The train/test split is
# already pinned with random_state=42; without this call the trained weights
# (and therefore the exported model's predictions) change on every run.
tf.keras.utils.set_random_seed(42)

# Load the Iris dataset; features are cast to float32, labels kept as-is.
iris = load_iris()
X = iris.data.astype(np.float32)
y = iris.target

# Hold out 20% of the samples for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build a small fully connected classifier.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(4,)),  # 4 input features
    tf.keras.layers.Dense(10, activation='relu'),  # hidden layer
    tf.keras.layers.Dense(3, activation='softmax') # output layer, 3 classes
])

# Compile with Adam and sparse categorical cross-entropy, tracking accuracy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train silently for 50 epochs with mini-batches of 5 samples.
model.fit(X_train, y_train, epochs=50, batch_size=5, verbose=0)

# Evaluate on the held-out split and report the accuracy.
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"模型準確率: {accuracy:.2f}")

2024-11-12 20:44:13.967297: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-12 20:44:14.005158: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-12 20:44:14.005176: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-12 20:44:14.005201: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-12 20:44:14.012298: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-12 20:44:14.012894: I tensorflow/core/platform/cpu_feature_guard.cc:182] This Tens

模型準確率: 0.97


### 1.3 將模型轉換為 ONNX 格式
使用 tf2onnx 將訓練好的 TensorFlow 模型轉換為 ONNX 格式：

In [3]:
import tf2onnx

# Destination file for the serialized ONNX model.
output_path = "tf_model.onnx"

# Input signature handed to the converter: one float32 tensor named
# "float_input" whose shape is (None, 4).
spec = (tf.TensorSpec(shape=(None, 4), dtype=tf.float32, name="float_input"),)

# Convert the Keras model at opset 13; the second element of the returned
# pair is not used here.
model_proto, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)

# Serialize the converted model and write the raw bytes to disk.
onnx_bytes = model_proto.SerializeToString()
with open(output_path, "wb") as f:
    f.write(onnx_bytes)

print(f"ONNX 模型已保存至 {output_path}")

ONNX 模型已保存至 tf_model.onnx


2024-11-12 20:44:18.133091: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-11-12 20:44:18.133236: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-11-12 20:44:18.155487: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-11-12 20:44:18.155599: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session


In [1]:
import onnxruntime as ort
import numpy as np

# One sample with four float32 feature values, shaped (1, 4).
input_data = np.array([[6.3, 3.3, 6.0, 2.5]], dtype=np.float32)

# Open the exported ONNX model with ONNX Runtime.
session = ort.InferenceSession('tf_model.onnx')

# Feed the sample under the name of the model's first declared input.
first_input = session.get_inputs()[0]
input_name = first_input.name

# Run inference; None is passed as the list of requested output names.
feeds = {input_name: input_data}
pred_onnx = session.run(None, feeds)

# Show the raw prediction result.
print(pred_onnx)

[array([[5.2434858e-04, 8.7534554e-02, 9.1194111e-01]], dtype=float32)]


### 1.4 TVM輸出共享庫

In [5]:
# import sys

# original_platform = sys.platform
# sys.platform = "linux"

# # 恢復原始平台
# sys.platform = original_platform

In [2]:
import tvm
from tvm import relay
from tvm.contrib import cc, utils
from tvm.contrib import graph_executor
import onnx

# Read the ONNX model exported earlier.
onnx_model = onnx.load("tf_model.onnx")

# Import the model into Relay with its input pinned to shape (1, 4).
# The input name has to match the one stored in the ONNX model.
input_name = 'float_input'
shape_dict = {input_name: (1, 4)}
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)

# Generate CPU code through LLVM, with the host target set to x86-64 Linux.
# NOTE(review): only the host carries -mtriple; the primary "llvm" target has
# none, so it presumably falls back to LLVM's default triple - confirm both
# agree on the build machine.
# Alternative for an aarch64 Linux host:
# target = tvm.target.Target("llvm", host="llvm -mtriple=aarch64-linux-gnu")
target = tvm.target.Target("llvm", host="llvm -mtriple=x86_64-linux-gnu")

# Compile the Relay module at optimization level 3.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Export the build as output.so using gcc.
# Alternative for aarch64: lib.export_library("output.so", cc="aarch64-linux-gnu-gcc")
lib.export_library("output.so", cc="gcc")

[21:04:06] /home/jovyan/project/ONNX-MLIR/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.1.3 with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`
[21:04:06] /home/jovyan/project/ONNX-MLIR/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.1.3 with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`
[21:04:06] /home/jovyan/project/ONNX-MLIR/tvm/src/target/llvm/llvm_instance.cc:226: Error: Using LLVM 19.1.3 with `-mcpu=apple-latest` is not valid in `-mtriple=arm64-apple-macos`, using default `-mcpu=generic`
One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


In [7]:
!rm -rf output.so

In [3]:
# Compile tf_inference.cpp into ./main as C++17, adding the TVM, dlpack and
# dmlc-core include directories and linking the TVM runtime by explicit path
# to ../tvm/build/libtvm_runtime.so (plus -ldl and -pthread).
!g++ -std=c++17 -o main tf_inference.cpp \
    -I../tvm/include \
    -I../tvm/3rdparty/dlpack/include \
    -I../tvm/3rdparty/dmlc-core/include \
    ../tvm/build/libtvm_runtime.so \
    -ldl -pthread

In file included from [01m[K../tvm/include/tvm/runtime/container/base.h:28[m[K,
                 from [01m[K../tvm/include/tvm/runtime/container/string.h:29[m[K,
                 from [01m[K../tvm/include/tvm/runtime/module.h:31[m[K,
                 from [01m[Ktf_inference.cpp:2[m[K:
  594 | #define LOG(level) LOG_##level
      | 
In file included from [01m[K../tvm/3rdparty/dmlc-core/include/dmlc/io.h:15[m[K,
                 from [01m[K../tvm/include/tvm/runtime/module.h:29[m[K,
                 from [01m[Ktf_inference.cpp:2[m[K:
[01m[K../tvm/3rdparty/dmlc-core/include/dmlc/./logging.h:263:[m[K [01;36m[Knote: [m[Kthis is the location of the previous definition
  263 | #define LOG(severity) LOG_##severity.stream()
      | 
In file included from [01m[K../tvm/include/tvm/runtime/container/base.h:28[m[K,
                 from [01m[K../tvm/include/tvm/runtime/container/string.h:29[m[K,
                 from [01m[K../tvm/include/tvm/runtime/mod

In [None]:
# Variant of the tf_inference.cpp build that links the TVM runtime with
# -ltvm_runtime instead of an explicit .so path.
# NOTE(review): no -L directory is given, so libtvm_runtime presumably has to
# be on the linker's default search path - confirm.
!g++ -std=c++17 -o main tf_inference.cpp \
    -I../tvm/include \
    -I../tvm/3rdparty/dlpack/include \
    -I../tvm/3rdparty/dmlc-core/include \
    -ltvm_runtime -ldl -pthread

In [4]:
!./main

Prediction Probabilities: [0.000524349, 0.0875346, 0.911941]


In [5]:
# Check the exported library by passing it to the linker.
# NOTE(review): `ld` attempts a link rather than inspecting the file; the
# reported undefined reference to `expf` presumably only means libm was not on
# this command line - confirm, and consider `ldd` or `nm -D` if the goal is to
# list the library's dependencies.
!ld ./output.so

ld: ./output.so: undefined reference to `expf'


#### 使用 TVM runtime 加載二進制文件並設置輸入數據，即可執行推論。

In [None]:
from tvm.contrib import graph_executor


# Load the compiled shared library from disk.
loaded_lib = tvm.runtime.load_module("output.so")

# Instantiate the library's "default" entry on the local CPU device and wrap
# it in a graph executor.
module = graph_executor.GraphModule(loaded_lib["default"](tvm.cpu()))

# One sample with four float32 feature values, shaped (1, 4).
input_data = np.array([[6.3, 3.3, 6.0, 2.5]], dtype=np.float32)

# Bind the sample to the "float_input" input and run inference.
module.set_input("float_input", tvm.nd.array(input_data))
module.run()

# NDArray.asnumpy() is deprecated; NDArray.numpy() is its replacement.
output = module.get_output(0).numpy()
output

In [None]:
# !ngrok tcp 0.0.0.0:9090
# !python -m tvm.exec.rpc_server --host 0.0.0.0 --port=9090

In [None]:
import tvm
from tvm import rpc, relay
from tvm.contrib import utils, graph_executor
import onnx

# Read the ONNX model exported earlier.
onnx_model = onnx.load("tf_model.onnx")

# Import the model into Relay with its input pinned to shape (1, 4).
# The input name has to match the one stored in the ONNX model.
input_name = 'float_input'
shape_dict = {input_name: (1, 4)}
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)

# Compile on the development machine at optimization level 3.
# NOTE(review): a plain "llvm" target carries no -mtriple; cross-compiling for
# a different device (e.g. aarch64) would presumably need one - confirm intent.
target = "llvm"
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Use a local RPC session. To reach a real device, connect to it instead:
# remote = rpc.connect("<device IP address>", 9090)
remote = rpc.LocalSession()

# Export the build as a tar archive inside a temporary directory, then send
# the archive through the RPC session.
temp = utils.tempdir()
lib_fname = temp.relpath("deploy_lib.tar")
lib.export_library(lib_fname)
remote.upload(lib_fname)

# Load the uploaded archive on the remote side by its file name.
remote_lib = remote.load_module("deploy_lib.tar")


In [None]:
lib

In [None]:
remote_lib

In [None]:
lib_fname

In [None]:
import numpy as np

# Create a graph executor from the remotely loaded module's "default" entry,
# using the CPU device of the RPC session.
module = graph_executor.GraphModule(remote_lib["default"](remote.cpu()))

# One sample with four float32 feature values, shaped (1, 4).
input_data = np.array([[6.3, 3.3, 6.0, 2.5]], dtype=np.float32)

# Bind the sample to the "float_input" input.
module.set_input("float_input", input_data)

# Run inference.
module.run()

# Fetch the first output as a NumPy array. NDArray.asnumpy() is deprecated;
# NDArray.numpy() is its replacement.
output = module.get_output(0).numpy()
print("推論結果：", output)

In [None]:
lib_fname

In [13]:
import numpy as np
import tvm
from tvm import rpc, relay
from tvm.contrib import utils, graph_executor

# Connect to the RPC server exposed through the ngrok TCP tunnel.
remote = rpc.connect("0.tcp.jp.ngrok.io", 11685)

# Load the compiled module on the remote side.
# NOTE(review): this absolute path looks like a temporary-directory path copied
# from an earlier run and nothing is uploaded in this cell, so the file
# presumably must already exist on the RPC server's filesystem - confirm, or
# upload the archive first and load it by file name.
remote_lib = remote.load_module("/tmp/tmp7ppa104d/deploy_lib.tar")

# Create a graph executor from the module's "default" entry on the remote CPU.
module = graph_executor.GraphModule(remote_lib["default"](remote.cpu()))

# One sample with four float32 feature values, shaped (1, 4).
input_data = np.array([[6.3, 3.3, 6.0, 2.5]], dtype=np.float32)

# Bind the sample to the "float_input" input.
module.set_input("float_input", input_data)

# Run inference.
module.run()

# Fetch the first output as a NumPy array. NDArray.asnumpy() is deprecated;
# NDArray.numpy() is its replacement.
output = module.get_output(0).numpy()
print("推論結果：", output)

# Drop the references to the executor, the remote module and the session.
del module
del remote_lib
del remote

推論結果： [[0.00405862 0.29633874 0.69960266]]


### 1.5 TVM 進行編譯產生 C

In [None]:
import os

# `!export ...` runs in a throwaway subshell, so the variables it sets never
# reach the kernel or any later `!` command. Assigning to os.environ makes
# them visible to this process and inherited by every subprocess the notebook
# starts afterwards.
_tvm_home = "/home/jovyan/project/ONNX-MLIR/tvm"
os.environ["TVM_HOME"] = _tvm_home

# Prepend TVM's Python package directory while keeping any existing
# PYTHONPATH entries. This affects child interpreters (e.g. `!python3 ...`);
# the sys.path of the already-running kernel is not changed.
_tvm_python = os.path.join(_tvm_home, "python")
_existing_pythonpath = os.environ.get("PYTHONPATH", "")
if _existing_pythonpath:
    os.environ["PYTHONPATH"] = _tvm_python + os.pathsep + _existing_pythonpath
else:
    os.environ["PYTHONPATH"] = _tvm_python

In [None]:
!TVM_LIBRARY_PATH=/home/jovyan/project/ONNX-MLIR/tvm/build python3 run.py

In [None]:
import tvm
from tvm import relay
from tvm.contrib import cc
import onnx

# Read the ONNX model exported earlier.
onnx_model = onnx.load("tf_model.onnx")

# Import the model into Relay with the "float_input" input pinned to (1, 4).
shape_dict = {"float_input": (1, 4)}
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)

# Build with the "c" target at optimization level 3.
target = "c"
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Retrieve the source text from the built library module.
built_module = lib.get_lib()
c_source_code = built_module.get_source()

# Write the source text to output.c.
with open('output.c', 'w') as file:
    file.write(c_source_code)

print("C source code 已經儲存到 output.c")

# Disabled alternative export:
# lib.export_library("model.zip")

In [None]:
!gcc -o inference output.c -lm

In [None]:
!g++ output.c -o output -I../tvm/3rdparty/dlpack/include -ltvm_runtime -ldl -lpthread

## microTVM

In [None]:
import tvm
from tvm import relay
from tvm.contrib import utils
from tvm.relay.backend import Runtime
import onnx

# Read the ONNX model exported earlier.
onnx_model = onnx.load("tf_model.onnx")

# Import the model into Relay with its input pinned to shape (1, 4).
# The input name has to match the one stored in the ONNX model.
input_name = 'float_input'
shape_dict = {input_name: (1, 4)}
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)

# Select the C runtime and an LLVM x86-64 Linux target with -system-lib.
runtime = Runtime("c")
target = tvm.target.Target("llvm -system-lib -mtriple=x86_64-linux-gnu")

# Compile at optimization level 3, passing the runtime selection to the build.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params, runtime=runtime)

# Export the build to a file named "model.c", using gcc.
# NOTE(review): the target is "llvm" rather than "c", so this file is
# presumably a gcc-produced binary and not C source despite its name. A later
# cell extracts ./module.tar, which this cell does not create - confirm the
# intended export format and file name.
lib.export_library("model.c", cc="gcc")


In [None]:
!ls

In [None]:
!tar -xvf ./module.tar