In [1]:
import tvm
from tvm import relay
import onnx
from tvm.contrib import graph_executor
import numpy as np


In [2]:
# 1. Read the serialized ONNX graph from disk.
#    The location is kept in its own variable so it is easy to spot and change.
onnx_model_path = r'/home/dharineesh22/tvm_assignment/base_resnet_model.onnx'
onnx_model = onnx.load(onnx_model_path)


In [3]:
# 2. Map each model input name to the static shape it is imported with.
#    The only entry is the tensor named "input".
shape_dict = dict(input=(1, 32, 32, 1))


In [4]:
# 3. Import the ONNX graph into Relay.
#    The frontend returns a pair that is unpacked into the Relay module (`mod`)
#    and the accompanying parameters (`params`).
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_dict)


In [5]:
# 4. Run the Relay-level passes on the imported module.
target = "llvm"

# Construct the passes up front; they are applied inside the pass context below.
infer_types = relay.transform.InferType()
eliminate_common_subexpr = relay.transform.EliminateCommonSubexpr()

with tvm.transform.PassContext(opt_level=3):
    # Type inference first, then common-subexpression elimination on the typed module.
    mod = infer_types(mod)
    optimized_mod = eliminate_common_subexpr(mod)


In [6]:
# 5. Compile the optimized model into a shared library (.so)
# relay.build picks up the PassContext that is active when it is called, so the
# build is wrapped in an opt_level=3 context. Called outside of any context it
# compiles at the default optimization level instead of the level this
# notebook asks for in the optimization step.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(optimized_mod, target=target, params=params)

# Save the compiled model as .so file
lib.export_library('optimized_model.so')




One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


In [7]:
# 6. Prepare the graph executor for the compiled library.
# The device is derived from the build target string (index 0); for a GPU build
# this would be e.g. tvm.cuda(0). Alternative kept from the original notebook:
# ctx = tvm.cpu()
ctx = tvm.device(target, 0)

# Look up the library's 'default' factory, instantiate it on the device and
# wrap the result in a GraphModule (graph_executor, not graph_runtime).
default_factory = lib['default']
runtime = graph_executor.GraphModule(default_factory(ctx))



In [8]:
# Create random input data.
# - The shape is read from shape_dict so it always matches the shape the model
#   was imported with, instead of repeating the literal tuple here.
# - A seeded generator makes the input identical on every run, so outputs from
#   different targets (e.g. CPU vs. GPU) can be compared on the same data.
input_rng = np.random.default_rng(0)
input_data = input_rng.uniform(-1, 1, size=shape_dict["input"]).astype("float32")

# Set the input to the model
runtime.set_input("input", input_data)

# Run inference
runtime.run()

# Get the output as a NumPy array.
# NDArray.numpy() is the replacement for the deprecated NDArray.asnumpy().
output = runtime.get_output(0).numpy()

print("Inference output:", output)

# Outputs recorded from earlier runs that used unseeded random input
# (so they will not match the seeded run above exactly):

# cpu inference output:

# Inference output: [[2.7283281e-42 1.4005959e-17 2.0017176e-27 1.0000000e+00 1.7657622e-19
#   6.9533849e-14 6.1296901e-31 2.6723934e-16 8.2944257e-41 3.0368763e-13]]

# gpu inference output:

# Inference output: [[6.6519638e-42 1.4720455e-17 2.4196034e-27 1.0000000e+00 7.3476063e-20
#   9.9715603e-13 1.9307948e-31 5.2554139e-17 2.9285737e-41 5.3557067e-13]]


Inference output: [[6.6519638e-42 1.4720455e-17 2.4196034e-27 1.0000000e+00 7.3476063e-20
  9.9715603e-13 1.9307948e-31 5.2554139e-17 2.9285737e-41 5.3557067e-13]]


In [9]:
# Fetch the graph JSON from the compiled library and save it to a file in the
# current working directory.
graph_json = lib.get_graph_json()
with open('optimized_model_graph.json', mode='w') as graph_file:
    graph_file.write(graph_json)