In [1]:
# Installing required libraries and Vgg model in onnx format
from IPython.display import clear_output
!wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.onnx
!pip install onnx
clear_output()

In [2]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [3]:
# In Pypi Installation of apache-tvm CUDA is not enabled
!pip install apache-tvm-cu116 -f https://tlcpack.ai/wheels

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://tlcpack.ai/wheels
Collecting apache-tvm-cu116
  Downloading https://github.com/tlc-pack/tlcpack/releases/download/v0.7.dev1/apache_tvm_cu116-0.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (398.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m398.1/398.1 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
Collecting synr==0.6.0
  Downloading synr-0.6.0-py3-none-any.whl (18 kB)
Installing collected packages: synr, apache-tvm-cu116
Successfully installed apache-tvm-cu116-0.9.0 synr-0.6.0


In [4]:
import onnx
import tvm
from tvm import relay
from tvm.contrib import graph_executor
import numpy as np

Step 1 : Convert the model to relay format

In [5]:
# Here the model is in onnx format we are converting it into relay format
onnx_model = onnx.load('vgg16.onnx')
input_name = 'data'
shape_dict = {input_name: (1, 3, 224, 224)}  # Input shape for VGG16
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)



Step 2: Compile the model

In [6]:
# Here I'm using target as GPU
target = 'cuda'
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)



Step 3: Running the model

In [7]:
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
module.set_input(input_name, np.random.uniform(size=(1, 3, 224, 224)).astype("float32"))
module.run()
output_shape = (1, 1000)
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()

Checking Performance

In [8]:
import timeit

timing_number = 10
timing_repeat = 10

unoptimized = (
    np.array(timeit.Timer(lambda: module.run()).repeat(repeat=timing_repeat, number=timing_number))
    * 1000
    / timing_number
)


stats = {
    "mean": np.mean(unoptimized),
    "median": np.median(unoptimized),
    "std": np.std(unoptimized),
}

print(stats)

{'mean': 5.984784039999909, 'median': 6.880844799999863, 'std': 3.005015208387497}
