Compiling a neural network using TVM - CPU (LLVM)

In [7]:
# Installing required libraries and Vgg model in onnx format
from IPython.display import clear_output
!wget https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.onnx
!pip install onnx
!pip install apache-tvm

--2023-03-17 22:00:34--  https://s3.amazonaws.com/onnx-model-zoo/vgg/vgg16/vgg16.onnx
Resolving s3.amazonaws.com (s3.amazonaws.com)... 54.231.166.64, 52.216.249.182, 52.217.99.182, ...
Connecting to s3.amazonaws.com (s3.amazonaws.com)|54.231.166.64|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 553437328 (528M) [application/x-www-form-urlencoded]
Saving to: ‘vgg16.onnx.1’


2023-03-17 22:00:47 (41.6 MB/s) - ‘vgg16.onnx.1’ saved [553437328/553437328]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [8]:
import onnx
import tvm
from tvm import relay
from tvm.contrib import graph_executor
import numpy as np

Step 1: Convert the model to Relay format

In [9]:
# Load the ONNX model from disk and import it into TVM as a Relay module.
input_name = "data"
onnx_model = onnx.load("vgg16.onnx")
# Map the graph input name to the fixed input shape used for VGG16.
shape_dict = {
    input_name: (1, 3, 224, 224),
}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

Step 2: Compile the model

In [10]:
# Build the Relay module for the CPU through the LLVM backend.
target = "llvm"
build_context = tvm.transform.PassContext(opt_level=3)
with build_context:
    lib = relay.build(mod, target=target, params=params)

Step 3: Running the model

In [11]:
# Create a graph executor for the compiled library on device 0 of the build target.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))
# Seed the generator so the random test input is the same on every run.
rng = np.random.default_rng(0)
module.set_input(input_name, rng.uniform(size=(1, 3, 224, 224)).astype("float32"))
module.run()
# assumes the model's first output has shape (1, 1000) — TODO confirm against the model
output_shape = (1, 1000)
# Copy output 0 into a freshly allocated TVM array and convert it to NumPy.
tvm_output = module.get_output(0, tvm.nd.empty(output_shape)).numpy()


Checking Performance

In [13]:
import timeit

# Each of the `timing_repeat` measurements times `timing_number` consecutive runs.
timing_number = 10
timing_repeat = 10

run_timer = timeit.Timer(lambda: module.run())
batch_seconds = run_timer.repeat(repeat=timing_repeat, number=timing_number)

# Convert each batch total (seconds) into milliseconds per single run.
unoptimized = np.array(batch_seconds) * 1000 / timing_number

# Summary statistics over the per-run times, in milliseconds.
stats = dict(
    mean=np.mean(unoptimized),
    median=np.median(unoptimized),
    std=np.std(unoptimized),
)

print(stats)

{'mean': 2433.1346460500117, 'median': 2469.799572000011, 'std': 77.4230920250124}


Tuning the model will result in better performance.