In [6]:
import tvm
from tvm import relay

import numpy as np

from tvm.contrib.download import download_testdata

# PyTorch imports
import torch
import torchvision

In [7]:
from torchvision.models import ResNet18_Weights

# The class names ship with the weights metadata; dump them one per line
# (no trailing newline) into labels.txt.
labels = ResNet18_Weights.DEFAULT.meta["categories"]  # 1000 labels
labels_text = "\n".join(labels)
with open("labels.txt", "w", encoding="utf-8") as labels_file:
    labels_file.write(labels_text)
print("Wrote labels.txt with", len(labels), "classes")

Wrote labels.txt with 1000 classes


In [8]:
model_name = "resnet18"
# `pretrained=True` is deprecated since torchvision 0.13 in favour of `weights`.
# "DEFAULT" selects the model's default pretrained weights, i.e. the same
# ResNet18_Weights.DEFAULT entry whose category names are written to labels.txt.
model = getattr(torchvision.models, model_name)(weights="DEFAULT")
# Inference mode: freezes batch-norm statistics and disables dropout.
model = model.eval()

# We grab the TorchScripted model via tracing
# The dummy input only fixes the traced shape; its values are irrelevant.
input_shape = [1, 3, 224, 224]
input_data = torch.randn(input_shape)
scripted_model = torch.jit.trace(model, input_data).eval()

In [9]:
from PIL import Image

img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"
img_path = download_testdata(img_url, "cat.png", module="data")
# Force 3-channel RGB: a PNG may carry an alpha channel or a palette, which
# would give a tensor the 3-channel Normalize below cannot handle.
pil_img = Image.open(img_path).convert("RGB").resize((224, 224))

# Preprocess the image and convert to tensor
from torchvision import transforms

my_preprocess = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
img_tensor = my_preprocess(pil_img)
# Prepend the batch axis. `img` is the NumPy array consumed by the later cells
# (shape list for the Relay import, executor input, PyTorch comparison).
img = np.expand_dims(img_tensor, 0)

In [10]:
# Name under which the graph input is registered; the same name is used later
# when feeding the executor via set_input.
input_name = "input0"
# One (name, shape) pair per model input, taken from the preprocessed image.
shape_list = [(input_name, img.shape)]
# Convert the traced TorchScript model into a Relay module plus its parameters.
mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

## Choosing the Target Architecture

In TVM, a target defines how the model will be compiled for the hardware. You can either use:

1. **Target strings**: e.g., `"llvm -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -mattr=+neon"`
2. **Helper functions**: e.g., `tvm.target.arm_cpu(model="rasp4b64")` or `tvm.target.cuda()`

| Device | TVM Helper | Example Target String |
|--------|------------|---------------------|
| Raspberry Pi 4 (64-bit) | `tvm.target.arm_cpu(model="rasp4b64")` | `llvm -mtriple=aarch64-linux-gnu -mcpu=cortex-a72 -mattr=+neon` |
| Raspberry Pi 3 / Zero 2 | `tvm.target.arm_cpu(model="rasp3b")` (32-bit OS only; use the target string on a 64-bit OS) | `llvm -mtriple=aarch64-linux-gnu -mcpu=cortex-a53 -mattr=+neon` |
| x86_64 Desktop | `tvm.target.Target("llvm -mcpu=native")` | `llvm -mcpu=native` |
| Jetson Xavier NX GPU | `tvm.target.cuda(arch="sm_72")` | `cuda -arch=sm_72` |
| STM32 Microcontrollers | `tvm.target.stm32(series="stm32H7xx")` | `c -mcpu=cortex-m7` |

### Cross-Compilation Notes

When compiling on a host machine (e.g., x86 laptop) for an embedded target (Raspberry Pi, Jetson), 
TVM uses cross-compilation. Make sure to:

- Specify `mtriple` to match target OS/architecture.
- Specify `mcpu` to match the exact CPU core.
- Optionally specify `mattr` for hardware features.

You can list built-in target tags using:
```python
tvm.target.list_tags()
```

Refs: https://tvm.apache.org/docs/reference/api/python/target.html

In [11]:
# Compile for the local CPU through LLVM (host and device are the same here).
target = tvm.target.Target("llvm", host="llvm")

# `dev` is the device handle the runtime cell uses to instantiate the module.
dev = tvm.cpu(0)
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Save graph JSON (explicit UTF-8, matching how labels.txt is written)
with open("resnet18_tvm.json", "w", encoding="utf-8") as f_json:
    f_json.write(lib.get_graph_json())

# Save parameters
with open("resnet18_tvm.params", "wb") as f_params:
    f_params.write(tvm.runtime.save_param_dict(lib.params))

# Compile the operators into a shared library (.so; what Windows calls a DLL)
from tvm.contrib import cc
lib.export_library("resnet18_tvm.so", fcompile=cc.create_shared)

One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


## After compilation

After compiling the model for your preferred architecture, you can either continue with this notebook to run the model in Python (on the target device or on this machine),
or go to the `tvm_cpp` directory and copy the following TVM-generated files into `artifacts`:
- labels.txt
- resnet18_tvm.json
- resnet18_tvm.params
- resnet18_tvm.so

From this you can compile your C++ code with the appropriate libraries.

In [6]:
from tvm.contrib import graph_executor

dtype = "float32"

# Instantiate the compiled library's "default" entry on `dev` and wrap it in a
# graph executor.
graph_factory = lib["default"]
m = graph_executor.GraphModule(graph_factory(dev))

# Feed the preprocessed image under the input name chosen at conversion time.
input_tensor = tvm.nd.array(img.astype(dtype))
m.set_input(input_name, input_tensor)

# Run inference and fetch the first output.
m.run()
tvm_output = m.get_output(0)

In [7]:
def _read_imagenet_lines(file_name):
    """Download `file_name` from the Cadene pretrained-models data folder.

    Returns the file's lines with surrounding whitespace stripped.
    """
    url = "".join(
        [
            "https://raw.githubusercontent.com/Cadene/",
            "pretrained-models.pytorch/master/data/",
            file_name,
        ]
    )
    path = download_testdata(url, file_name, module="data")
    # Explicit encoding so the read does not depend on the platform default.
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f]


# Synset file: the first space-separated token of each line is the key, the
# remainder of the line is the class name.
synsets = _read_imagenet_lines("imagenet_synsets.txt")
key_to_classname = {}
for line in synsets:
    key, _sep, name = line.partition(" ")
    key_to_classname[key] = name

# Class file: line i holds the synset key for class index i.
class_id_to_key = _read_imagenet_lines("imagenet_classes.txt")

# Get top-1 result for TVM
top1_tvm = np.argmax(tvm_output.numpy()[0])
tvm_class_key = class_id_to_key[top1_tvm]

# Convert input to PyTorch variable and get PyTorch result for comparison
with torch.no_grad():
    torch_img = torch.from_numpy(img)
    output = model(torch_img)

    # Get top-1 result for PyTorch (row 0 of the batch, same as for TVM)
    top1_torch = np.argmax(output.numpy()[0])
    torch_class_key = class_id_to_key[top1_torch]

print(f"Relay top-1 id: {top1_tvm}, class name: {key_to_classname[tvm_class_key]}")
print(f"Torch top-1 id: {top1_torch}, class name: {key_to_classname[torch_class_key]}")

Relay top-1 id: 281, class name: tabby, tabby cat
Torch top-1 id: 281, class name: tabby, tabby cat
