# <b>Chapter 11 - Running ML models on Arduino and Arm Ethos-U55 microNPU using Apache TVM</b>

## <b>Downloading the pre-trained CIFAR-10 model and input test image</b>

### Download the pre-trained CIFAR-10 model

In [None]:
!wget https://github.com/PacktPublishing/TinyML-Cookbook_2E/raw/main/Chapter11/Assets/cifar10.tflite

### Download the input.h C header file

In [None]:
!wget https://raw.githubusercontent.com/PacktPublishing/TinyML-Cookbook_2E/main/Chapter11/Assets/input.h

## <b>Deploying models with TVM using the ahead-of-time executor on the host machine</b>

### Install TVM (v0.11.1)

In [None]:
!pip install apache-tvm==0.11.1

### Install the TensorFlow Lite Python package (2.10.0)

In [None]:
!pip install tflite==2.10.0

### Load the CIFAR-10 TensorFlow Lite model from disk

In [None]:
import tflite

# Read the whole model file into memory. The context manager closes the file
# handle as soon as the bytes have been read instead of leaving it open.
with open("cifar10.tflite", "rb") as f:
  tfl_file = f.read()

# Parse the in-memory buffer into a TensorFlow Lite model object
tfl_model = tflite.Model.GetRootAsModel(tfl_file, 0)

### Import the TensorFlow Lite model in TVM

In [None]:
import tvm
mod, params = tvm.relay.frontend.from_tflite(tfl_model)

### Define the host machine as the target device

In [None]:
target = tvm.target.target.micro("host")

### Define the C runtime as TVM runtime type

In [None]:
crt = tvm.relay.backend.Runtime("crt", {"system-lib" : True})

### Define AoT executor as TVM executor type

In [None]:
aot = tvm.relay.backend.Executor("aot")

### Compile the model for the target

In [None]:
with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
    module = tvm.relay.build(mod, target, runtime=crt, executor=aot, params=params)

### Create the microTVM project

In [None]:
import os
import shutil

# Root of the scratch workspace and the folder that will receive the host project
base_dir = "/tmp/tvm/"
build_dir = base_dir + "host"

# Remove any workspace left over from a previous run, then recreate it empty
is_exist = os.path.exists(base_dir)
if is_exist:
  shutil.rmtree(base_dir)
os.mkdir(base_dir)

# Instantiate the "crt" template project around the compiled module
crt_template_dir = tvm.micro.get_microtvm_template_projects("crt")
project = tvm.micro.generate_project(crt_template_dir, module, build_dir)

### Visualize the path to the directory containing the template project for "crt"

In [None]:
print(tvm.micro.get_microtvm_template_projects("crt"))

### Build and flash

In [None]:
project.build()
project.flash()

### Get the model's input and output quantization parameters

In [None]:
# Work on the first subgraph of the model
graph = tfl_model.Subgraphs(0)

# Ask the subgraph which tensors are its first input and first output rather
# than assuming they are stored at the first and last position of the tensor
# list, which the file format does not guarantee.
i_idx = graph.Inputs(0)
o_idx = graph.Outputs(0)

i_tensor = graph.Tensors(i_idx)
o_tensor = graph.Tensors(o_idx)

i_quant = i_tensor.Quantization()
o_quant = o_tensor.Quantization()

# Read the first scale / zero-point entry of each tensor
i_scale = i_quant.Scale(0)
i_zero_point = i_quant.ZeroPoint(0)

o_scale = o_quant.Scale(0)
o_zero_point = o_quant.ZeroPoint(0)

# The output parameters are printed so they can be copied into the C
# applications written later in this notebook (out_scale / out_zero_point).
print(o_scale)
print(o_zero_point)

### Download test image

In [None]:
!wget -O ship.jpg https://github.com/PacktPublishing/TinyML-Cookbook_2E/blob/main/Chapter11/Assets/ship.jpg?raw=true

### Load the image file and resize to 32x32

In [None]:
from numpy import asarray
from PIL import Image

img_name = 'ship.jpg'
image = Image.open(img_name)
image = image.resize((32,32))

### Convert to NumPy array:

In [None]:
import numpy as np
sample = asarray(image)[np.newaxis, :]

### Normalize and quantize the input sample

In [None]:
# Scale the 8-bit pixel values to the [0, 1] range
sample = sample / 255.0
# Quantize with q = round(x / scale) + zero_point. The explicit rounding
# matters: without it the values stay fractional, and a later cast to an
# integer type would truncate them instead of rounding to the nearest level.
# NOTE(review): assumes the model input is an integer (presumably int8) tensor
# and that the cast happens when the sample is copied into it - confirm.
sample = np.round(sample / i_scale) + i_zero_point

### Run the model using the host-driven executor

In [None]:
import numpy as np

# Open a session over the transport of the project built above; everything
# that talks to the AoT executor has to happen while the session is open.
with tvm.micro.Session(project.transport()) as session:
  aot_executor = tvm.runtime.executor.aot_executor.AotModule(
      session.create_aot_executor()
  )

  # Feed the quantized sample to input 0, run once, and read back output 0
  input_tensor = aot_executor.get_input(0)
  input_tensor.copyfrom(sample)
  aot_executor.run()
  result = aot_executor.get_output(0).numpy()

### Dequantize the output

In [None]:
result = o_scale * (result - o_zero_point)

### Read the output classification

In [None]:
labels = [
    "airplane", "automobile", "bird",
    "cat", "deer", "dog",
    "frog", "horse", "ship", "truck"
]

print(f"Result: `{labels[np.argmax(result)]}`")

## <b>Deploying models on the Arduino Nano</b>

### Install Arduino CLI

In [None]:
!curl -fsSL https://raw.githubusercontent.com/arduino/arduino-cli/0.34.0/install.sh | sh

import os
os.environ['PATH'] += ':/content/bin'

### Define the Arduino Nano target

In [None]:
nano33 = tvm.target.target.micro("nrf52840")

### List all target names

In [None]:
print(tvm.target.target.MICRO_SUPPORTED_MODELS)

### Define the C runtime and AoT executor as TVM runtime and executor type

In [None]:
crt = tvm.relay.backend.Runtime("crt")

opts_exec = {"unpacked-api": True}
aot = tvm.relay.backend.Executor("aot", opts_exec)

### Write a helper function to compile the model

In [None]:
def compile_model(device):
  """Compile the imported Relay model for the given TVM target.

  The Relay module (`mod`), its parameters (`params`), the runtime (`crt`)
  and the executor (`aot`) are read from the notebook's global scope at call
  time, so re-defining any of them changes what the next call builds.

  Args:
    device: TVM target to compile for.

  Returns:
    The object returned by tvm.relay.build().
  """
  pass_ctx = tvm.transform.PassContext(
      opt_level=3,
      config={"tir.disable_vectorize": True},
  )
  with pass_ctx:
    lib = tvm.relay.build(
        mod, device, runtime=crt, executor=aot, params=params
    )
  return lib

### Compile the model for the Arduino Nano

In [None]:
lib_nano33 = compile_model(nano33)

### Display the list of supported Arduino boards in TVM

In [None]:
x = tvm.micro.get_microtvm_template_projects("arduino")
file_path = x + "/boards.json"

!cat $file_path

### Write a helper function to create an Arduino project from the library generated by the tvm.relay.build() function

In [None]:
def build_arduino_prj(board, lib):
  """Generate a fresh Arduino project for `lib` under /tmp/tvm/<board>.

  The whole /tmp/tvm/ workspace is deleted and recreated on every call, so
  any project generated there earlier (for any board) is lost.

  Args:
    board: Board name passed to the Arduino template project; it is also
      used as the name of the project folder.
    lib: Compiled module to embed in the project, e.g. the value returned by
      compile_model().

  Returns:
    The project object returned by tvm.micro.generate_project().
  """
  base_dir = "/tmp/tvm/"

  # Start from an empty workspace. Because base_dir has just been recreated,
  # build_dir cannot exist yet and needs no separate clean-up.
  if os.path.exists(base_dir):
    shutil.rmtree(base_dir)
  os.mkdir(base_dir)

  build_dir = base_dir + board

  return tvm.micro.generate_project(
    tvm.micro.get_microtvm_template_projects("arduino"),
    lib,
    build_dir,
    {
        "board": board,
        "project_type": "example_project",
    },
  )

### Create the Arduino project

In [None]:
prj_nano = build_arduino_prj("nano33ble", lib_nano33)

### Display the content of the /tmp/tvm/nano33ble folder

In [None]:
!ls /tmp/tvm/nano33ble/

### Show the content of the nano33ble.ino sketch

In [None]:
!cat /tmp/tvm/nano33ble/nano33ble.ino

### Remove the boards.json, Makefile, microtvm_api_server.py, and nano33ble.ino files from the Arduino project

In [None]:
!rm /tmp/tvm/nano33ble/boards.json
!rm /tmp/tvm/nano33ble/Makefile
!rm /tmp/tvm/nano33ble/microtvm_api_server.py
!rm /tmp/tvm/nano33ble/nano33ble.ino

### Remove the inclusion of the Arduino.h in model.c

In [None]:
# The inclusion of this file could cause compilation issues on some platforms
!sed -i 's/#include "Arduino.h"//g' /tmp/tvm/nano33ble/src/model.c

### Copy the input.h C header file into the Arduino project

In [None]:
!cp input.h /tmp/tvm/nano33ble/src

### Zip the content of the Arduino project

In [None]:
!cd /tmp/tvm/nano33ble; zip -r micro_tvm_code.zip .
!mv /tmp/tvm/nano33ble/micro_tvm_code.zip .

In [None]:
%%writefile /tmp/tvm/nano33ble/nano33ble.ino
#include "src/model.h"
#include "src/input.h"
#include "mbed.h"

float out_scale = 0.10305877029895782;
int32_t out_zero_point = 20;

// Converts `len` quantized values from `src` into floats stored in `dst`,
// using the global out_scale and out_zero_point parameters.
void dequantize(int8_t* src, float* dst, int32_t len) {
  int32_t idx = 0;
  while(idx < len) {
    // Remove the zero-point offset in integer arithmetic, then apply the scale
    const int32_t centered = src[idx] - out_zero_point;
    dst[idx] = out_scale * centered;
    ++idx;
  }
}

// Returns the index of the largest of the `len` values in `src`.
// When several entries share the largest value, the lowest index is returned.
int32_t argmax(float* src, int32_t len) {
  int32_t best = 0;

  for(int32_t idx = 1; idx < len; ++idx) {
    // Strict comparison keeps the earliest index on ties
    if(src[idx] > src[best]) {
      best = idx;
    }
  }
  return best;
}

mbed::DigitalOut led(LED1);

// Arduino entry point executed once at start-up.
void setup() {
  // NOTE(review): TVMInitialize() is not defined in this sketch; presumably
  // it comes from src/model.h and prepares the TVM runtime - confirm.
  TVMInitialize();
  // Start with the LED output at 0; loop() sets it to 1 on a correct result
  led = 0;
}

// Runs the model on the test sample and reports the outcome on the LED.
void loop() {
  // Buffers for the 10 quantized scores and their dequantized values
  int8_t out_q8[10];
  float out_f32[10];
  // NOTE(review): g_test is not defined in this sketch; presumably it is the
  // test input declared in src/input.h - confirm.
  TVMExecute(g_test, out_q8);

  dequantize(out_q8, out_f32, 10);

  int32_t max_idx = argmax(out_f32, 10);

  // Set the LED output to 1 only when the predicted index equals the
  // expected label (g_test_ilabel, presumably also from src/input.h)
  if(max_idx == g_test_ilabel) {
    led = 1;
  }
  // Halt here so the code above is executed a single time
  while(1);
}

## <b>Deploying models on the Raspberry Pi Pico</b>

### Define the Raspberry Pi Pico target

In [None]:
rp2040 = tvm.target.target.micro("rp2040")

### Define the C runtime and AoT executor as TVM runtime and executor type

In [None]:
crt = tvm.relay.backend.Runtime("crt")
aot = tvm.relay.backend.Executor("aot", {"unpacked-api": True})

### Compile the model for the Raspberry Pi Pico

In [None]:
lib_rp2040 = compile_model(rp2040)

### Create the Arduino project

In [None]:
prj_pico = build_arduino_prj("nano33ble", lib_rp2040)

### Remove the boards.json, Makefile, microtvm_api_server.py, and nano33ble.ino files from the Arduino project

In [None]:
!rm /tmp/tvm/nano33ble/boards.json
!rm /tmp/tvm/nano33ble/Makefile
!rm /tmp/tvm/nano33ble/microtvm_api_server.py
!rm /tmp/tvm/nano33ble/nano33ble.ino

### Remove the inclusion of the Arduino.h in model.c

In [None]:
# The inclusion of this file could cause compilation issues on some platforms
!sed -i 's/#include "Arduino.h"//g' /tmp/tvm/nano33ble/src/model.c

### Copy the input.h C header file into the Arduino project

In [None]:
!cp input.h /tmp/tvm/nano33ble/src

### Zip the content of the Arduino project

In [None]:
!cd /tmp/tvm/nano33ble; zip -r micro_tvm_code.zip .
!mv /tmp/tvm/nano33ble/micro_tvm_code.zip .

### Generate code with TVM for the SparkFun Artemis Nano

In [None]:
# Specify target device and execution type
t = "c -keys=arm_cpu,cpu -mcpu=cmsis-nn,cortex-m4"
artemis_nano = tvm.target.Target(t)
crt = tvm.relay.backend.Runtime("crt")
aot = tvm.relay.backend.Executor("aot", {"unpacked-api": True})

# Compile project
lib_artemis_nano = compile_model(artemis_nano)

# Build Arduino project
prj_artemis_nano = build_arduino_prj("nano33ble", lib_artemis_nano)

# Remove files not required for the project
!rm /tmp/tvm/nano33ble/boards.json
!rm /tmp/tvm/nano33ble/Makefile
!rm /tmp/tvm/nano33ble/microtvm_api_server.py
!rm /tmp/tvm/nano33ble/nano33ble.ino

# Remove the inclusion of the Arduino.h header file in /tmp/tvm/nano33ble/src/model.c
!sed -i 's/#include "Arduino.h"//g' /tmp/tvm/nano33ble/src/model.c

# Zip the content of the Arduino project
!cd /tmp/tvm/nano33ble; zip -r micro_tvm_code.zip .
!mv /tmp/tvm/nano33ble/micro_tvm_code.zip .

## <b>Installing the Arm Corstone-300 Fixed Virtual Platform</b>

### Download the FVP model for the Arm Corstone-300 platform

In [None]:
# The URL is quoted because it contains '&': unquoted, the shell would run
# wget in the background and drop the hash parameter. -O keeps the file name
# expected by the tar command in the next step.
!wget -O 'FVP_Corstone_SSE-300_11.22_20_Linux64.tgz?rev=018659bd574f4e7b95fa647e7836ccf4' 'https://developer.arm.com/-/media/Arm%20Developer%20Community/Downloads/OSS/FVP/Corstone-300/FVP_Corstone_SSE-300_11.22_20_Linux64.tgz?rev=018659bd574f4e7b95fa647e7836ccf4&hash=22A79103C6FA5FFA7AFF3BE0447F3FF9'

### Decompress the .tgz file and make FVP_Corstone_SSE-300.sh executable

In [None]:
!tar -xvzf FVP_Corstone_SSE-300_11.22_20_Linux64.tgz?rev=018659bd574f4e7b95fa647e7836ccf4

In [None]:
!chmod +x FVP_Corstone_SSE-300.sh

### Run the ./FVP_Corstone_SSE-300.sh

In [None]:
!./FVP_Corstone_SSE-300.sh \
  --i-agree-to-the-contained-eula \
  --no-interactive

In [None]:
!ls /usr/local/FVP_Corstone_SSE-300/models/Linux64_GCC-9.3

### Add the path of the Corstone-300 binaries to the $PATH environment variable

In [None]:
os.environ['PATH'] += ':/usr/local/FVP_Corstone_SSE-300/models/Linux64_GCC-9.3'

### Verify the correct installation of the FVP model for the Arm Corstone-300 platform

In [None]:
!FVP_Corstone_SSE-300_Ethos-U55 --version

## <b>Code generation with TVMC for Arm Ethos-U55</b>

### Test TVMC tool

https://github.com/apache/tvm/tree/v0.11.0/apps/microtvm/ethosu

In [None]:
!python -m tvm.driver.tvmc

### Create the ethosu_prj directory to keep the files required for the Arm Ethos-U55 application we aim to build

In [None]:
!mkdir ethosu_prj

### Create a folder called tvm_code inside the ethosu_prj directory to keep the C code generated by TVM

In [None]:
!mkdir ethosu_prj/tvm_code

### Install the Arm Ethos-U Vela Python package

In [None]:
!pip install ethos-u-vela==3.8.0

### Generate the C code for running the CIFAR-10 model inference on the Arm Ethos-U55

In [None]:
!python -m tvm.driver.tvmc compile \
             --target=ethos-u,cmsis-nn,c \
             --target-ethos-u-accelerator_config=ethos-u55-256 \
             --target-cmsis-nn-mcpu=cortex-m55 \
             --target-c-mcpu=cortex-m55 \
             --runtime=crt \
             --executor=aot \
             --executor-aot-interface-api=c \
             --executor-aot-unpacked-api=1 \
             --pass-config tir.disable_vectorize=1 \
             --pass-config tir.usmp.enable=1 \
             --pass-config tir.usmp.algorithm=hill_climb \
             --output-format=mlf \
             ./cifar10.tflite

### Extract the content of the module.tar file into the ethosu_prj/tvm_code/ directory

In [None]:
!tar -C ethosu_prj/tvm_code -xvf module.tar

## <b>Installing the software dependencies for building an application for the Arm Ethos-U microNPU</b>

### Create a folder to hold the GNU Arm embedded toolchain binaries

In [None]:
!mkdir ethosu_prj/toolchain

### Download the GNU Arm embedded toolchain

In [None]:
!curl --retry 64 -sSL 'https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2?revision=ca0cbf9c-9de2-491c-ac48-898b5bbc0443&la=en&hash=68760A8AE66026BCF99F05AC017A6A50C6FD832A' | tar -C ethosu_prj/toolchain --strip-components=1 -jx

### Add the path of the GNU Arm embedded toolchain binaries to the $PATH environment variable

In [None]:
os.environ['PATH'] += ':/content/ethosu_prj/toolchain/bin/'

### Verify the correct installation of the GNU Arm embedded toolchain

In [None]:
!arm-none-eabi-gcc -mcpu=.

### Clone the Arm Ethos-U driver (21.11 release)

In [None]:
!git clone \
  "https://review.mlplatform.org/ml/ethos-u/ethos-u-core-driver" \
  ethosu_prj/driver \
  --branch 21.11

### Clone the Arm Ethos-U platform (21.11 release)

In [None]:
!git clone \
  "https://review.mlplatform.org/ml/ethos-u/ethos-u-core-platform" \
  ethosu_prj/platform \
  --branch 21.11

### Clone the CMSIS library (5.9.0 release)

In [None]:
!git clone \
  "https://github.com/ARM-software/CMSIS_5.git" \
  ethosu_prj/cmsis \
  --branch 5.9.0

## <b>Running the CIFAR-10 model inference on the Arm Ethos-U55 microNPU</b>

### Clone the TVM v0.11.1 repository

In [None]:
!git clone \
  "https://github.com/apache/tvm.git" \
  --branch v0.11.1

### Copy the sample code (src/ and include/) to run the MobileNet v2 model inference on Arm Ethos-U55

In [None]:
!cp -r tvm/apps/microtvm/ethosu/src ethosu_prj/
!cp -r tvm/apps/microtvm/ethosu/include ethosu_prj/

### Copy the build scripts (arm-none-eabi-gcc.cmake and corstone300.ld) in the ethosu_prj/ folder


In [None]:
!cp -r tvm/apps/microtvm/ethosu/arm-none-eabi-gcc.cmake ethosu_prj/
!cp -r tvm/apps/microtvm/ethosu/corstone300.ld ethosu_prj/

### Copy the input.h C header file into the ethosu_prj/include folder

In [None]:
!cp input.h ethosu_prj/include

### Specify the memory section attribute (ethosu_scratch) for the g_test input array

In [None]:
os.environ['src_txt'] = 'g_test\[\]'
os.environ['dst_txt'] = 'g_test\[\] __attribute__((section("ethosu_scratch"), aligned(16)))'

!sed -i "s/$src_txt/$dst_txt/g" \
  ethosu_prj/include/input.h

### Write the application for running the CIFAR-10 model inference on the Arm Ethos-U55

The %%writefile cell magic is a Jupyter Notebook command that writes the content of the cell to a file on disk.

In [None]:
%%writefile ethosu_prj/src/demo_bare_metal.c
#include <stdio.h>
#include <tvm_runtime.h>
#include <tvmgen_default.h>

#include "ethosu_mod.h"
#include "uart.h"

#include "input.h"

float out_scale = 0.10305877029895782;
int32_t out_zero_point = 20;

// Converts `len` quantized values from `src` into floats stored in `dst`,
// using the global out_scale and out_zero_point parameters.
void dequantize(int8_t* src, float* dst, int32_t len) {
  int32_t idx = 0;
  while(idx < len) {
    // Remove the zero-point offset in integer arithmetic, then apply the scale
    const int32_t centered = src[idx] - out_zero_point;
    dst[idx] = out_scale * centered;
    ++idx;
  }
}

// Returns the index of the largest of the `len` values in `src`.
// When several entries share the largest value, the lowest index is returned.
int32_t argmax(float* src, int32_t len) {
  int32_t best = 0;

  for(int32_t idx = 1; idx < len; ++idx) {
    // Strict comparison keeps the earliest index on ties
    if(src[idx] > src[best]) {
      best = idx;
    }
  }
  return best;
}

// Branch-free absolute value: multiplies v by its sign, which evaluates to
// +1 for positive, -1 for negative and 0 for zero.
// NOTE(review): presumably defined here because the bare-metal build does not
// link a C library providing abs() - confirm.
int abs(int v) { return v * ((v > 0) - (v < 0)); }

int8_t out_q8[10] __attribute__((section("ethosu_scratch"), aligned(16)));
float out_f32[10] __attribute__((section("ethosu_scratch"), aligned(16)));

// Application entry point: runs the model once on the test sample, prints
// whether the predicted class matches the expected label, then asks the FVP
// to shut down.
int main(int argc, char** argv) {
  // NOTE(review): uart_init() comes from uart.h; presumably it sets up the
  // UART that carries the printf output below - confirm.
  uart_init();

  printf("Starting Demo\n");
  EthosuInit();

  printf("Running inference\n");
  // Bind the model output to the out_q8 buffer.
  // NOTE(review): the field name is presumably generated by TVM from the
  // model's output tensor name - check tvmgen_default.h if the model changes.
  struct tvmgen_default_outputs outputs = {
      .StatefulPartitionedCall_0 = out_q8,
  };

  // Bind the model input to the g_test sample (same naming caveat as above)
  struct tvmgen_default_inputs inputs = {
      .serving_default_input_1_0 = g_test,
  };
  // The driver is reserved only for the duration of the inference call and
  // released right after it
  struct ethosu_driver* driver = ethosu_reserve_driver();
  struct tvmgen_default_devices devices = {
      .ethos_u = driver,
  };
  tvmgen_default_run(&inputs, &outputs, &devices);
  ethosu_release_driver(driver);

  // Convert the 10 quantized scores to float before picking the best one
  dequantize(out_q8, out_f32, 10);

  int32_t max_idx = argmax(out_f32, 10);

  // Compare the predicted class index with the expected label
  if(max_idx == g_test_ilabel) {
    printf("The image has been correctly classified\n");
  }
  else {
    printf("Classification FAILED\n");
  }

  // The FVP will shut down when it receives "EXITTHESIM" on the UART
  printf("EXITTHESIM\n");
  // Spin forever; the return below is never reached
  while (1 == 1)
    ;
  return 0;
}

### Download the Makefile and store it in the ethosu_prj/ folder

In [None]:
!wget https://raw.githubusercontent.com/PacktPublishing/TinyML-Cookbook_2E/main/Chapter11/Assets/Makefile \
  -P ethosu_prj/

### Compile the application

In [None]:
!cd ethosu_prj; make

### Run the application on the FVP model for the Arm Corstone-300 platform

In [None]:
!cd ethosu_prj; FVP_Corstone_SSE-300_Ethos-U55 \
-C cpu0.CFGDTCMSZ=15 \
-C cpu0.CFGITCMSZ=15 \
-C mps3_board.uart0.out_file=\"-\" \
-C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \
-C mps3_board.visualisation.disable-visualisation=1 \
-C mps3_board.telnetterminal0.start_telnet=0 \
-C mps3_board.telnetterminal1.start_telnet=0 \
-C mps3_board.telnetterminal2.start_telnet=0 \
-C mps3_board.telnetterminal5.start_telnet=0 \
-C ethosu.extra_args="--fast" \
-C ethosu.num_macs=256 ./build/demo