# Using TVM Complier on CPU

![flow](https://raw.githubusercontent.com/apache/tvm-site/main/images/tutorial/overview.png)

## Import dependencies

In [1]:
import tvm 
import numpy as np
import onnx
import os
import tvm.relay as relay
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import warnings
import time
warnings.filterwarnings('ignore')

caused by: ["[Errno 2] The file to load file system plugin from does not exist.: '/home/khadas/.pyenv/versions/3.7.12/lib/python3.7/site-packages/tensorflow_io/python/ops/libtensorflow_io_plugins.so'"]
caused by: ['/home/khadas/.pyenv/versions/3.7.12/lib/python3.7/site-packages/tensorflow_io/python/ops/libtensorflow_io.so: cannot open shared object file: No such file or directory']


## Prepare our own dataset

In [2]:
# Actions that we try to detect 5 classes
actions = np.array(['hello', 'iloveyou', 'yes','donothing'])
# Map actions to integers
label_map = {label:num for num, label in enumerate(actions)}
print(f'Label map: {label_map}')

# Map x, y for data and labels
DATA_PATH = os.path.join('MP_Data') 
no_sequences = 100
sequence_length = 30
sequences, labels = [], []
for action in actions:
    for sequence in range(no_sequences):
        window = []
        for frame_num in range(sequence_length):
            res = np.load(os.path.join(DATA_PATH, action, str(sequence), "{}.npy".format(frame_num)))
            window.append(res)
        sequences.append(window)
        labels.append(label_map[action])

# One hot encoding
y = to_categorical(labels).astype(int)
print(f'y shape: {y.shape}')
# print(f'Y: \n{y}')
X = np.array(sequences)
print(f'X shape: {X.shape}')

# Train Test split with testing size 0.1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=11)

print(f'X_train shape: {X_train.shape}')
print(f'X_test shape: {X_test.shape}')
print(f'y_train shape: {y_train.shape}')
print(f'y_test shape: {y_test.shape}')



Label map: {'hello': 0, 'iloveyou': 1, 'yes': 2, 'donothing': 3}
y shape: (400, 4)
X shape: (400, 30, 1662)
X_train shape: (360, 30, 1662)
X_test shape: (40, 30, 1662)
y_train shape: (360, 4)
y_test shape: (40, 4)


## Load TF Onnx model

In [3]:
# Model Path
model_path = 'slModel.onnx'
# Load onnx model
onnx_model = onnx.load(model_path)

# Set random seed for random numpy
np.random.seed(3)

### Show Onnx model input

In [4]:
print(f'Model inputs: {onnx_model.graph.input}')

Model inputs: [name: "lstm_10_input"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_param: "unk__303"
      }
      dim {
        dim_value: 30
      }
      dim {
        dim_value: 1662
      }
    }
  }
}
]


### Show Onnx model output

In [5]:
print(f'Model output: {onnx_model.graph.output}')

Model output: [name: "dense_9"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_param: "unk__304"
      }
      dim {
        dim_value: 4
      }
    }
  }
}
]


## Convert Onnx to Relay for compilation

In [6]:
# compiler target
target = 'llvm'

# Input parameter name
input_name = onnx_model.graph.input[0].name
# Input shape of the model
input_shape = (1, 30, 1662)
# Tell the ralay which ONNX parameter is input
shape_dict = {input_name: input_shape}
print(shape_dict)
# Passing information into relay which is from onnx model
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

{'lstm_10_input': (1, 30, 1662)}


### Build Relay

In [7]:
# The optimization level of this pass.
opt_level = 3
# Each pass context contains a number of auxiliary information that is used to help an optimization pass.
with tvm.transform.PassContext(opt_level=opt_level):
    # builds a Relay function to run on TVM graph executor.
    lib = relay.build(mod, target=target, params=params)

One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


### Create a runtime module

In [8]:
# device to optimize to (CPU)
dev = tvm.device(str(target), 0)

# Create a runtime executor module given a graph and module.
# Wrapper runtime module.
module = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

## Benchmark module performance

`benchmark(device, func_name='run', repeat=5, number=5, min_repeat_ms=None, limit_zero_time_iterations=100, end_to_end=False, cooldown_interval_ms=0, repeats_to_cooldown=1, **kwargs)¶`

Calculate runtime of a function by repeatedly calling it.

Use this function to get an accurate measurement of the runtime of a function. The function is run multiple times in order to account for variability in measurements, processor speed or other external factors. Mean, median, standard deviation, min and max runtime are all reported. On GPUs, CUDA and ROCm specifically, special on-device timers are used so that synchonization and data transfer operations are not counted towards the runtime. This allows for fair comparison of runtimes across different functions and models. The end_to_end flag switches this behavior to include data transfer operations in the runtime.

Returns
timing_results – Runtimes of the function. Use .mean to access the mean runtime, use .results to access the individual runtimes (in seconds).

Return type
BenchmarkResult

### Before optimization

In [9]:
timing_results = module.benchmark(device = dev, repeat = 30, number = 1)
print(timing_results)

Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  16.0726      15.1003      28.0091      14.6035       2.8954   
               


In [10]:
print(f'Mean time before optimization : {timing_results.mean}')

Mean time before optimization : 0.01607257056666667


## Tune the model

The previous model was compiled to work on the TVM runtime, but did not include any platform specific optimization. In this section, we will show you how to build an optimized model using TVM to target your working platform.

In some cases, we might not get the expected performance when running inferences using our compiled module. In cases like this, we can make use of the auto-tuner, to find a better configuration for our model and get a boost in performance. Tuning in TVM refers to the process by which a model is optimized to run faster on a given target. This differs from training or fine-tuning in that it does not affect the accuracy of the model, but only the runtime performance. As part of the tuning process, TVM will try running many different operator implementation variants to see which perform best. The results of these runs are stored in a tuning records file.

In the simplest form, tuning requires you to provide three things:

- the target specification of the device you intend to run this model on

- the path to an output file in which the tuning records will be stored

- a path to the model to be tuned.

### Import model tuning dependencies

In [8]:
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm import autotvm

### Set a LocalRunner

Set up some basic parameters for the runner. The runner takes compiled code
that is generated with a specific set of parameters and measures the
performance of it. ``number`` specifies the number of different
configurations that we will test, while ``repeat`` specifies how many
measurements we will take of each configuration. ``min_repeat_ms`` is a value
that specifies how long need to run configuration test. If the number of
repeats falls under this time, it will be increased. This option is necessary
for accurate tuning on GPUs, and is not required for CPU tuning. Setting this
value to 0 disables it. The ``timeout`` places an upper limit on how long to
run training code for each tested configuration.




In [9]:
number = 1
repeat = 30
min_repeat_ms = 0  # since we're tuning on a CPU, can be set to 0
timeout = 10  # in seconds

# create a TVM runner on local device
runner = autotvm.LocalRunner(
    number=number,
    repeat=repeat,
    timeout=timeout,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)

### Tuning options

Simple Structure for holding tuning options.

In [10]:
# Tuning parameters
tuning_option = {
    "tuning_records": "signLanguageModel-v3-autotuning-cpu.json",
    "use_transfer_learning": True,
    "tuner": "xgb",
    "n_trial": 1500,
    "early_stopping": 100,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"), runner=runner
    ),
}

### Extract Task from the model

Task is a tunable composition of template functions.

Tuner takes a tunable task and optimizes the joint configuration space of all the template functions in the task. This module defines the task data structure, as well as a collection(zoo) of typical tasks of interest.

Definition of task function.

Task can be constructed from tuple of func, args, and kwargs. func is a state-less function, or a string that registers the standard task.



In [14]:
# begin by extracting the tasks from the onnx model
tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)



### Tune Task sequentially

In [15]:
# Create tuning function
def tune_tasks(
    tasks,
    measure_option,
    tuner="xgb",
    n_trial=1000,
    early_stopping=None,
    tuning_records="tuning.json",
    use_transfer_learning=True,
):
    # create tmp log file
    tmp_log_file = tuning_records + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        # create tuner
        if tuner == "xgb" or tuner == "xgb-rank":
            tuner_obj = XGBTuner(tsk, loss_type="rank")
        elif tuner == "xgb_knob":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="knob")
        elif tuner == "xgb_itervar":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="itervar")
        elif tuner == "xgb_curve":
            tuner_obj = XGBTuner(tsk, loss_type="rank", feature_type="curve")
        elif tuner == "ga":
            tuner_obj = GATuner(tsk, pop_size=50)
        elif tuner == "random":
            tuner_obj = RandomTuner(tsk)
        elif tuner == "gridsearch":
            tuner_obj = GridSearchTuner(tsk)
        else:
            raise ValueError("Invalid tuner: " + tuner)

        if use_transfer_learning:
            if os.path.isfile(tmp_log_file):
                tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # process tuning
        tsk_trial = n_trial
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )

    # pick best records to a cache file
    autotvm.record.pick_best(tmp_log_file, tuning_records)
    os.remove(tmp_log_file)

### Start tuning

In [16]:
tune_tasks(tasks, **tuning_option)

[Task  1/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  1/16]  Current/Best:    0.14/   0.79 GFLOPS | Progress: (8/1500) | 14.17 s



[Task  1/16]  Current/Best:    0.20/   1.23 GFLOPS | Progress: (16/1500) | 23.19 s



[Task  1/16]  Current/Best:    0.96/   1.23 GFLOPS | Progress: (24/1500) | 24.86 s



[Task  1/16]  Current/Best:    0.18/   1.23 GFLOPS | Progress: (32/1500) | 26.99 s



[Task  1/16]  Current/Best:    0.91/   1.23 GFLOPS | Progress: (40/1500) | 28.84 s



[Task  1/16]  Current/Best:    0.18/   1.23 GFLOPS | Progress: (48/1500) | 30.73 s



[Task  1/16]  Current/Best:    0.18/   1.23 GFLOPS | Progress: (54/1500) | 32.05 s Done.
[Task  2/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  2/16]  Current/Best:    0.19/   0.92 GFLOPS | Progress: (8/1500) | 3.12 s



[Task  2/16]  Current/Best:    1.27/   1.27 GFLOPS | Progress: (16/1500) | 4.74 s



[Task  2/16]  Current/Best:    0.16/   1.60 GFLOPS | Progress: (24/1500) | 6.76 s



[Task  2/16]  Current/Best:    0.07/   1.60 GFLOPS | Progress: (27/1500) | 7.58 s



 Done.
[Task  3/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  3/16]  Current/Best:    4.60/   4.67 GFLOPS | Progress: (8/1500) | 2.92 s



[Task  3/16]  Current/Best:    4.65/   4.72 GFLOPS | Progress: (16/1500) | 4.60 s



[Task  3/16]  Current/Best:    1.09/   5.26 GFLOPS | Progress: (24/1500) | 6.36 s



[Task  3/16]  Current/Best:    0.73/   5.26 GFLOPS | Progress: (32/1500) | 8.14 s



[Task  3/16]  Current/Best:    0.88/   5.26 GFLOPS | Progress: (40/1500) | 10.08 s



[Task  3/16]  Current/Best:    1.12/   5.26 GFLOPS | Progress: (48/1500) | 11.89 s



[Task  3/16]  Current/Best:    4.61/   5.26 GFLOPS | Progress: (56/1500) | 13.63 s



[Task  3/16]  Current/Best:    4.71/   5.35 GFLOPS | Progress: (96/1500) | 29.98 s



[Task  3/16]  Current/Best:    2.72/   5.35 GFLOPS | Progress: (104/1500) | 32.14 s



[Task  3/16]  Current/Best:    5.21/   5.35 GFLOPS | Progress: (112/1500) | 33.87 s



[Task  3/16]  Current/Best:    1.36/   5.35 GFLOPS | Progress: (120/1500) | 35.74 s



[Task  3/16]  Current/Best:    1.08/   5.35 GFLOPS | Progress: (128/1500) | 40.09 s



[Task  3/16]  Current/Best:    1.16/   5.35 GFLOPS | Progress: (136/1500) | 41.81 s



[Task  3/16]  Current/Best:    1.27/   5.35 GFLOPS | Progress: (144/1500) | 43.60 s



[Task  3/16]  Current/Best:    1.24/   5.35 GFLOPS | Progress: (152/1500) | 45.36 s



[Task  3/16]  Current/Best:    0.51/   5.35 GFLOPS | Progress: (160/1500) | 47.14 s



[Task  3/16]  Current/Best:    1.29/   5.35 GFLOPS | Progress: (168/1500) | 48.96 s



[Task  3/16]  Current/Best:    0.90/   5.35 GFLOPS | Progress: (176/1500) | 50.80 s



 Done.
[Task  4/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  4/16]  Current/Best:    1.21/   1.29 GFLOPS | Progress: (8/1500) | 11.74 s



[Task  4/16]  Current/Best:    0.99/   1.29 GFLOPS | Progress: (16/1500) | 13.46 s



[Task  4/16]  Current/Best:    1.08/   1.29 GFLOPS | Progress: (24/1500) | 15.84 s



[Task  4/16]  Current/Best:    2.62/   2.62 GFLOPS | Progress: (32/1500) | 19.40 s



[Task  4/16]  Current/Best:    1.27/   2.62 GFLOPS | Progress: (40/1500) | 25.43 s



[Task  4/16]  Current/Best:    1.05/   2.62 GFLOPS | Progress: (48/1500) | 27.86 s



[Task  4/16]  Current/Best:    1.05/   2.62 GFLOPS | Progress: (56/1500) | 34.13 s



 Done.
[Task  5/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  5/16]  Current/Best:    2.20/   5.67 GFLOPS | Progress: (8/1500) | 2.89 s



[Task  5/16]  Current/Best:    5.25/   5.67 GFLOPS | Progress: (16/1500) | 4.78 s



[Task  5/16]  Current/Best:    2.16/   5.67 GFLOPS | Progress: (24/1500) | 7.05 s



[Task  5/16]  Current/Best:    2.12/   5.67 GFLOPS | Progress: (32/1500) | 9.30 s



[Task  5/16]  Current/Best:    2.22/   5.67 GFLOPS | Progress: (40/1500) | 11.48 s



[Task  5/16]  Current/Best:    1.88/   5.67 GFLOPS | Progress: (48/1500) | 13.48 s



[Task  5/16]  Current/Best:    5.62/   5.67 GFLOPS | Progress: (56/1500) | 15.24 s



[Task  5/16]  Current/Best:    1.92/   5.76 GFLOPS | Progress: (96/1500) | 33.66 s



[Task  5/16]  Current/Best:    5.41/   5.76 GFLOPS | Progress: (104/1500) | 35.66 s



[Task  5/16]  Current/Best:    2.31/   5.76 GFLOPS | Progress: (112/1500) | 37.55 s



[Task  5/16]  Current/Best:    2.21/   5.76 GFLOPS | Progress: (120/1500) | 40.09 s



[Task  5/16]  Current/Best:    1.54/   5.76 GFLOPS | Progress: (128/1500) | 44.97 s



[Task  5/16]  Current/Best:    1.94/   5.76 GFLOPS | Progress: (136/1500) | 46.72 s



[Task  5/16]  Current/Best:    2.32/   5.76 GFLOPS | Progress: (144/1500) | 49.18 s



[Task  5/16]  Current/Best:    1.72/   5.76 GFLOPS | Progress: (152/1500) | 51.55 s



[Task  5/16]  Current/Best:    1.62/   5.76 GFLOPS | Progress: (160/1500) | 53.82 s



[Task  5/16]  Current/Best:    1.57/   5.76 GFLOPS | Progress: (168/1500) | 56.11 s



[Task  5/16]  Current/Best:    2.39/   5.76 GFLOPS | Progress: (176/1500) | 58.46 s



[Task  5/16]  Current/Best:    2.15/   5.76 GFLOPS | Progress: (184/1500) | 60.63 s



 Done.
[Task  6/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  6/16]  Current/Best:    1.72/   2.28 GFLOPS | Progress: (8/1500) | 7.44 s



[Task  6/16]  Current/Best:    1.37/   2.32 GFLOPS | Progress: (16/1500) | 18.74 s



[Task  6/16]  Current/Best:    2.12/   2.32 GFLOPS | Progress: (24/1500) | 22.63 s



[Task  6/16]  Current/Best:    1.68/   2.32 GFLOPS | Progress: (32/1500) | 24.48 s



[Task  6/16]  Current/Best:    2.27/   2.38 GFLOPS | Progress: (40/1500) | 29.40 s



[Task  6/16]  Current/Best:    1.11/   2.38 GFLOPS | Progress: (48/1500) | 31.66 s



[Task  6/16]  Current/Best:    1.38/   2.38 GFLOPS | Progress: (56/1500) | 43.24 s



[Task  6/16]  Current/Best:    2.36/   2.38 GFLOPS | Progress: (63/1500) | 45.64 s Done.
[Task  7/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  7/16]  Current/Best:    6.95/   6.95 GFLOPS | Progress: (8/1500) | 12.62 s



[Task  7/16]  Current/Best:    6.89/   9.06 GFLOPS | Progress: (16/1500) | 15.59 s



[Task  7/16]  Current/Best:    9.82/   9.82 GFLOPS | Progress: (24/1500) | 27.10 s



[Task  7/16]  Current/Best:    7.61/   9.82 GFLOPS | Progress: (32/1500) | 38.62 s



[Task  7/16]  Current/Best:    5.29/   9.85 GFLOPS | Progress: (40/1500) | 42.96 s



[Task  7/16]  Current/Best:    7.90/   9.85 GFLOPS | Progress: (48/1500) | 54.52 s



[Task  7/16]  Current/Best:    8.28/   9.85 GFLOPS | Progress: (56/1500) | 59.00 s



[Task  7/16]  Current/Best:    6.36/   9.91 GFLOPS | Progress: (64/1500) | 72.15 s



[Task  7/16]  Current/Best:    8.50/   9.91 GFLOPS | Progress: (72/1500) | 74.09 s



[Task  7/16]  Current/Best:    8.12/   9.91 GFLOPS | Progress: (80/1500) | 76.32 s



[Task  7/16]  Current/Best:    6.19/   9.91 GFLOPS | Progress: (88/1500) | 78.94 s



[Task  7/16]  Current/Best:    7.76/   9.91 GFLOPS | Progress: (96/1500) | 81.45 s



[Task  7/16]  Current/Best:    7.64/   9.91 GFLOPS | Progress: (104/1500) | 83.90 s



[Task  7/16]  Current/Best:    5.82/   9.91 GFLOPS | Progress: (112/1500) | 86.40 s



[Task  7/16]  Current/Best:    7.68/   9.91 GFLOPS | Progress: (120/1500) | 88.84 s



[Task  7/16]  Current/Best:    8.69/   9.91 GFLOPS | Progress: (128/1500) | 101.14 s



[Task  7/16]  Current/Best:    7.45/   9.91 GFLOPS | Progress: (136/1500) | 103.04 s



[Task  7/16]  Current/Best:    6.33/   9.91 GFLOPS | Progress: (144/1500) | 105.36 s



[Task  7/16]  Current/Best:    5.04/   9.91 GFLOPS | Progress: (152/1500) | 107.32 s



[Task  7/16]  Current/Best:    7.36/   9.91 GFLOPS | Progress: (160/1500) | 109.44 s Done.
[Task  8/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  8/16]  Current/Best:    1.96/   7.68 GFLOPS | Progress: (8/1500) | 12.84 s



[Task  8/16]  Current/Best:    1.91/   8.40 GFLOPS | Progress: (16/1500) | 21.47 s



[Task  8/16]  Current/Best:    0.32/   8.65 GFLOPS | Progress: (24/1500) | 32.87 s



[Task  8/16]  Current/Best:    7.85/   8.65 GFLOPS | Progress: (32/1500) | 44.44 s



[Task  8/16]  Current/Best:    6.62/   8.65 GFLOPS | Progress: (40/1500) | 58.14 s



[Task  8/16]  Current/Best:    5.46/   9.16 GFLOPS | Progress: (48/1500) | 69.67 s



[Task  8/16]  Current/Best:    3.86/   9.16 GFLOPS | Progress: (56/1500) | 81.00 s



[Task  8/16]  Current/Best:    2.19/   9.16 GFLOPS | Progress: (64/1500) | 101.20 s



[Task  8/16]  Current/Best:    7.33/   9.16 GFLOPS | Progress: (72/1500) | 105.29 s



[Task  8/16]  Current/Best:    2.88/   9.31 GFLOPS | Progress: (80/1500) | 107.30 s



[Task  8/16]  Current/Best:    2.04/   9.31 GFLOPS | Progress: (88/1500) | 111.03 s



[Task  8/16]  Current/Best:    1.91/   9.31 GFLOPS | Progress: (96/1500) | 121.83 s



[Task  8/16]  Current/Best:    1.00/   9.31 GFLOPS | Progress: (104/1500) | 133.61 s



[Task  8/16]  Current/Best:    1.30/   9.31 GFLOPS | Progress: (110/1500) | 144.67 s



 Done.
[Task  9/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  9/16]  Current/Best:    7.73/   8.47 GFLOPS | Progress: (8/1500) | 6.26 s



[Task  9/16]  Current/Best:    7.70/   9.55 GFLOPS | Progress: (16/1500) | 8.95 s



[Task  9/16]  Current/Best:    7.78/   9.55 GFLOPS | Progress: (24/1500) | 11.43 s



[Task  9/16]  Current/Best:    7.76/   9.55 GFLOPS | Progress: (32/1500) | 13.80 s



[Task  9/16]  Current/Best:    6.88/   9.55 GFLOPS | Progress: (40/1500) | 16.76 s



[Task  9/16]  Current/Best:    7.73/   9.55 GFLOPS | Progress: (48/1500) | 19.15 s



[Task  9/16]  Current/Best:    3.70/   9.55 GFLOPS | Progress: (56/1500) | 22.12 s



[Task  9/16]  Current/Best:    3.75/   9.55 GFLOPS | Progress: (64/1500) | 27.21 s



[Task  9/16]  Current/Best:    8.71/   9.55 GFLOPS | Progress: (72/1500) | 29.26 s



[Task  9/16]  Current/Best:    8.75/   9.55 GFLOPS | Progress: (80/1500) | 31.31 s



[Task  9/16]  Current/Best:    3.64/   9.55 GFLOPS | Progress: (88/1500) | 33.64 s



[Task  9/16]  Current/Best:    8.57/   9.55 GFLOPS | Progress: (96/1500) | 36.07 s



[Task  9/16]  Current/Best:    9.39/   9.55 GFLOPS | Progress: (104/1500) | 38.39 s



[Task  9/16]  Current/Best:    5.96/   9.55 GFLOPS | Progress: (112/1500) | 41.27 s



 Done.
[Task 10/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 10/16]  Current/Best:    4.59/   8.41 GFLOPS | Progress: (8/1500) | 12.17 s



[Task 10/16]  Current/Best:    8.57/   8.57 GFLOPS | Progress: (16/1500) | 27.36 s



[Task 10/16]  Current/Best:    7.61/   8.57 GFLOPS | Progress: (24/1500) | 39.09 s



[Task 10/16]  Current/Best:    8.05/   8.57 GFLOPS | Progress: (32/1500) | 53.77 s



[Task 10/16]  Current/Best:    1.39/   8.57 GFLOPS | Progress: (40/1500) | 61.67 s



[Task 10/16]  Current/Best:    0.88/   8.57 GFLOPS | Progress: (48/1500) | 73.03 s



[Task 10/16]  Current/Best:    5.74/   8.57 GFLOPS | Progress: (56/1500) | 84.40 s



[Task 10/16]  Current/Best:    0.95/   8.57 GFLOPS | Progress: (64/1500) | 104.75 s



[Task 10/16]  Current/Best:    7.83/   8.80 GFLOPS | Progress: (72/1500) | 108.52 s



[Task 10/16]  Current/Best:    8.94/   8.94 GFLOPS | Progress: (80/1500) | 110.36 s



[Task 10/16]  Current/Best:    4.55/   9.47 GFLOPS | Progress: (88/1500) | 112.45 s



[Task 10/16]  Current/Best:    6.54/   9.47 GFLOPS | Progress: (96/1500) | 114.50 s



[Task 10/16]  Current/Best:    7.19/   9.47 GFLOPS | Progress: (104/1500) | 117.08 s



[Task 10/16]  Current/Best:    3.23/   9.47 GFLOPS | Progress: (112/1500) | 119.39 s



[Task 10/16]  Current/Best:    2.60/   9.47 GFLOPS | Progress: (120/1500) | 124.63 s



[Task 10/16]  Current/Best:    2.11/   9.47 GFLOPS | Progress: (128/1500) | 139.31 s



[Task 10/16]  Current/Best:    2.27/   9.47 GFLOPS | Progress: (136/1500) | 153.89 s



[Task 10/16]  Current/Best:    1.56/   9.47 GFLOPS | Progress: (144/1500) | 168.73 s



[Task 10/16]  Current/Best:    2.51/   9.47 GFLOPS | Progress: (152/1500) | 184.27 s



[Task 10/16]  Current/Best:    3.37/   9.47 GFLOPS | Progress: (160/1500) | 198.95 s



[Task 10/16]  Current/Best:    3.55/   9.47 GFLOPS | Progress: (168/1500) | 214.22 s



[Task 10/16]  Current/Best:    1.26/   9.47 GFLOPS | Progress: (176/1500) | 229.44 s



 Done.
[Task 11/16]  Current/Best:    3.26/   7.10 GFLOPS | Progress: (8/1500) | 6.43 s



[Task 11/16]  Current/Best:    5.92/   7.10 GFLOPS | Progress: (24/1500) | 10.81 s



[Task 11/16]  Current/Best:    6.47/   7.10 GFLOPS | Progress: (32/1500) | 12.56 s



[Task 11/16]  Current/Best:    5.98/   7.10 GFLOPS | Progress: (48/1500) | 17.07 s



[Task 11/16]  Current/Best:    6.56/   7.10 GFLOPS | Progress: (56/1500) | 19.10 s



[Task 11/16]  Current/Best:    6.40/   7.10 GFLOPS | Progress: (64/1500) | 28.31 s



[Task 11/16]  Current/Best:    4.27/   7.10 GFLOPS | Progress: (72/1500) | 30.13 s



[Task 11/16]  Current/Best:    6.98/   7.10 GFLOPS | Progress: (80/1500) | 32.17 s



[Task 11/16]  Current/Best:    6.55/   7.10 GFLOPS | Progress: (88/1500) | 34.51 s



[Task 11/16]  Current/Best:    3.29/   7.10 GFLOPS | Progress: (96/1500) | 36.52 s



[Task 11/16]  Current/Best:    5.52/   7.10 GFLOPS | Progress: (104/1500) | 38.75 s



 Done.
[Task 12/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 12/16]  Current/Best:    2.95/   5.77 GFLOPS | Progress: (8/1500) | 5.37 s



[Task 12/16]  Current/Best:    2.44/   5.94 GFLOPS | Progress: (16/1500) | 16.77 s



[Task 12/16]  Current/Best:    5.15/   5.94 GFLOPS | Progress: (24/1500) | 28.05 s



[Task 12/16]  Current/Best:    2.52/   6.66 GFLOPS | Progress: (32/1500) | 39.27 s



[Task 12/16]  Current/Best:    2.75/   6.66 GFLOPS | Progress: (40/1500) | 53.56 s



[Task 12/16]  Current/Best:    5.27/   6.66 GFLOPS | Progress: (48/1500) | 57.32 s



[Task 12/16]  Current/Best:    3.30/   6.66 GFLOPS | Progress: (56/1500) | 68.40 s



[Task 12/16]  Current/Best:    2.07/   6.66 GFLOPS | Progress: (64/1500) | 89.21 s



[Task 12/16]  Current/Best:    6.65/   6.68 GFLOPS | Progress: (72/1500) | 92.77 s



[Task 12/16]  Current/Best:    2.89/   6.83 GFLOPS | Progress: (80/1500) | 94.65 s



[Task 12/16]  Current/Best:    3.74/   6.83 GFLOPS | Progress: (88/1500) | 96.54 s



[Task 12/16]  Current/Best:    1.74/   6.83 GFLOPS | Progress: (96/1500) | 98.62 s



[Task 12/16]  Current/Best:    3.16/   6.83 GFLOPS | Progress: (104/1500) | 100.82 s



[Task 12/16]  Current/Best:    3.37/   6.83 GFLOPS | Progress: (112/1500) | 112.35 s



[Task 12/16]  Current/Best:    2.15/   6.83 GFLOPS | Progress: (120/1500) | 123.82 s



[Task 12/16]  Current/Best:    3.27/   6.83 GFLOPS | Progress: (128/1500) | 136.70 s



[Task 12/16]  Current/Best:    1.00/   6.83 GFLOPS | Progress: (136/1500) | 150.95 s



[Task 12/16]  Current/Best:    1.15/   6.83 GFLOPS | Progress: (140/1500) | 161.42 s Done.
[Task 13/16]  Current/Best:    5.94/   6.64 GFLOPS | Progress: (32/1500) | 11.43 s



[Task 13/16]  Current/Best:    4.87/   6.64 GFLOPS | Progress: (40/1500) | 13.55 s



[Task 13/16]  Current/Best:    6.25/   6.64 GFLOPS | Progress: (48/1500) | 15.49 s



[Task 13/16]  Current/Best:    3.56/   6.64 GFLOPS | Progress: (56/1500) | 17.92 s



[Task 13/16]  Current/Best:    2.98/   6.64 GFLOPS | Progress: (64/1500) | 23.32 s



[Task 13/16]  Current/Best:    2.49/   6.64 GFLOPS | Progress: (72/1500) | 26.85 s



[Task 13/16]  Current/Best:    2.29/   6.64 GFLOPS | Progress: (80/1500) | 29.28 s



[Task 13/16]  Current/Best:    3.69/   6.64 GFLOPS | Progress: (88/1500) | 31.43 s



[Task 13/16]  Current/Best:    3.68/   6.64 GFLOPS | Progress: (96/1500) | 34.94 s



[Task 13/16]  Current/Best:    3.88/   6.64 GFLOPS | Progress: (104/1500) | 36.90 s



[Task 13/16]  Current/Best:    3.84/   6.64 GFLOPS | Progress: (112/1500) | 38.70 s



[Task 13/16]  Current/Best:    3.79/   6.64 GFLOPS | Progress: (120/1500) | 41.72 s



 Done.
[Task 14/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 14/16]  Current/Best:    2.85/   3.89 GFLOPS | Progress: (8/1500) | 4.64 s



[Task 14/16]  Current/Best:    3.09/   3.92 GFLOPS | Progress: (16/1500) | 10.73 s



[Task 14/16]  Current/Best:    2.62/   3.95 GFLOPS | Progress: (24/1500) | 12.79 s



[Task 14/16]  Current/Best:    3.85/   3.95 GFLOPS | Progress: (32/1500) | 14.63 s



[Task 14/16]  Current/Best:    3.37/   3.95 GFLOPS | Progress: (40/1500) | 26.04 s



[Task 14/16]  Current/Best:    3.41/   3.95 GFLOPS | Progress: (48/1500) | 32.22 s



[Task 14/16]  Current/Best:    2.09/   3.95 GFLOPS | Progress: (56/1500) | 35.55 s



[Task 14/16]  Current/Best:    1.83/   3.95 GFLOPS | Progress: (64/1500) | 47.97 s



[Task 14/16]  Current/Best:    1.38/   3.95 GFLOPS | Progress: (72/1500) | 58.99 s



 Done.
[Task 15/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 15/16]  Current/Best:    8.69/   9.66 GFLOPS | Progress: (8/1500) | 14.67 s



[Task 15/16]  Current/Best:    8.17/   9.66 GFLOPS | Progress: (16/1500) | 25.76 s



[Task 15/16]  Current/Best:    6.40/   9.66 GFLOPS | Progress: (24/1500) | 40.63 s



[Task 15/16]  Current/Best:    8.85/   9.66 GFLOPS | Progress: (32/1500) | 53.74 s



[Task 15/16]  Current/Best:    9.63/   9.66 GFLOPS | Progress: (40/1500) | 65.19 s



[Task 15/16]  Current/Best:    9.58/   9.66 GFLOPS | Progress: (48/1500) | 79.25 s



[Task 15/16]  Current/Best:    6.60/   9.67 GFLOPS | Progress: (56/1500) | 91.83 s



[Task 15/16]  Current/Best:    6.56/   9.67 GFLOPS | Progress: (64/1500) | 105.73 s



[Task 15/16]  Current/Best:    5.63/   9.67 GFLOPS | Progress: (72/1500) | 109.37 s



[Task 15/16]  Current/Best:    8.83/   9.67 GFLOPS | Progress: (80/1500) | 116.56 s



[Task 15/16]  Current/Best:    5.36/   9.67 GFLOPS | Progress: (88/1500) | 122.51 s



[Task 15/16]  Current/Best:    5.61/   9.67 GFLOPS | Progress: (96/1500) | 124.44 s



[Task 15/16]  Current/Best:    6.18/   9.67 GFLOPS | Progress: (104/1500) | 133.37 s



[Task 15/16]  Current/Best:    3.77/   9.67 GFLOPS | Progress: (112/1500) | 144.52 s



[Task 15/16]  Current/Best:    2.11/   9.67 GFLOPS | Progress: (120/1500) | 156.00 s



[Task 15/16]  Current/Best:    6.94/   9.67 GFLOPS | Progress: (128/1500) | 171.06 s



[Task 15/16]  Current/Best:    6.82/   9.67 GFLOPS | Progress: (136/1500) | 184.96 s



[Task 15/16]  Current/Best:    3.98/   9.67 GFLOPS | Progress: (144/1500) | 195.53 s



[Task 15/16]  Current/Best:    6.71/   9.67 GFLOPS | Progress: (152/1500) | 210.88 s



 Done.
[Task 16/16]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 16/16]  Current/Best:    1.87/   8.16 GFLOPS | Progress: (8/1500) | 14.72 s



[Task 16/16]  Current/Best:    6.21/   8.16 GFLOPS | Progress: (16/1500) | 25.90 s



[Task 16/16]  Current/Best:    1.71/   8.16 GFLOPS | Progress: (24/1500) | 40.45 s



[Task 16/16]  Current/Best:    0.79/   8.16 GFLOPS | Progress: (32/1500) | 55.43 s



[Task 16/16]  Current/Best:    1.87/   8.16 GFLOPS | Progress: (36/1500) | 67.58 s



 Done.


### Compiling and Optimized Model with Tuning Data

The tuning records stored in `signLanguageModel-v1-autotuning-cpu.json`. 

The compiler will use the results to generate high performance code for the model on your specified target.

Now that tuning data for the model has been collected, we can re-compile the model using optimized operators to speed up the computations.

In [11]:
# Recompile the model from the record
with autotvm.apply_history_best(tuning_option["tuning_records"]):
    with tvm.transform.PassContext(opt_level=3, config={}):
        lib = relay.build(mod, target=target, params=params)

# device CPU
dev = tvm.device(str(target), 0)
module = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

print('Optimized module loaded!!!')


Optimized module loaded!!!


## Comparing the Tuned and Untuned models

In [18]:
optimized_time = module.benchmark(device = dev, repeat = 30, number = 1)

print(f'Unoptimized : {timing_results}')

print(f'Optimized : {optimized_time}')


Unoptimized : Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  16.0726      15.1003      28.0091      14.6035       2.8954   
               
Optimized : Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  12.9731      12.6457      14.6032      12.4026       0.6231   
               


## Test the model performance

### Check output shape from the model

In [12]:
sign_test_data = np.expand_dims(X_test[22], axis=0)
sign_test_data.shape

(1, 30, 1662)

In [13]:
sign_actual_data = y_test[22]
sign_actual_data

array([1, 0, 0, 0])

In [14]:
# Output result
dtype = 'float32'
# set input to the module
module.set_input(input_name, sign_test_data)
# run forward execution
module.run()

tvm_output = module.get_output(0).numpy()
print(f'TVM output shape: {tvm_output.shape}')
print(f'TVM Output : {tvm_output}')
print(f'Output: {actions[np.argmax(tvm_output[0])]}')
print(f'Actual test data output: {actions[np.argmax(sign_actual_data)]}')

TVM output shape: (1, 4)
TVM Output : [[9.6187323e-01 3.7256207e-02 3.1616242e-04 5.5441313e-04]]
Output: hello
Actual test data output: hello


In [15]:
# Import 
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import mediapipe as mp
import time

# Define API model to use as a MP hoslistic
mp_holistic = mp.solutions.holistic # Holistic model
mp_drawing = mp.solutions.drawing_utils # Drawing utilities

# Functions
def mediapipe_detection(image, model):
    '''Detect the landmarks of the image using the mediapipe model'''
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # COLOR CONVERSION BGR 2 RGB
    image.flags.writeable = False                  # Image is no longer writeable
    results = model.process(image)                 # Make prediction
    image.flags.writeable = True                   # Image is now writeable 
    image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) # COLOR COVERSION RGB 2 BGR
    return image, results

def draw_landmarks(image, results):
    '''Function to draw landmarks from the detected image'''
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION) # Draw face connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS) # Draw pose connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS) # Draw right hand connections

def draw_styled_landmarks(image, results):
    '''Same as draw_landmarks but with different colors and thickness'''
    # Draw face connections
    mp_drawing.draw_landmarks(image, results.face_landmarks, mp_holistic.FACEMESH_TESSELATION, 
                             mp_drawing.DrawingSpec(color=(80,110,10), thickness=1, circle_radius=1), 
                             mp_drawing.DrawingSpec(color=(80,256,121), thickness=1, circle_radius=1)
                             ) 
    # Draw pose connections
    mp_drawing.draw_landmarks(image, results.pose_landmarks, mp_holistic.POSE_CONNECTIONS,
                             mp_drawing.DrawingSpec(color=(80,22,10), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(80,44,121), thickness=2, circle_radius=2)
                             ) 
    # Draw left hand connections
    mp_drawing.draw_landmarks(image, results.left_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(121,22,76), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(121,44,250), thickness=2, circle_radius=2)
                             ) 
    # Draw right hand connections  
    mp_drawing.draw_landmarks(image, results.right_hand_landmarks, mp_holistic.HAND_CONNECTIONS, 
                             mp_drawing.DrawingSpec(color=(245,117,66), thickness=2, circle_radius=4), 
                             mp_drawing.DrawingSpec(color=(245,66,230), thickness=2, circle_radius=2)
                             ) 
    
def extract_keypoints(results):
    '''Extract keypoints from the results and combine them into a single array'''
    pose = np.array([[res.x, res.y, res.z, res.visibility] for res in results.pose_landmarks.landmark]).flatten() if results.pose_landmarks else np.zeros(33*4)
    face = np.array([[res.x, res.y, res.z] for res in results.face_landmarks.landmark]).flatten() if results.face_landmarks else np.zeros(468*3)
    lh = np.array([[res.x, res.y, res.z] for res in results.left_hand_landmarks.landmark]).flatten() if results.left_hand_landmarks else np.zeros(21*3)
    rh = np.array([[res.x, res.y, res.z] for res in results.right_hand_landmarks.landmark]).flatten() if results.right_hand_landmarks else np.zeros(21*3)
    return np.concatenate([pose, face, lh, rh])

colors = [(245,117,16), (117,245,16), (16,117,245), (116,57,39)]
def prob_viz(res, actions, input_frame, colors):
    output_frame = input_frame.copy()
    for num, prob in enumerate(res):
        cv2.rectangle(output_frame, (0,60+num*40), (int(prob*100), 90+num*40), colors[num], -1)
        cv2.putText(output_frame, actions[num], (0, 85+num*40), cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)
        
    return output_frame

# Real time prediction
sequence = []
sentence = []
threshold = 0.7
count = 0
prev_frame_time = 0
new_frame_time = 0
cap = cv2.VideoCapture(33)
# Set mediapipe model 
with mp_holistic.Holistic(min_detection_confidence=0.5, min_tracking_confidence=0.5) as holistic:
    while cap.isOpened():

        # Read feed
        ret, frame = cap.read()

        # Make detections with mediapipe
        image, results = mediapipe_detection(frame, holistic)
        
        # Draw landmarks
        draw_styled_landmarks(image, results)
        
        # 2. Prediction logic with my model
        keypoints = extract_keypoints(results)
        sequence.append(keypoints)
        count += 1
        sequence = sequence[-30:]

        # calculate frame rate
        new_frame_time = time.time()
        # fps will be number of frame processed in given time frame
        fps = 1/(new_frame_time - prev_frame_time)
        prev_frame_time = new_frame_time
        # convert into int
        fps = int(fps)
        # convert into string
        fps = str(fps)
        cv2.putText(image, f'FPS: {fps}', (300, 100), cv2.FONT_HERSHEY_COMPLEX, 2, (100, 255, 0), 2, cv2.LINE_AA)

        # Start prediction when the sequence equal to 30 frames + time for transition about 5 frames
        if len(sequence) == 30:
            # Inference
            # Output result
            dtype = 'float32'
            # set input to the module
            module.set_input(input_name, np.expand_dims(sequence, axis = 0))
            # run forward execution
            module.run()
            # get prediction result
            res = module.get_output(0).numpy()[0]


            # Viz probabilities
            image = prob_viz(res, actions, image, colors)
            count = 0
            # print(actions[np.argmax(res)])
            
            
            #3. Visualize logic
            if res[np.argmax(res)] > threshold: 
                if len(sentence) > 0: 
                    if actions[np.argmax(res)] != sentence[-1]:
                        sentence.append(actions[np.argmax(res)])
                else:
                    sentence.append(actions[np.argmax(res)])

            if len(sentence) > 5: 
                sentence = sentence[-5:]

            
            
        cv2.rectangle(image, (0,0), (640, 40), (245, 117, 16), -1)
        cv2.putText(image, ' '.join(sentence), (3,30), 
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2, cv2.LINE_AA)
        
        # Show to screen
        cv2.imshow('OpenCV Feed', image)
        # Break gracefully
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()



INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
