## Import dependencies, library

In [1]:
import tvm
import tvm.relay as relay
from tvm.contrib import graph_executor
from tvm.contrib.download import download_testdata
import numpy as np
from PIL import Image
import onnx
import os



## Downloading and Loading the ONNX Model

In [2]:
# Fix NumPy's global RNG seed first so any later random draws are repeatable.
np.random.seed(0)

# Read the ONNX graph; the relative path is resolved against the working directory.
model_path = 'mobilenetv2_sigmoid.onnx'
onnx_model = onnx.load(model_path)

In [3]:
# Print the name of every entry in the ONNX graph's input list.
print("Input names:")
# NOTE: Python loop variables outlive the loop, so `input_tensor` stays bound to
# the last graph input afterwards. Keep the name stable if a later cell reads it.
for input_tensor in onnx_model.graph.input:
    print(input_tensor.name)

Input names:
args_0


## Downloading, Preprocessing, and Loading the Test Image

In [4]:
img_path = 'test.jpg'

# Load the image, force three RGB channels (a grayscale, palette or RGBA file
# would otherwise yield a different channel count), and resize it to 224x224.
# The context manager releases the file handle once the pixels have been copied.
with Image.open(img_path) as source_image:
    resized_image = source_image.convert("RGB").resize((224, 224))
img_data = np.asarray(resized_image).astype("float32")

# Scale pixel values from [0, 255] to [0, 1]. No per-channel mean/std
# normalization is applied here.
norm_img_data = img_data / 255.0

# Add a leading batch axis. PIL yields height x width x channel arrays, so the
# result is NHWC with shape (1, 224, 224, 3) -- not NCHW.
# NOTE(review): presumably the layout this model was exported with -- confirm.
img_data = np.expand_dims(norm_img_data, axis=0)

## Define Compile Target

Choose the CPU target: `llvm`.

In [5]:
# TVM target string; "llvm" is the CPU target chosen for this notebook.
target = "llvm"

## Compile the Model With Relay

In [6]:
# Read the input name straight from the graph instead of relying on a loop
# variable left behind by an earlier cell (hidden notebook state).
# NOTE(review): assumes the first graph input is the image tensor -- confirm for
# models that also list weights/initializers among the graph inputs.
input_name = onnx_model.graph.input[0].name
shape_dict = {input_name: img_data.shape}

# Import the ONNX graph into Relay with the input shape pinned to the image batch.
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# Build the Relay module for the selected target at optimization level 3.
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Create the device handle (index 0) and wrap the built library in a graph executor.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))

One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


## Run AI module

In [7]:
# `dtype` is recorded for reference; nothing in this cell reads it.
dtype = "float32"

# Bind the preprocessed image batch to the model input, then run one inference.
module.set_input(key=input_name, value=img_data)
module.run()

## Get output from module

In [8]:
# Fetch output 0 from the executor and convert it to a NumPy array.
tvm_output = module.get_output(0).numpy()

## Benchmark Module Performance

In [9]:
# Time the current (untuned) build: 30 repeats of a single run each. The result
# is kept as `unoptimized` so it can be compared against a tuned build later.
unoptimized = module.benchmark(dev, repeat=30, number=1)
print(unoptimized)

Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  13.4164      12.7265      25.9232      12.1900       2.7094   
               


## Postprocess the output

In [10]:
from scipy.special import softmax

# Class names for this model, indexed by output position.
labels = ["cracked", "normal"]

# Turn the raw outputs into normalized scores and drop the batch axis.
# NOTE(review): the model file name suggests a sigmoid head; confirm that
# softmax is the intended post-processing here.
scores = softmax(tvm_output)
scores = np.squeeze(scores)

# Indices sorted by descending score; report at most the top five.
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    # Use a placeholder instead of raising IndexError when the model emits more
    # scores than there are entries in `labels`.
    label = labels[rank] if rank < len(labels) else "<unlabeled %d>" % rank
    print("class='%s' with probability=%f" % (label, scores[rank]))

class='plastic' with probability=0.398203
class='glass' with probability=0.152039
class='paper' with probability=0.150148
class='metal' with probability=0.149947
class='cardboard' with probability=0.149663


## Import model tuning dependencies

In [11]:
# import tvm.auto_scheduler as auto_scheduler
from tvm.autotvm.tuner import XGBTuner, GATuner, RandomTuner, GridSearchTuner
from tvm import autotvm

## Set LocalRunner

Set up some basic parameters for the runner. The runner takes compiled code
that is generated with a specific set of parameters and measures its
performance. ``number`` specifies how many times each compiled configuration
is run to produce one averaged measurement, while ``repeat`` specifies how many
such measurements are taken of each configuration. ``min_repeat_ms`` specifies
the minimum duration, in milliseconds, of one measurement; if ``number`` runs
finish in less time than this, the number of runs is increased automatically.
This option is necessary for accurate tuning on GPUs, and is not required for
CPU tuning. Setting this value to 0 disables it. The ``timeout`` places an
upper limit, in seconds, on how long each tested configuration may run.



In [12]:
# Measurement settings shared by every tuning trial.
number = 8         # runs averaged into one measurement
repeat = 1         # measurements taken per candidate configuration
min_repeat_ms = 0  # minimum duration of one measurement; 0 disables it (CPU tuning)
timeout = 10       # per-configuration limit, in seconds

# Runner that executes the compiled candidates on the local machine.
# NOTE(review): TVM's LocalRunner documentation states that `number` should be 1
# for `enable_cpu_cache_flush` to be effective -- confirm the intended combination.
runner = autotvm.LocalRunner(
    timeout=timeout,
    number=number,
    repeat=repeat,
    min_repeat_ms=min_repeat_ms,
    enable_cpu_cache_flush=True,
)

## Set tuning options

Create a simple structure for holding tuning options. We use an XGBoost
algorithm for guiding the search. The number of trials per task (``n_trial``)
is set to 1000 here. For a production job we recommend 1500 for CPU and
3000-4000 for GPU. The number of trials required can depend on the particular
model and processor, so it's worth spending some time evaluating performance
across a range of values to find the best balance between tuning time and
model optimization. Because tuning is time intensive, a smaller number of
trials shortens the run, but values as small as 10 or 20 are not recommended.
The ``early_stopping`` parameter is the minimum number of trials to
run before a condition that stops the search early can be applied. The
measure option indicates where trial code will be built, and where it will be
run. In this case, we're using the ``LocalRunner`` we just created and a
``LocalBuilder``. The ``tuning_records`` option specifies a file to write
the tuning data to.



## Tuning Options

Create a simple structure for holding tuning options.



In [16]:
# Keyword arguments for the tuning loop; each key matches a parameter name of
# the `tune_tasks` function defined below.
tuning_option = dict(
    tuning_records="mobilenetv2.json",
    use_transfer_learning=True,
    tuner="xgb",
    n_trial=1000,
    early_stopping=100,
    # Candidates are built with the default local builder and measured with the
    # `runner` configured above.
    measure_option=autotvm.measure_option(
        builder=autotvm.LocalBuilder(build_func="default"),
        runner=runner,
    ),
)

## Extract task from model

In [17]:
# Collect the tunable tasks from the Relay module's "main" function.
tasks = autotvm.task.extract_from_program(
    mod["main"],
    params=params,
    target=target,
)

## Tune task sequentially

In [18]:
def _create_tuner(tsk, tuner):
    """Instantiate the AutoTVM tuner selected by the string ``tuner`` for ``tsk``.

    Raises:
        ValueError: if ``tuner`` is not one of the recognized names.
    """
    if tuner == "xgb" or tuner == "xgb-rank":
        return XGBTuner(tsk, loss_type="rank")
    if tuner == "xgb_knob":
        return XGBTuner(tsk, loss_type="rank", feature_type="knob")
    if tuner == "xgb_itervar":
        return XGBTuner(tsk, loss_type="rank", feature_type="itervar")
    if tuner == "xgb_curve":
        return XGBTuner(tsk, loss_type="rank", feature_type="curve")
    if tuner == "ga":
        return GATuner(tsk, pop_size=50)
    if tuner == "random":
        return RandomTuner(tsk)
    if tuner == "gridsearch":
        return GridSearchTuner(tsk)
    raise ValueError("Invalid tuner: " + tuner)


def tune_tasks(
    tasks,
    measure_option,
    tuner="xgb",
    n_trial=1000,
    early_stopping=100,
    tuning_records="tuning.json",
    use_transfer_learning=True,
):
    """Tune every task in turn and write the best records to ``tuning_records``.

    Args:
        tasks: sequence of AutoTVM tasks; they are processed in reverse order.
        measure_option: measurement settings passed through to each tuner.
        tuner: name of the tuner to use ("xgb", "xgb-rank", "xgb_knob",
            "xgb_itervar", "xgb_curve", "ga", "random" or "gridsearch").
        n_trial: upper bound on trials per task; clamped to the size of the
            task's configuration space.
        early_stopping: passed through to the tuner's ``tune`` call.
        tuning_records: path of the output file holding the best records.
        use_transfer_learning: when true, each tuner is primed with the records
            already logged by the tasks tuned before it.

    Raises:
        ValueError: if ``tuner`` is not a recognized name.
    """
    # All trials are logged to a scratch file first; start from a clean one.
    tmp_log_file = tuning_records + ".tmp"
    if os.path.exists(tmp_log_file):
        os.remove(tmp_log_file)

    for i, tsk in enumerate(reversed(tasks)):
        prefix = "[Task %2d/%2d] " % (i + 1, len(tasks))

        tuner_obj = _create_tuner(tsk, tuner)

        # Reuse measurements from earlier tasks, if any have been logged yet.
        if use_transfer_learning and os.path.isfile(tmp_log_file):
            tuner_obj.load_history(autotvm.record.load_from_file(tmp_log_file))

        # A task cannot be tried more often than it has configurations. Without
        # the clamp the progress bar reports a total that is never reached.
        tsk_trial = min(n_trial, len(tsk.config_space))
        tuner_obj.tune(
            n_trial=tsk_trial,
            early_stopping=early_stopping,
            measure_option=measure_option,
            callbacks=[
                autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
                autotvm.callback.log_to_file(tmp_log_file),
            ],
        )

    # Keep only the best records in the final file, then drop the scratch log.
    autotvm.record.pick_best(tmp_log_file, tuning_records)
    os.remove(tmp_log_file)

In [19]:
# Start tuning; `tuning_option` supplies the remaining keyword arguments.
tune_tasks(tasks=tasks, **tuning_option)

[Task  1/34]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  1/34]  Current/Best:    2.10/   7.39 GFLOPS | Progress: (30/1500) | 14.84 s Done.
[Task  2/34]  Current/Best:    6.69/  10.04 GFLOPS | Progress: (20/1500) | 6.68 s Done.




[Task  3/34]  Current/Best:   19.56/  19.71 GFLOPS | Progress: (56/1500) | 50.96 s



[Task  3/34]  Current/Best:   19.68/  19.79 GFLOPS | Progress: (120/1500) | 97.28 s



[Task  3/34]  Current/Best:   11.78/  20.39 GFLOPS | Progress: (184/1500) | 129.24 s



[Task  3/34]  Current/Best:   10.13/  20.39 GFLOPS | Progress: (352/1500) | 226.03 s



[Task  3/34]  Current/Best:    1.62/  20.39 GFLOPS | Progress: (376/1500) | 250.30 s



[Task  3/34]  Current/Best:    6.44/  20.43 GFLOPS | Progress: (448/1500) | 300.42 s



[Task  3/34]  Current/Best:    5.93/  20.57 GFLOPS | Progress: (496/1500) | 339.42 s



[Task  3/34]  Current/Best:    7.90/  20.57 GFLOPS | Progress: (512/1500) | 356.66 s



[Task  3/34]  Current/Best:    8.37/  20.57 GFLOPS | Progress: (576/1500) | 404.12 s



[Task  3/34]  Current/Best:    8.44/  20.57 GFLOPS | Progress: (704/1500) | 516.45 s



[Task  3/34]  Current/Best:   14.01/  20.57 GFLOPS | Progress: (768/1500) | 570.58 s



[Task  3/34]  Current/Best:    4.37/  20.57 GFLOPS | Progress: (800/1500) | 597.45 s



[Task  3/34]  Current/Best:    4.36/  20.57 GFLOPS | Progress: (816/1500) | 613.73 s



[Task  3/34]  Current/Best:    5.53/  20.57 GFLOPS | Progress: (864/1500) | 644.39 s



[Task  3/34]  Current/Best:    8.44/  20.57 GFLOPS | Progress: (880/1500) | 659.26 s



[Task  3/34]  Current/Best:    6.15/  20.57 GFLOPS | Progress: (896/1500) | 676.61 s



[Task  3/34]  Current/Best:    8.28/  20.57 GFLOPS | Progress: (912/1500) | 694.78 s



[Task  3/34]  Current/Best:   12.04/  20.57 GFLOPS | Progress: (920/1500) | 706.12 s



[Task  3/34]  Current/Best:    2.20/  20.57 GFLOPS | Progress: (944/1500) | 725.10 s



[Task  3/34]  Current/Best:    2.25/  20.57 GFLOPS | Progress: (952/1500) | 736.31 s



[Task  3/34]  Current/Best:    8.19/  20.57 GFLOPS | Progress: (960/1500) | 750.12 s



[Task  3/34]  Current/Best:    4.17/  20.57 GFLOPS | Progress: (990/1500) | 777.55 s Done.




[Task  4/34]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  4/34]  Current/Best:   16.22/  16.56 GFLOPS | Progress: (8/1500) | 14.39 s



[Task  4/34]  Current/Best:   11.04/  16.56 GFLOPS | Progress: (16/1500) | 25.80 s



[Task  4/34]  Current/Best:    4.55/  16.56 GFLOPS | Progress: (32/1500) | 42.39 s



[Task  4/34]  Current/Best:   12.22/  18.06 GFLOPS | Progress: (40/1500) | 56.52 s



[Task  4/34]  Current/Best:   10.62/  18.06 GFLOPS | Progress: (48/1500) | 70.71 s



[Task  4/34]  Current/Best:    4.57/  18.06 GFLOPS | Progress: (56/1500) | 84.88 s



[Task  4/34]  Current/Best:   10.62/  18.06 GFLOPS | Progress: (64/1500) | 106.47 s



[Task  4/34]  Current/Best:    1.04/  19.23 GFLOPS | Progress: (136/1500) | 149.52 s



[Task  4/34]  Current/Best:    0.64/  19.23 GFLOPS | Progress: (144/1500) | 164.43 s



[Task  4/34]  Current/Best:    1.10/  19.23 GFLOPS | Progress: (152/1500) | 179.39 s



[Task  4/34]  Current/Best:    3.54/  19.23 GFLOPS | Progress: (160/1500) | 193.61 s



[Task  4/34]  Current/Best:    7.13/  19.23 GFLOPS | Progress: (168/1500) | 208.23 s



[Task  5/34]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s33 s



[Task  5/34]  Current/Best:    5.76/  89.63 GFLOPS | Progress: (56/1500) | 33.91 s



[Task  5/34]  Current/Best:   21.36/ 101.20 GFLOPS | Progress: (120/1500) | 75.83 s



[Task  5/34]  Current/Best:   84.04/ 101.20 GFLOPS | Progress: (184/1500) | 103.93 s



[Task  5/34]  Current/Best:   47.61/ 102.04 GFLOPS | Progress: (248/1500) | 130.71 s



[Task  5/34]  Current/Best:   10.83/ 102.04 GFLOPS | Progress: (312/1500) | 151.67 s



[Task  5/34]  Current/Best:   39.09/ 104.43 GFLOPS | Progress: (376/1500) | 172.63 s



[Task  5/34]  Current/Best:   58.18/ 104.43 GFLOPS | Progress: (576/1500) | 245.43 s



[Task  5/34]  Current/Best:    7.39/ 104.43 GFLOPS | Progress: (744/1500) | 309.88 s



[Task  5/34]  Current/Best:   11.25/ 104.43 GFLOPS | Progress: (768/1500) | 337.90 s



[Task  5/34]  Current/Best:   30.84/ 104.43 GFLOPS | Progress: (896/1500) | 410.54 s



[Task  5/34]  Current/Best:   18.42/ 104.43 GFLOPS | Progress: (1120/1500) | 518.24 s



[Task  5/34]  Current/Best:    5.35/ 104.43 GFLOPS | Progress: (1128/1500) | 529.71 s



[Task  5/34]  Current/Best:    4.70/ 104.43 GFLOPS | Progress: (1136/1500) | 541.57 s



[Task  5/34]  Current/Best:    3.50/ 104.43 GFLOPS | Progress: (1152/1500) | 565.00 s



[Task  5/34]  Current/Best:    4.15/ 104.43 GFLOPS | Progress: (1336/1500) | 654.34 s



[Task  5/34]  Current/Best:    4.46/ 104.43 GFLOPS | Progress: (1344/1500) | 669.16 s



[Task  5/34]  Current/Best:    4.44/ 104.43 GFLOPS | Progress: (1408/1500) | 702.69 s



[Task  5/34]  Current/Best:    5.09/ 104.43 GFLOPS | Progress: (1448/1500) | 727.42 s



[Task  5/34]  Current/Best:    5.77/ 104.43 GFLOPS | Progress: (1464/1500) | 743.47 s



[Task  5/34]  Current/Best:    6.73/ 104.43 GFLOPS | Progress: (1500/1500) | 790.09 s Done.
 Done.




[Task  6/34]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task  6/34]  Current/Best:   50.40/  86.15 GFLOPS | Progress: (56/1500) | 15.64 s



[Task  6/34]  Current/Best:   61.43/ 101.81 GFLOPS | Progress: (120/1500) | 40.03 s



[Task  6/34]  Current/Best:   40.64/ 101.81 GFLOPS | Progress: (184/1500) | 62.05 s



[Task  6/34]  Current/Best:   71.76/ 101.81 GFLOPS | Progress: (248/1500) | 91.56 s



[Task  6/34]  Current/Best:   32.28/ 101.81 GFLOPS | Progress: (312/1500) | 116.39 s



[Task  6/34]  Current/Best:   12.01/ 101.81 GFLOPS | Progress: (1500/1500) | 481.04 s Done.




[Task  7/34]  Current/Best:   62.71/  96.22 GFLOPS | Progress: (56/1500) | 12.46 s



[Task  7/34]  Current/Best:   47.03/  96.92 GFLOPS | Progress: (120/1500) | 39.59 s



[Task  7/34]  Current/Best:   45.00/  98.73 GFLOPS | Progress: (184/1500) | 60.96 s



[Task  7/34]  Current/Best:   78.64/ 101.32 GFLOPS | Progress: (248/1500) | 83.82 s



[Task  7/34]  Current/Best:    6.93/ 101.32 GFLOPS | Progress: (312/1500) | 103.88 s



[Task  7/34]  Current/Best:    8.98/ 101.32 GFLOPS | Progress: (376/1500) | 125.04 s



[Task  7/34]  Current/Best:    7.29/ 101.32 GFLOPS | Progress: (1500/1500) | 457.30 s Done.
[Task  8/34]  Current/Best:    9.38/  22.51 GFLOPS | Progress: (8/1500) | 11.78 s



[Task  8/34]  Current/Best:    9.17/  22.51 GFLOPS | Progress: (16/1500) | 23.11 s



[Task  8/34]  Current/Best:    7.69/  27.43 GFLOPS | Progress: (24/1500) | 34.42 s



[Task  8/34]  Current/Best:    7.89/  27.43 GFLOPS | Progress: (32/1500) | 45.72 s



[Task  8/34]  Current/Best:    1.31/  32.36 GFLOPS | Progress: (48/1500) | 61.13 s



[Task  8/34]  Current/Best:    5.87/  32.36 GFLOPS | Progress: (56/1500) | 65.95 s



[Task  8/34]  Current/Best:    8.45/  32.36 GFLOPS | Progress: (64/1500) | 105.61 s



[Task  8/34]  Current/Best:   16.87/  36.13 GFLOPS | Progress: (120/1500) | 123.12 s



[Task  8/34]  Current/Best:   11.29/  41.70 GFLOPS | Progress: (184/1500) | 152.22 s



[Task  8/34]  Current/Best:    0.91/  41.70 GFLOPS | Progress: (192/1500) | 169.38 s



[Task  8/34]  Current/Best:   16.26/  41.70 GFLOPS | Progress: (248/1500) | 185.64 s



[Task  8/34]  Current/Best:   16.12/  41.70 GFLOPS | Progress: (312/1500) | 214.93 s



[Task  8/34]  Current/Best:    4.69/  41.70 GFLOPS | Progress: (320/1500) | 232.52 s



[Task  8/34]  Current/Best:    8.33/  41.70 GFLOPS | Progress: (376/1500) | 248.16 s



[Task  8/34]  Current/Best:    3.34/  41.70 GFLOPS | Progress: (384/1500) | 263.60 s



[Task  8/34]  Current/Best:   18.92/  41.70 GFLOPS | Progress: (440/1500) | 280.67 s



[Task  8/34]  Current/Best:   12.91/  41.70 GFLOPS | Progress: (504/1500) | 300.08 s



[Task  8/34]  Current/Best:   11.99/  41.70 GFLOPS | Progress: (568/1500) | 320.41 s



[Task  8/34]  Current/Best:   16.28/  41.70 GFLOPS | Progress: (576/1500) | 335.37 s



[Task  8/34]  Current/Best:    1.34/  41.93 GFLOPS | Progress: (640/1500) | 364.67 s



[Task  8/34]  Current/Best:    5.86/  41.93 GFLOPS | Progress: (768/1500) | 414.74 s



[Task  8/34]  Current/Best:   17.33/  41.93 GFLOPS | Progress: (832/1500) | 447.27 s



[Task  8/34]  Current/Best:    4.73/  41.93 GFLOPS | Progress: (960/1500) | 503.95 s



[Task  8/34]  Current/Best:   11.70/  44.21 GFLOPS | Progress: (1152/1500) | 582.25 s



[Task  8/34]  Current/Best:    4.97/  44.21 GFLOPS | Progress: (1344/1500) | 662.67 s



[Task  8/34]  Current/Best:    6.82/  44.21 GFLOPS | Progress: (1408/1500) | 693.99 s



[Task  8/34]  Current/Best:    4.73/  44.21 GFLOPS | Progress: (1500/1500) | 733.23 s Done.




[Task  9/34]  Current/Best:    6.51/ 121.54 GFLOPS | Progress: (56/1500) | 13.09 s



[Task  9/34]  Current/Best:    9.75/ 121.54 GFLOPS | Progress: (120/1500) | 41.82 s



[Task  9/34]  Current/Best:   70.52/ 121.54 GFLOPS | Progress: (184/1500) | 64.79 s



[Task  9/34]  Current/Best:    2.66/ 121.54 GFLOPS | Progress: (248/1500) | 88.46 s



[Task  9/34]  Current/Best:   46.79/ 121.54 GFLOPS | Progress: (312/1500) | 107.76 s



[Task  9/34]  Current/Best:   58.23/ 121.54 GFLOPS | Progress: (440/1500) | 145.32 s



[Task  9/34]  Current/Best:    6.68/ 121.54 GFLOPS | Progress: (504/1500) | 165.01 s



[Task  9/34]  Current/Best:    6.98/ 121.54 GFLOPS | Progress: (576/1500) | 197.13 s



[Task  9/34]  Current/Best:    6.40/ 121.54 GFLOPS | Progress: (968/1500) | 321.81 s



[Task  9/34]  Current/Best:    4.28/ 121.54 GFLOPS | Progress: (1216/1500) | 406.59 s



[Task  9/34]  Current/Best:    5.15/ 121.54 GFLOPS | Progress: (1320/1500) | 447.97 s



[Task  9/34]  Current/Best:    6.97/ 121.54 GFLOPS | Progress: (1500/1500) | 523.80 s Done.




[Task 10/34]  Current/Best:   53.52/  97.15 GFLOPS | Progress: (56/1500) | 12.55 s



[Task 10/34]  Current/Best:   17.11/  97.15 GFLOPS | Progress: (120/1500) | 36.91 s



[Task 10/34]  Current/Best:   25.01/  97.15 GFLOPS | Progress: (184/1500) | 58.63 s



[Task 10/34]  Current/Best:   56.81/  97.15 GFLOPS | Progress: (248/1500) | 79.90 s



[Task 10/34]  Current/Best:   49.35/  97.15 GFLOPS | Progress: (312/1500) | 98.65 s



[Task 10/34]  Current/Best:    4.82/  97.15 GFLOPS | Progress: (1500/1500) | 451.29 s Done.




[Task 11/34]  Current/Best:    4.56/  15.84 GFLOPS | Progress: (16/1500) | 15.33 s



[Task 11/34]  Current/Best:    8.45/  15.84 GFLOPS | Progress: (24/1500) | 26.58 s



[Task 11/34]  Current/Best:    2.80/  15.84 GFLOPS | Progress: (40/1500) | 41.24 s



[Task 11/34]  Current/Best:    6.35/  15.84 GFLOPS | Progress: (56/1500) | 57.02 s



[Task 11/34]  Current/Best:    6.07/  21.50 GFLOPS | Progress: (120/1500) | 119.07 s



[Task 11/34]  Current/Best:    6.27/  21.50 GFLOPS | Progress: (128/1500) | 141.60 s



[Task 11/34]  Current/Best:    0.18/  21.50 GFLOPS | Progress: (184/1500) | 181.44 s



[Task 11/34]  Current/Best:    4.17/  26.24 GFLOPS | Progress: (248/1500) | 219.52 s



[Task 11/34]  Current/Best:    3.89/  26.24 GFLOPS | Progress: (256/1500) | 236.67 s



[Task 11/34]  Current/Best:    0.44/  26.24 GFLOPS | Progress: (448/1500) | 295.91 s



[Task 11/34]  Current/Best:    3.24/  26.24 GFLOPS | Progress: (512/1500) | 324.07 s



[Task 11/34]  Current/Best:    4.02/  26.24 GFLOPS | Progress: (560/1500) | 348.54 s



[Task 11/34]  Current/Best:    3.69/  26.24 GFLOPS | Progress: (576/1500) | 365.63 s



[Task 11/34]  Current/Best:    4.50/  26.24 GFLOPS | Progress: (1032/1500) | 534.53 s



[Task 11/34]  Current/Best:    2.14/  26.24 GFLOPS | Progress: (1088/1500) | 570.49 s



[Task 11/34]  Current/Best:    7.89/  26.24 GFLOPS | Progress: (1096/1500) | 581.78 s



[Task 11/34]  Current/Best:    3.41/  26.24 GFLOPS | Progress: (1104/1500) | 595.05 s



[Task 11/34]  Current/Best:    3.93/  26.24 GFLOPS | Progress: (1152/1500) | 623.08 s



[Task 11/34]  Current/Best:    3.97/  26.24 GFLOPS | Progress: (1184/1500) | 646.32 s



[Task 11/34]  Current/Best:    2.94/  26.24 GFLOPS | Progress: (1192/1500) | 657.27 s



[Task 11/34]  Current/Best:    5.25/  26.24 GFLOPS | Progress: (1208/1500) | 680.54 s



[Task 11/34]  Current/Best:   13.58/  26.24 GFLOPS | Progress: (1216/1500) | 698.24 s



[Task 11/34]  Current/Best:    3.61/  26.24 GFLOPS | Progress: (1264/1500) | 730.23 s



[Task 11/34]  Current/Best:    2.52/  26.24 GFLOPS | Progress: (1272/1500) | 743.82 s



[Task 11/34]  Current/Best:    3.49/  26.24 GFLOPS | Progress: (1280/1500) | 757.79 s



[Task 11/34]  Current/Best:    3.18/  26.24 GFLOPS | Progress: (1320/1500) | 785.42 s



[Task 11/34]  Current/Best:    2.78/  26.24 GFLOPS | Progress: (1328/1500) | 796.68 s



[Task 11/34]  Current/Best:    3.93/  26.24 GFLOPS | Progress: (1336/1500) | 807.99 s



[Task 11/34]  Current/Best:    3.24/  26.24 GFLOPS | Progress: (1344/1500) | 823.27 s



[Task 11/34]  Current/Best:    9.43/  26.24 GFLOPS | Progress: (1368/1500) | 851.24 s



[Task 11/34]  Current/Best:    1.07/  26.24 GFLOPS | Progress: (1376/1500) | 866.45 s



[Task 11/34]  Current/Best:    0.59/  26.24 GFLOPS | Progress: (1384/1500) | 880.71 s



[Task 11/34]  Current/Best:    5.40/  26.24 GFLOPS | Progress: (1392/1500) | 891.80 s



[Task 11/34]  Current/Best:    3.19/  26.24 GFLOPS | Progress: (1400/1500) | 905.39 s



[Task 11/34]  Current/Best:   12.95/  26.24 GFLOPS | Progress: (1408/1500) | 922.45 s



[Task 11/34]  Current/Best:    1.90/  26.24 GFLOPS | Progress: (1432/1500) | 938.88 s



[Task 11/34]  Current/Best:    0.45/  26.24 GFLOPS | Progress: (1440/1500) | 953.34 s



[Task 11/34]  Current/Best:    2.64/  26.24 GFLOPS | Progress: (1448/1500) | 967.55 s



[Task 11/34]  Current/Best:    5.98/  26.24 GFLOPS | Progress: (1456/1500) | 982.58 s



[Task 11/34]  Current/Best:    3.89/  26.24 GFLOPS | Progress: (1464/1500) | 993.67 s



[Task 11/34]  Current/Best:    4.97/  26.24 GFLOPS | Progress: (1472/1500) | 1007.83 s



[Task 11/34]  Current/Best:    3.26/  26.24 GFLOPS | Progress: (1500/1500) | 1030.16 s Done.




[Task 12/34]  Current/Best:    0.00/   0.00 GFLOPS | Progress: (0/1500) | 0.00 s



[Task 12/34]  Current/Best:   69.36/  98.00 GFLOPS | Progress: (56/1500) | 15.19 s



[Task 12/34]  Current/Best:   46.89/  98.00 GFLOPS | Progress: (120/1500) | 41.54 s



[Task 12/34]  Current/Best:   57.53/  98.44 GFLOPS | Progress: (184/1500) | 63.31 s



[Task 12/34]  Current/Best:    7.53/ 103.11 GFLOPS | Progress: (248/1500) | 88.28 s



[Task 12/34]  Current/Best:   51.61/ 112.60 GFLOPS | Progress: (312/1500) | 110.88 s



[Task 12/34]  Current/Best:   51.86/ 112.60 GFLOPS | Progress: (376/1500) | 129.94 s



[Task 12/34]  Current/Best:    8.78/ 112.60 GFLOPS | Progress: (1500/1500) | 481.46 s Done.




[Task 13/34]  Current/Best:    2.96/  58.86 GFLOPS | Progress: (56/1500) | 14.33 s



[Task 13/34]  Current/Best:   31.67/  61.22 GFLOPS | Progress: (120/1500) | 50.08 s



[Task 13/34]  Current/Best:   31.29/  62.21 GFLOPS | Progress: (184/1500) | 82.80 s



[Task 13/34]  Current/Best:   18.27/  62.21 GFLOPS | Progress: (248/1500) | 113.56 s



[Task 13/34]  Current/Best:    2.92/  62.21 GFLOPS | Progress: (264/1500) | 133.03 s



[Task 13/34]  Current/Best:   11.46/  62.21 GFLOPS | Progress: (272/1500) | 143.73 s



[Task 13/34]  Current/Best:   57.22/  62.21 GFLOPS | Progress: (280/1500) | 158.47 s



[Task 13/34]  Current/Best:    7.47/  62.21 GFLOPS | Progress: (288/1500) | 169.83 s



[Task 13/34]  Current/Best:   13.06/  62.21 GFLOPS | Progress: (304/1500) | 191.51 s



[Task 13/34]  Current/Best:   15.82/  62.21 GFLOPS | Progress: (312/1500) | 202.85 s



[Task 13/34]  Current/Best:   14.62/  62.21 GFLOPS | Progress: (320/1500) | 220.29 s



[Task 13/34]  Current/Best:   27.23/  62.21 GFLOPS | Progress: (376/1500) | 236.57 s



[Task 13/34]  Current/Best:    8.49/  62.21 GFLOPS | Progress: (384/1500) | 252.14 s



[Task 13/34]  Current/Best:    1.94/  62.21 GFLOPS | Progress: (440/1500) | 271.81 s



[Task 13/34]  Current/Best:    7.54/  62.21 GFLOPS | Progress: (568/1500) | 317.38 s



[Task 13/34]  Current/Best:   13.16/  62.21 GFLOPS | Progress: (576/1500) | 333.27 s



[Task 13/34]  Current/Best:    3.56/  62.21 GFLOPS | Progress: (640/1500) | 363.79 s



[Task 13/34]  Current/Best:    9.56/  62.21 GFLOPS | Progress: (704/1500) | 397.66 s



[Task 13/34]  Current/Best:    5.08/  62.21 GFLOPS | Progress: (768/1500) | 429.61 s



[Task 13/34]  Current/Best:   13.89/  62.21 GFLOPS | Progress: (896/1500) | 488.74 s



[Task 13/34]  Current/Best:    2.59/  62.21 GFLOPS | Progress: (1088/1500) | 567.28 s



[Task 13/34]  Current/Best:    1.87/  62.21 GFLOPS | Progress: (1152/1500) | 607.76 s



[Task 13/34]  Current/Best:    5.18/  62.21 GFLOPS | Progress: (1216/1500) | 651.67 s



[Task 13/34]  Current/Best:   13.00/  62.21 GFLOPS | Progress: (1408/1500) | 743.11 s



[Task 13/34]  Current/Best:    9.64/  62.21 GFLOPS | Progress: (1500/1500) | 781.38 s Done.




[Task 14/34]  Current/Best:   69.74/ 109.59 GFLOPS | Progress: (56/1500) | 14.18 s



[Task 14/34]  Current/Best:   12.60/ 109.59 GFLOPS | Progress: (120/1500) | 42.83 s



[Task 14/34]  Current/Best:   47.57/ 116.51 GFLOPS | Progress: (184/1500) | 76.61 s

## Compiling an Optimized Model with Tuning Data

As an output of the tuning process above, we obtained the tuning records
stored in ``mobilenetv2.json`` (the file named by ``tuning_option["tuning_records"]``).

Now that tuning data for the model has been collected, we can re-compile the
model using optimized operators to speed up our computations.



In [None]:
# Rebuild the model while the best tuning records are applied, again at
# optimization level 3. `lib` is rebound to the newly built library.
with autotvm.apply_history_best(tuning_option["tuning_records"]), \
        tvm.transform.PassContext(opt_level=3, config={}):
    lib = relay.build(mod, target=target, params=params)

In [None]:
# Create the device handle (index 0) for the target and wrap the current `lib`
# in a graph executor; this rebinds `dev` and `module`.
dev = tvm.device(str(target), 0)
module = graph_executor.GraphModule(lib["default"](dev))

In [None]:
# `dtype` is recorded for reference; nothing in this cell reads it.
dtype = "float32"

# Run one inference on the same input and fetch output 0 as a NumPy array.
module.set_input(input_name, img_data)
module.run()
tvm_output = module.get_output(0).numpy()

# Turn the raw outputs into normalized scores and drop the batch axis.
scores = softmax(tvm_output)
scores = np.squeeze(scores)

# Indices sorted by descending score; report at most the top five.
ranks = np.argsort(scores)[::-1]
for rank in ranks[0:5]:
    # Use a placeholder instead of raising IndexError when the model emits more
    # scores than there are entries in `labels`.
    label = labels[rank] if rank < len(labels) else "<unlabeled %d>" % rank
    print("class='%s' with probability=%f" % (label, scores[rank]))

class='n02123159 tiger cat' with probability=0.464309
class='n02123045 tabby, tabby cat' with probability=0.328435
class='n02124075 Egyptian cat' with probability=0.154237
class='n02127052 lynx, catamount' with probability=0.034955
class='n02128385 leopard, Panthera pardus' with probability=0.003649


## Comparing the Tuned and Untuned Models
We want to collect some basic performance data associated with this optimized
model to compare it to the unoptimized model. Depending on your underlying
hardware, number of iterations, and other factors, you should see a performance
improvement in comparing the optimized model to the unoptimized model.



In [None]:
# Benchmark the current build with the same settings used for `unoptimized`
# (30 repeats of one run each), then print both summaries for comparison.
optimized = module.benchmark(dev, repeat=30, number=1)

print("Unoptimized: %s" % unoptimized)
print("Optimized: %s" % optimized)

Unoptimized: Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  53.4859      53.4593      53.7874      53.3423       0.1066   
               
Optimized: Execution time summary:
 mean (ms)   median (ms)    max (ms)     min (ms)     std (ms)  
  42.7042      42.6927      43.0606      42.4750       0.1293   
               
