In [1]:
import tvm
from tvm import relax
from tvm.relax import Expr, Function
from tvm.ir import IRModule
from tvm.script import relax as R
import numpy as np
import onnx
import time
import matplotlib.pyplot as plt


In [2]:
@R.function
def add_one(x: R.Tensor((10,), "float32")) -> R.Tensor((10,), "float32"):
    with R.dataflow():
        y = R.add(x, R.const(1, "float32"))
        R.output(y)
    return y

mod = IRModule({"main": add_one})

target = "llvm"
ex = relax.build(mod, target)

vm = relax.VirtualMachine(ex, tvm.cpu())

# Try to call 'main'
try:
    out = vm["main"](tvm.nd.array(np.arange(10, dtype="float32")))
    print("Result from main:", out.numpy())
except AttributeError as e:
    print(e)
    print("Trying to call 'add_one' instead")

    # Try 'add_one'
    try:
        out = vm["add_one"](tvm.nd.array(np.arange(10, dtype="float32")))
        print("Result from add_one:", out.numpy())
    except AttributeError as e2:
        print(e2)
        print("Function not found in VM module.")



Module has no function 'main'
Trying to call 'add_one' instead
Result from add_one: [ 1.  2.  3.  4.  5.  6.  7.  8.  9. 10.]


In [3]:
import os
import numpy as np
import torch
from torch.export import export
from torchvision.models.resnet import ResNet18_Weights, resnet18

torch_model = resnet18(weights=ResNet18_Weights.DEFAULT).eval()

In [4]:
import tvm
from tvm import relax
from tvm.relax.frontend.torch import from_exported_program

# Give an example argument to torch.export
example_args = (torch.randn(1, 3, 224, 224, dtype=torch.float32),)

# Skip running in CI environment
IS_IN_CI = os.getenv("CI", "") == "true"

if not IS_IN_CI:
    # Convert the model to IRModule
    with torch.no_grad():
        exported_program = export(torch_model, example_args)
        mod = from_exported_program(exported_program, keep_params_as_input=True)

    mod, params = relax.frontend.detach_params(mod)
    mod.show()
    

In [6]:
!pip install xgboost
TOTAL_TRIALS = 100  # You can increase to 20000 for better tuning quality
target = tvm.target.Target("llvm -num-cores=4")  # Use 'llvm' for Apple macOS CPU
work_dir = "tuning_logs"


# Run tuning pipeline unless in CI environment
if not IS_IN_CI:
    mod_optimized = relax.get_pipeline("static_shape_tuning", target=target, total_trials=TOTAL_TRIALS)(mod)
    mod_optimized["main"].show()
else:
    mod_optimized = mod  # fallback if skipping tuning

Collecting xgboost
  Downloading xgboost-3.0.3-py3-none-macosx_12_0_arm64.whl.metadata (2.1 kB)
Downloading xgboost-3.0.3-py3-none-macosx_12_0_arm64.whl (2.0 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m7.7 MB/s[0m  [33m0:00:00[0mm [31m8.4 MB/s[0m eta [36m0:00:01[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-3.0.3
2025-08-06 13:59:17 [INFO] Logging directory: tuning_logs/logs
2025-08-06 13:59:17 [INFO] LocalBuilder: max_workers = 14
2025-08-06 13:59:17 [INFO] LocalRunner: max_workers = 1
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #0: "reshape"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #1: "fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #2: "fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4"
2025-08-06 13:

  meta_schedule.arm_cpu.meta_schedule.adaptive_pool_avg
  meta_schedule.cpu.meta_schedule.adaptive_pool_avg


2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #12: "fused_conv2d1_subtract1_divide1_expand_dims_multiply1_expand_dims_add2_add3_relu1"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #13: "transpose"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #14: "fused_conv2d2_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #15: "fused_conv2d1_subtract1_divide1_expand_dims_multiply1_expand_dims_add2_relu1"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #16: "fused_conv2d_subtract_divide_expand_dims_multiply_expand_dims_add1_relu"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #17: "fused_conv2d10_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11"
2025-08-06 13:59:18 [INFO] [task_scheduler.cc:167] Initializing Task #18: "fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_a

  meta_schedule.arm_cpu.meta_schedule.pool_max
  meta_schedule.cpu.meta_schedule.pool_max


Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,,,,0,
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,,,,0,
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,,,,0,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 13:59:18 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |            N/A |          N/A |                   N/A |      0 |      
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |            N/A |          N/A |                   N/A |      0 |      
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |            N/A |          N/A |                   N/A |      0 |      
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,,,,0,
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,,,,0,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |      
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |            N/A |          N/A |                   N/A |      0 |      
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |            N/A |          N/A |                   N/A |      0 |      
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,,,,0,
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,,,,0,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,



Total trials: 1
Total latency (us): 9.40678

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |            N/A |          N/A |                   N/A |      0 |      
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |            N/A |          N/A |           

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,,,,0,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |      
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |            N/A |          N/A |                   N/A |      0 |      
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,,,,0,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |            N/A |          N/A |                   N/A |      0 |      
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y


2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |              663.7133 |     64 |    Y 
  3 |            

Unnamed: 0,Name,FLOP,Weight,Speed (GFLOPS),Latency (us),Weighted Latency (us),Trials,Done
0,reshape,1,1,0.0001,9.4068,9.4068,1,Y
1,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4,231361536,2,327.936,705.5082,1411.0164,64,Y
2,fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,115730944,1,174.3689,663.7133,663.7133,64,Y
3,fused_matmul_add13,1025000,1,,,,0,Y
4,adaptive_avg_pool2d,25600,1,,,,0,Y
5,fused_conv2d6_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8_relu3,231461888,1,,,,0,Y
6,fused_conv2d4_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5,13246464,1,,,,0,Y
7,fused_conv2d3_subtract2_divide2_expand_dims1_multiply2_expand_dims1_add5_relu2,231712768,1,,,,0,Y
8,fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4,231336448,1,,,,0,Y
9,fused_conv2d7_subtract3_divide3_expand_dims2_multiply3_expand_dims2_add8,13045760,1,,,,0,Y



Total trials: 129
Total latency (us): 2084.14

2025-08-06 14:00:20 [DEBUG] [task_scheduler.cc:326] 
 ID |                                                                                  Name |      FLOP | Weight | Speed (GFLOPS) | Latency (us) | Weighted Latency (us) | Trials | Done 
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  0 |                                                                               reshape |         1 |      1 |         0.0001 |       9.4068 |                9.4068 |      1 |    Y 
  1 | fused_conv2d9_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_add12_relu4 | 231361536 |      2 |       327.9360 |     705.5082 |             1411.0164 |     64 |    Y 
  2 |       fused_conv2d8_subtract4_divide4_expand_dims3_multiply4_expand_dims3_add11_relu4 | 115730944 |      1 |       174.3689 |     663.7133 |         



In [14]:
# Convert example input tensor to TVM NDArray
input_data = [tvm.nd.array(example_args[0].numpy())]
target = tvm.target.Target("llvm -num-cores=4")
# 🛠️ Convert model weights to TVM NDArrays

# Helper function to build, run, and measure average inference time (ms)
#module["main"].params is a list of all 63 parameters. The first is the input (x). The rest (62) are detached model weights (p_fc_weight, p_conv1_weight, etc.). You must pass all 63 in order when calling the VM
def run_and_time(module, input_data,params, target, n_repeat=10):
    ex = relax.build(module, target=target, params=params)
    vm = relax.VirtualMachine(ex, tvm.cpu())

     # Gather full list of arguments: [input] + [params in order]
    full_args = [*input_data]
    for param in module["main"].params[1:]:  # skip input[0], add weights
        full_args.append(params[param.name_hint])
        
    # Warmup
    vm["main"](*full_args)

    times = []
    for _ in range(n_repeat):
        start = time.time()
        vm["main"](*full_args)
        end = time.time()
        times.append(end - start)

    avg_ms = (sum(times) / n_repeat) * 1000
    return avg_ms

state_dict_mod = mod.state_dict()
params_mod = {k: tvm.nd.array(v.cpu().numpy()) for k, v in state_dict_mod.items()}
state_dict_modOpt = mod_optimized.state_dict()
params_modOpt = {k: tvm.nd.array(v.cpu().numpy()) for k, v in state_dict_modOpt.items()}
# Measure before tuning
time_before = run_and_time(mod, input_data,params_mod, target)
# Measure after tuning
time_after = run_and_time(mod_optimized, input_data,params_modOpt, target)

print(f"Average inference time before tuning: {time_before:.2f} ms")
print(f"Average inference time after tuning: {time_after:.2f} ms")

# Visualization
plt.bar(["Before Tuning", "After Tuning"], [time_before, time_after], color=["red", "green"])
plt.ylabel("Average Inference Time (ms)")
plt.title("ResNet18 Inference Performance Before vs After TVM Tuning")
plt.show()

AttributeError: <class 'tvm.ir.module.IRModule'> has no attribute state_dict

#https://tvm.apache.org/docs/v0.13.0/tutorial/tvmc_command_line_driver.html

For this tutorial, we will be working with ResNet-50 v2. ResNet-50 is a convolutional neural 
network that is 50 layers deep and designed to classify images. The model we will be using 
has been pre-trained on more than a million images with 1000 different classifications. 
The network has an input image size of 224x224. If you are interested exploring more of how 
the ResNet-50 model is structured, we recommend downloading Netron, a freely available
ML model viewer.

In [2]:
# The exclamation mark (!) tells Jupyter to run the line as a shell command, not Python.

!curl -L -o resnet50-v2-7.onnx https://github.com/onnx/models/raw/b9a54e89508f101a1611cd64f4ef56b9cb62c7cf/vision/classification/resnet/model/resnet50-v2-7.onnx
import os
print(os.path.exists("resnet50-v2-7.onnx"))  # should print True


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 97.6M  100 97.6M    0     0  4098k      0  0:00:24  0:00:24 --:--:-- 4678k
True


In [4]:
# This may take several minutes depending on your machine
!tvmc compile --target "llvm" \
--input-shapes "data:[1,3,224,224]" \
--output resnet50-v2-7-tvm.tar \
resnet50-v2-7.onnx


zsh:1: command not found: tvmc
