In [41]:
import torch
import torchvision
from torchvision.models import ResNet18_Weights

import onnx
from onnxsim import simplify

# 1) Load a pretrained model (already trained; we're not training here)
model = torchvision.models.resnet18(weights=ResNet18_Weights.DEFAULT).eval()

# 2) Dummy input shape must match the model's expectation
dummy_input = torch.randn(1, 3, 224, 224)

# 3) Export to ONNX: this freezes the graph + weights into a portable file
torch.onnx.export(
    model,
    dummy_input,
    "resnet18.onnx",
    input_names=["input"],       # name used later in TVM runtime
    output_names=["logits"],
    opset_version=18,            # operator set version (compatibility)
    do_constant_folding=True,    # fold constants for small optimizations
    dynamic_axes=None,
    training=torch.onnx.TrainingMode.EVAL,
)

print("Wrote resnet18.onnx")

# # Simplify (folds shapes, resolves GlobalAveragePool/Flatten patterns)
# model_onnx = onnx.load("resnet18.onnx")
# model_simplified, check = simplify(model_onnx)
# assert check, "ONNX simplification failed"
# onnx.save(model_simplified, "resnet18_simplified.onnx")
# print("Wrote resnet18_simplified.onnx")

[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `ResNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Wrote resnet18.onnx


In [42]:
import onnx
from onnx import helper, numpy_helper

model = onnx.load("resnet18.onnx")
init_map = {init.name: init for init in model.graph.initializer}
touched = False

for node in model.graph.node:
    if node.op_type == "ReduceMean" and len(node.input) == 2:
        axes_name = node.input[1]
        axes_init = init_map.get(axes_name)
        if axes_init is None:
            continue
        axes = numpy_helper.to_array(axes_init).tolist()
        if not isinstance(axes, list):
            axes = [int(axes)]
        axes = [int(a) for a in axes]
        axes = sorted({a if a >= 0 else a + 4 for a in axes})  # convert negatives, keep order

        # keep the existing attrs (keepdims / noop_with_empty_axes), add axes attribute
        kept_attrs = [attr for attr in node.attribute if attr.name != "axes"]
        node.ClearField("attribute")
        for attr in kept_attrs:
            node.attribute.add().CopyFrom(attr)
        node.attribute.add().CopyFrom(helper.make_attribute("axes", axes))

        # drop the axes tensor input and corresponding initializer/value_info
        node.input.pop()
        model.graph.initializer.remove(axes_init)
        for i in range(len(model.graph.value_info) - 1, -1, -1):
            if model.graph.value_info[i].name == axes_name:
                del model.graph.value_info[i]
        touched = True

if not touched:
    raise RuntimeError("No ReduceMean nodes needed fixing")
onnx.save(model, "resnet18_fixed.onnx")


In [44]:
import onnx, tvm
from tvm import relay
from tvm.contrib import cc, ndk  # for (cross) compiling shared libs
from tvm.contrib import graph_executor

# --- Load your ONNX ---
onnx_model = onnx.load("resnet18_fixed.onnx")
input_name = onnx_model.graph.input[0].name
shape_dict = {input_name: (1, 3, 224, 224)}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict, freeze_params=True)

# --- Choose ONE target (examples below) ---
# Native x86-64 CPU (auto-detect host CPU):
target = tvm.target.Target("llvm")

# Faster: specialize to a specific CPU (fill with your host CPU name if you want)
# target = tvm.target.Target("llvm -mcpu=skylake-avx512")

# NVIDIA GPU:
# target = tvm.target.Target("cuda")

# Vulkan (desktop/mobile GPUs):
# target = tvm.target.Target("vulkan")

# AMD GPU (ROCm):
# target = tvm.target.Target("rocm")

# Cross-compile to AArch64 Linux CPU (e.g., Jetson/Nano in CPU mode or ARM server):
# target = tvm.target.Target("llvm -mtriple=aarch64-linux-gnu -mattr=+neon")

# Android via NDK (CPU):
# target = tvm.target.Target("llvm -mtriple=armv7a-linux-android")

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

  # Simple: produce a Linux .so next to your script
lib.export_library("resnet18_tvm.so")
print("Wrote resnet18_tvm.so")



One or more operators have not been tuned. Please tune your model for better performance. Use DEBUG logging level to see more details.


Wrote resnet18_tvm.so


In [None]:
import onnx
import numpy as np
import tvm
from tvm import relay

target = "llvm" 

# 4. Compile the Relay Module
# opt_level=3 applies aggressive optimizations
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

output_path = "resnet18_tvm.tar"
lib.export_library(output_path)

print(f"Successfully compiled and saved the deployable module to {output_path}")

# To check the contents (on Linux/macOS):

InternalError: Traceback (most recent call last):
  8: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::transform::Pass, tvm::IRModule)>::AssignTypedLambda<tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}>(tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}, std::string)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  7: tvm::transform::Pass::operator()(tvm::IRModule) const
  6: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  5: tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  4: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1}>(tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  3: tvm::relay::TypeInferencer::Infer(tvm::GlobalVar, tvm::relay::Function)
  2: tvm::relay::TypeSolver::Solve()
  1: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<bool (tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>::AssignTypedLambda<bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>(bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  0: tvm::relay::ReshapeRel(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)
  File "/workspace/tvm/src/relay/op/tensor/transform.cc", line 795
InternalError: Check failed: oshape_sum == data_shape_sum (512 vs. 3584) : Input tensor shape(1,512,7,1) and reshaped shape(1,512) are not compatible!

In [35]:
import onnx
import tvm
from tvm import relay
from tvm.contrib import graph_executor

# Load the ONNX model
onnx_model = onnx.load("resnet18_traced.onnx")

# Input shape dictionary. Ensure "input" matches the actual input name in your ONNX model.
# You can check this using onnx.load("resnet18.onnx").graph.input[0].name
input_name  = onnx_model.graph.input[0].name
input_shape = (1,3,224,224)
input_dtype = "float32"

# Import the ONNX model to TVM Relay format
# The error in your traceback (from_onnx(onnx_model, shape_dict)) implies onnx.mapping was called internally.
# Upgrading onnx should fix this if it's a version incompatibility.
# mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

shape_dict = {input_name: input_shape}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

# mod, params = relay.frontend.from_onnx(
#     onnx_model,
#     shape={input_name: input_shape},
#     dtype={input_name: input_dtype},
#     freeze_params=True,
# )

# Set the compilation target. "llvm" for CPU, "cuda" for NVIDIA GPUs.
target = "llvm" # or "cuda" if you have a GPU and CUDA installed

# Compile the model with TVM
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

# Save the compiled TVM artifacts
# These files can then be loaded by the TVM runtime for inference.
lib.export_library("deploy_lib_cpu.so")
with open("deploy_graph.json", "w") as f:
    f.write(lib.get_graph_json())
with open("deploy_params.params", "wb") as f:
    f.write(tvm.runtime.save_param_dict(lib.get_params()))

print("Compiled and exported TVM artifacts:")
print(" - deploy_lib_cpu.so")
print(" - deploy_graph.json")
print(" - deploy_params.params")

InternalError: Traceback (most recent call last):
  8: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::transform::Pass, tvm::IRModule)>::AssignTypedLambda<tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}>(tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}, std::string)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  7: tvm::transform::Pass::operator()(tvm::IRModule) const
  6: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  5: tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  4: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1}>(tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  3: tvm::relay::TypeInferencer::Infer(tvm::GlobalVar, tvm::relay::Function)
  2: tvm::relay::TypeSolver::Solve()
  1: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<bool (tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>::AssignTypedLambda<bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>(bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  0: tvm::relay::ReshapeRel(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)
  File "/workspace/tvm/src/relay/op/tensor/transform.cc", line 795
InternalError: Check failed: oshape_sum == data_shape_sum (512 vs. 3584) : Input tensor shape(1,512,7,1) and reshaped shape(1,512) are not compatible!

In [5]:
import tvm
from tvm import relay
from tvm.contrib import graph_executor
import onnx
import numpy as np # Import numpy for creating example input data

# Load the ONNX model
onnx_model = onnx.load("resnet18.onnx")

# --- IMPORTANT: Determine your model's input name and shape ---
# You'll need to inspect your ONNX model to get this.
# For ResNet18, a common input shape is (batch_size, channels, height, width)
# e.g., (1, 3, 224, 224) for a single image, 3 color channels, 224x224 pixels.
# The input name is often "input.1" or "input" or something similar.
# You can check this by printing onnx_model.graph.input
# For demonstration, let's assume "input.1" as the input name and shape (1, 3, 224, 224)

input_name = onnx_model.graph.input[0].name # Get the input name from the ONNX model
input_shape = (1, 3, 224, 224) # Define the desired input shape
input_type = "float32" # Define the input data type

# Create a dictionary for the input configuration
shape_config = {input_name: input_shape}
dtype_config = {input_name: input_type}

# Convert ONNX model to Relay
# Pass the input configuration as separate arguments
mod, params = relay.frontend.from_onnx(onnx_model, shape=shape_config, dtype=dtype_config)

# --- CHOOSE YOUR TARGET HERE ---
# Example 1: NVIDIA GPU
# target = "cuda"

# Example 2: Generic CPU
target = "llvm"

# -------------------------------

# Compile the model
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

print(f"Model compiled successfully for target: {target}")

# --- Optional: Run inference for verification ---
# If you want to run it, you'll need to create some dummy input data
dev = tvm.device(str(target), 0) # Use str(target) to ensure it's a string

# Create dummy input data
dummy_input = np.random.rand(*input_shape).astype(input_type)

# Load the compiled module
module = graph_executor.GraphModule(lib["default"](dev))

# Set input
module.set_input(input_name, tvm.nd.array(dummy_input))

# Execute
module.run()

# Get output (assuming one output)
output = module.get_output(0).asnumpy()
print(f"Inference successful. Output shape: {output.shape}")

InternalError: Traceback (most recent call last):
  8: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::transform::Pass, tvm::IRModule)>::AssignTypedLambda<tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}>(tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}, std::string)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  7: tvm::transform::Pass::operator()(tvm::IRModule) const
  6: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  5: tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  4: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1}>(tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  3: tvm::relay::TypeInferencer::Infer(tvm::GlobalVar, tvm::relay::Function)
  2: tvm::relay::TypeSolver::Solve()
  1: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<bool (tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>::AssignTypedLambda<bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>(bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  0: tvm::relay::ReshapeRel(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)
  File "/workspace/tvm/src/relay/op/tensor/transform.cc", line 795
InternalError: Check failed: oshape_sum == data_shape_sum (512 vs. 3584) : Input tensor shape(1,512,7,1) and reshaped shape(1,512) are not compatible!

In [39]:
# 1) Export to ONNX
import torch
import torchvision.models as models

model = models.mobilenet_v2(weights=None)  # or weights='IMAGENET1K_V1' if you want pretrained
model.eval()

dummy = torch.randn(1, 3, 224, 224)
torch.onnx.export(
    model, dummy, "mobilenetv2.onnx",
    input_names=["input"], output_names=["logits"],
    opset_version=13,
    do_constant_folding=True,
    dynamic_axes={"input": {0: "batch"}, "logits": {0: "batch"}}
)
print("Saved mobilenetv2.onnx")


  torch.onnx.export(
W1024 00:54:14.648000 3436 torch/onnx/_internal/exporter/_compat.py:114] Setting ONNX exporter to use operator set version 18 because the requested opset_version 13 is a lower version than we have implementations for. Automatic version conversion will be performed, which may not be successful at converting to the requested version. If version conversion is unsuccessful, the opset version of the exported model will be kept at 18. Please consider setting opset_version >=18 to leverage latest ONNX features


[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `MobileNetV2([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...


The model version conversion is not supported by the onnxscript version converter and fallback is enabled. The model will be converted using the onnx C API (target version: 13).
Failed to convert the model to the target version 13 using the ONNX C API. The model was not modified
Traceback (most recent call last):
  File "/home/vscode/.local/lib/python3.11/site-packages/onnxscript/version_converter/__init__.py", line 127, in call
    converted_proto = _c_api_utils.call_onnx_api(
                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/vscode/.local/lib/python3.11/site-packages/onnxscript/version_converter/_c_api_utils.py", line 65, in call_onnx_api
    result = func(proto)
             ^^^^^^^^^^^
  File "/home/vscode/.local/lib/python3.11/site-packages/onnxscript/version_converter/__init__.py", line 122, in _partial_convert_version
    return onnx.version_converter.convert_version(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/vscode/.local/lib/python3.11/site

[torch.onnx] Translate the graph into ONNX... ✅
Saved mobilenetv2.onnx


In [40]:
# 2) Compile with TVM
import onnx, tvm
from tvm import relay
from tvm.contrib import graph_executor
import numpy as np

onnx_model = onnx.load("mobilenetv2.onnx")

shape_dict = {"input": (1, 3, 224, 224)}
mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

target = "llvm"              # CPU
# target = tvm.target.cuda() # or "cuda" if you have GPU
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target=target, params=params)

dev = tvm.cpu(0)             # or tvm.cuda(0)
module = graph_executor.GraphModule(lib["default"](dev))

# run a quick check
inp = np.random.randn(1, 3, 224, 224).astype("float32")
module.set_input("input", tvm.nd.array(inp))
module.run()
out = module.get_output(0).numpy()
print(out.shape)  # (1, 1000)


InternalError: Traceback (most recent call last):
  8: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::transform::Pass, tvm::IRModule)>::AssignTypedLambda<tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}>(tvm::transform::{lambda(tvm::transform::Pass, tvm::IRModule)#7}, std::string)::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  7: tvm::transform::Pass::operator()(tvm::IRModule) const
  6: tvm::transform::Pass::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  5: tvm::transform::ModulePassNode::operator()(tvm::IRModule, tvm::transform::PassContext const&) const
  4: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<tvm::IRModule (tvm::IRModule, tvm::transform::PassContext)>::AssignTypedLambda<tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1}>(tvm::relay::transform::InferType()::{lambda(tvm::IRModule, tvm::transform::PassContext const&)#1})::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  3: tvm::relay::TypeInferencer::Infer(tvm::GlobalVar, tvm::relay::Function)
  2: tvm::relay::TypeSolver::Solve()
  1: tvm::runtime::PackedFuncObj::Extractor<tvm::runtime::PackedFuncSubObj<tvm::runtime::TypedPackedFunc<bool (tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>::AssignTypedLambda<bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)>(bool (*)(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&))::{lambda(tvm::runtime::TVMArgs const&, tvm::runtime::TVMRetValue*)#1}> >::Call(tvm::runtime::PackedFuncObj const*, tvm::runtime::TVMArgs, tvm::runtime::TVMRetValue*)
  0: tvm::relay::ReshapeRel(tvm::runtime::Array<tvm::Type, void> const&, int, tvm::Attrs const&, tvm::TypeReporter const&)
  File "/workspace/tvm/src/relay/op/tensor/transform.cc", line 795
InternalError: Check failed: oshape_sum == data_shape_sum (1280 vs. 8960) : Input tensor shape(1,1280,7,1) and reshaped shape(1,1280) are not compatible!