In [1]:
import sys
from pathlib import Path
import yaml
import numpy as np

import torch
import torch.nn as nn
import onnx
import onnxruntime
import numpy as np
from onnxsim import simplify
from mlp import MLP

In [2]:
# Device identifier for this notebook (CPU).
device = 'cpu'

# Parse the training run's YAML configuration into a plain dict and echo it,
# so the hyper-parameters used to build the model are visible in the output.
config = yaml.safe_load(Path('checkpoints/config.yaml').read_text(encoding='UTF-8'))
print(config)

# Instantiate the network from the 'model' section of the configuration.
model = MLP(**config['model'])

{'checkpointing': {'checkpoint_path': '/home/yhuang2/PROJs/RealTimeAlignment/train/mlp_v2/checkpoints', 'resume': True, 'save_frequency': 20}, 'data': {'mode': 'raw', 'num_particles': 50, 'rounded': False}, 'model': {'embedding_features': [128, 128], 'in_features': 6, 'out_features': 27, 'rezero': True, 'norm': None, 'activ': {'name': 'leakyrelu', 'negative_slope': 0.1}, 'subset_config': [[6, 128, 128, 128, 128], [6, 128, 128, 128, 128], [6, 128, 128, 128, 128]]}, 'train': {'batch_size': 64, 'learning_rate': 0.0001, 'num_epochs': 200, 'num_warmup_epochs': 50, 'sched_gamma': 0.95, 'sched_steps': 20}}


In [3]:
# Path of the checkpoint whose weights are exported below.
# NOTE(review): the extension is '.path' rather than the usual '.pth' -- the
# recorded run loaded it successfully, so confirm before "correcting" it.
ckpt_path = 'checkpoints/ckpt_last.path'

# Map tensors onto the device selected at the top of the notebook instead of
# repeating a hard-coded 'cpu' here.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
ckpt = torch.load(ckpt_path, map_location=device)

# The checkpoint is a dict; the network weights live under the 'model' key.
model_state_dict = ckpt['model']
model.load_state_dict(model_state_dict)

# Switch to inference mode before exporting. Left as the last expression so
# the notebook displays the module structure.
model.eval()

MLP(
  (embed): Sequential(
    (0): Identity()
    (1): Linear(in_features=6, out_features=128, bias=True)
    (2): LeakyReLU(negative_slope=0.1)
    (3): Identity()
    (4): Linear(in_features=128, out_features=128, bias=True)
    (5): LeakyReLU(negative_slope=0.1)
  )
  (solvers): ModuleList(
    (0-2): 3 x SubsetSolver(
      (model): Sequential(
        (0): Identity()
        (1): Linear(in_features=768, out_features=128, bias=True)
        (2): LeakyReLU(negative_slope=0.1)
        (3): ResLinear(
          (norm_layer): Identity()
          (linear): Linear(in_features=128, out_features=128, bias=True)
          (activ): LeakyReLU(negative_slope=0.1)
        )
        (4): ResLinear(
          (norm_layer): Identity()
          (linear): Linear(in_features=128, out_features=128, bias=True)
          (activ): LeakyReLU(negative_slope=0.1)
        )
        (5): ResLinear(
          (norm_layer): Identity()
          (linear): Linear(in_features=128, out_features=128, bias=True)


In [4]:
# Example input handed to the exporter: one sample of shape
# (1, num_particles, in_features), with sizes taken from the training config.
in_features = config['model']['in_features']
num_particles = config['data']['num_particles']
dummy_input = torch.randn(1, num_particles, in_features)

# Exporter options, gathered in one place so they are easy to scan and tweak.
export_options = dict(
    export_params=True,          # embed the trained weights in the file
    opset_version=11,            # ONNX opset to target (11 is widely supported)
    do_constant_folding=True,    # fold constant sub-expressions at export time
    input_names=['input'],       # graph input name (arbitrary label)
    output_names=['output'],     # graph output name
    dynamic_axes={               # leave axis 0 symbolic: variable batch size
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
)

# Write the model to "mlp.onnx".
torch.onnx.export(model, dummy_input, "mlp.onnx", **export_options)

print("Model has been exported to ONNX format.")

Model has been exported to ONNX format.


In [5]:
# Read the exported graph back from disk.
onnx_path = 'mlp.onnx'
onnx_model = onnx.load(onnx_path)

# Run the ONNX checker on the loaded graph; the success message below is
# only reached if the check does not raise.
onnx.checker.check_model(onnx_model)
print("ONNX model is valid!")

ONNX model is valid!


In [6]:
# `onnx` and `simplify` are already imported in the first cell, so they are
# not re-imported here.

# Load the exported graph and run onnx-simplifier on it. `simplify` returns
# the simplified model together with a flag reporting whether it validated.
onnx_model = onnx.load("mlp.onnx")
model_simp, check = simplify(onnx_model)
assert check, "Simplified ONNX model could not be validated"

# Save the simplified graph next to the original export.
onnx.save(model_simp, "mlp_simplified.onnx")

In [7]:
import hls4ml

# Ask hls4ml for a configuration dict for the exported network, then override
# the model-wide strategy and fixed-point precision.
# NOTE(review): this rebinds `config`, which held the YAML training
# configuration in earlier cells; re-running those cells after this one will
# see the hls4ml dict instead.
config = hls4ml.utils.config_from_onnx_model('mlp.onnx')
config['Model'].update(
    Strategy='Latency',           # or 'Resource'
    Precision='ap_fixed<16,6>',
)

# NOTE(review): hls4ml documents `io_type` as a keyword argument of the
# converter; confirm that a top-level 'IOType' key in this dict is honoured.
config['IOType'] = 'io_stream'

2025-07-04 18:13:38.842723: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-04 18:13:38.851864: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751667218.863002  524516 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751667218.866803  524516 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751667218.877980  524516 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [8]:
# Directory that receives the generated HLS project.
output_dir = 'hls4ml_mlp_project'

# Convert the simplified ONNX graph into an hls4ml model for the Vivado
# backend on the given FPGA part.
#
# The IO type is forwarded explicitly through `io_type`: hls4ml documents it
# as a converter keyword argument, so storing 'IOType' in the hls_config dict
# alone may never reach the converter. When the key is absent, hls4ml's
# documented default ('io_parallel') is used.
#
# NOTE: in the recorded run this call stopped with
# "Unsupported operation type: Slice" while interpreting the graph, i.e. the
# exported network contains a Slice node that the converter rejected.
hls_model = hls4ml.converters.convert_from_onnx_model(
    'mlp_simplified.onnx',
    hls_config=config,
    output_dir=output_dir,
    backend='Vivado',
    part='xcu250-figd2104-2L-e',
    io_type=config.get('IOType', 'io_parallel'),
)

Interpreting Model ...
Output layers:  ['/output/Gemm']
Input shape: [None, 50, 6]
Topology:
Layer name: /embed/embed.1/MatMul, layer type: Dense, current shape: [[None, 50, 6]]
Layer name: /embed/embed.1/Add, layer type: BiasAdd, current shape: [[None, 50, 128]]
Layer name: /embed/embed.2/LeakyRelu, layer type: LeakyReLU, current shape: [[None, 50, 128]]
Layer name: /embed/embed.4/MatMul, layer type: Dense, current shape: [[None, 50, 128]]
Layer name: /embed/embed.4/Add, layer type: BiasAdd, current shape: [[None, 50, 128]]
Layer name: /embed/embed.5/LeakyRelu, layer type: LeakyReLU, current shape: [[None, 50, 128]]


Exception: ERROR: Unsupported operation type: Slice