In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import hls4ml

print("TensorFlow:", tf.__version__)
print("hls4ml:", hls4ml.__version__)

2025-12-07 21:59:28.031836: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-07 21:59:28.072527: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-12-07 21:59:28.073653: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TensorFlow: 2.13.1
hls4ml: 0.8.1


In [2]:
# Hyperparameters
INPUT_DIM = 64   # 8x8
HIDDEN_DIM = 16
OUTPUT_DIM = 64
LR = 1e-3

# Define model
inputs = keras.Input(shape=(INPUT_DIM,))

# Encoder: 784 -> 32, ReLU
encoded = layers.Dense(HIDDEN_DIM, activation='relu', name="encoder_dense")(inputs)

# Decoder: 32 -> 784, Sigmoid
decoded = layers.Dense(OUTPUT_DIM, activation='sigmoid', name="decoder_dense")(encoded)

autoencoder = keras.Model(inputs=inputs, outputs=decoded, name="tiny_autoencoder")

autoencoder.summary()

# Compile (MSE loss, Adam optimizer)
autoencoder.compile(
    optimizer=keras.optimizers.Adam(learning_rate=LR),
    loss='mse'
)

Model: "tiny_autoencoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 64)]              0         
                                                                 
 encoder_dense (Dense)       (None, 16)                1040      
                                                                 
 decoder_dense (Dense)       (None, 64)                1088      
                                                                 
Total params: 2128 (8.31 KB)
Trainable params: 2128 (8.31 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [3]:
# Load MNIST data
(x_train, _), (x_test, _) = keras.datasets.mnist.load_data()

# Normalize and flatten to 784-dim vectors
x_train = x_train.reshape(-1, 64).astype('float32') / 255.0
x_test  = x_test.reshape(-1, 64).astype('float32') / 255.0

print("Train shape:", x_train.shape)
print("Test shape:", x_test.shape)

EPOCHS = 5
BATCH_SIZE = 64

history = autoencoder.fit(
    x_train, x_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_data=(x_test, x_test)
)

Train shape: (735000, 64)
Test shape: (122500, 64)
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [4]:
# Create default hls4ml config from the Keras model
config = hls4ml.utils.config_from_keras_model(
    autoencoder,
    granularity='model'   # start simple; can switch to 'name' later for per-layer tuning
)

config['Model']['Strategy'] = 'Resource'
config['Model']['ReuseFactor'] = 128
# Increase precision so tiny values donâ€™t round to 0
config['Model']['Precision'] = 'ap_fixed<32,6>'

config

output_dir = 'tiny_ae_hls_keras'

hls_model = hls4ml.converters.convert_from_keras_model(
    autoencoder,
    hls_config=config,
    output_dir=output_dir,
    part='xc7z020clg400-1',   # PYNQ-Z2 FPGA
    backend='Vivado'          # or 'Vitis', depending on your tools
)

hls_model


Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64]], output shape: [None, 64]
Layer name: encoder_dense, layer type: Dense, input shapes: [[None, 64]], output shape: [None, 16]
Layer name: decoder_dense, layer type: Dense, input shapes: [[None, 16]], output shape: [None, 64]
Interpreting Model
Topology:
Layer name: input_1, layer type: InputLayer, input shapes: [[None, 64]], output shape: [None, 64]
Layer name: encoder_dense, layer type: Dense, input shapes: [[None, 64]], output shape: [None, 16]
Layer name: decoder_dense, layer type: Dense, input shapes: [[None, 16]], output shape: [None, 64]
Creating HLS model


<hls4ml.model.graph.ModelGraph at 0x7f462016ad60>

In [5]:
# 1) Compile HLS model (C-simulation)
hls_model.compile()

# 2) Compare Keras vs HLS on some random inputs
X_test_small = x_test[:10]  # 10 samples

y_keras = autoencoder.predict(X_test_small)
y_hls   = hls_model.predict(X_test_small)

print("Keras output (first sample, first 10 values):")
print(y_keras[0][:10])

print("\nHLS output (first sample, first 10 values):")
print(y_hls[0][:10])

# Need to run on computer install Vivado/Vitis
# # 3) Build the HLS project - synthesis + RTL
hls_model.build(csim=False, synth=True, vsynth=False)


Writing HLS project
Done


  saving_api.save_model(


Keras output (first sample, first 10 values):
[0.01215234 0.00770039 0.00716533 0.00728044 0.00851506 0.00858699
 0.00706759 0.00670178 0.00589997 0.0067759 ]

HLS output (first sample, first 10 values):
[0.01171875 0.0078125  0.00683594 0.00683594 0.0078125  0.0078125
 0.00683594 0.00585938 0.00585938 0.00585938]

****** Vitis HLS - High-Level Synthesis from C, C++ and OpenCL v2023.1 (64-bit)
  **** SW Build 3854077 on May  4 2023
  **** IP Build 3864474 on Sun May  7 20:36:21 MDT 2023
  **** SharedData Build 3865790 on Sun May 07 13:33:03 MDT 2023
    ** Copyright 1986-2022 Xilinx, Inc. All Rights Reserved.
    ** Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved.

source /tools/Xilinx/Vitis_HLS/2023.1/scripts/vitis_hls/hls.tcl -notrace
INFO: [HLS 200-10] Running '/tools/Xilinx/Vitis_HLS/2023.1/bin/unwrapped/lnx64.o/vitis_hls'
INFO: [HLS 200-10] For user 'ubuntu2004' on host 'ubuntu2004-virtual-machine' (Linux_x86_64 version 5.15.0-139-generic) on Sun Dec 07 22:01:

{'CSynthesisReport': {'TargetClockPeriod': '5.00',
  'EstimatedClockPeriod': '10.103',
  'BestLatency': '279',
  'WorstLatency': '281',
  'IntervalMin': '128',
  'IntervalMax': '128',
  'BRAM_18K': '46',
  'DSP': '64',
  'FF': '36559',
  'LUT': '34394',
  'URAM': '0',
  'AvailableBRAM_18K': '280',
  'AvailableDSP': '220',
  'AvailableFF': '106400',
  'AvailableLUT': '53200',
  'AvailableURAM': '0'}}

In [None]:
from skimage.metrics import structural_similarity as ssim
import numpy as np

X_eval = x_test[:1000]
y_pred = autoencoder.predict(X_eval)

# MSE
mse = np.mean((X_eval - y_pred)**2)

# MAE
mae = np.mean(np.abs(X_eval - y_pred))

# PSNR
max_pixel = 1.0
psnr = 20 * np.log10(max_pixel / np.sqrt(mse))

# SSIM (compute on 1 sample reshaped to 28x28)
sample_ssim = ssim(
    X_eval[0].reshape(8,8), 
    y_pred[0].reshape(8,8), 
    data_range=1.0
)

print("Keras Autoencoder")
print("MSE:", mse)
print("MAE:", mae)
print("PSNR:", psnr)
print("SSIM:", sample_ssim)


Keras Autoencoder
MSE: 0.0053007486
MAE: 0.02829784
PSNR: 22.75662804483607
SSIM: 0.6924620270729065


In [8]:
from skimage.metrics import structural_similarity as ssim
import numpy as np

X_eval = x_test[:1000]
y_pred = hls_model.predict(X_eval)

# MSE
mse = np.mean((X_eval - y_pred)**2)

# MAE
mae = np.mean(np.abs(X_eval - y_pred))

# PSNR
max_pixel = 1.0
psnr = 20 * np.log10(max_pixel / np.sqrt(mse))

# SSIM (compute on 1 sample reshaped to 28x28)
sample_ssim = ssim(
    X_eval[0].reshape(8,8), 
    y_pred[0].reshape(8,8), 
    data_range=1.0
)

print("HLS Autoencoder")
print("MSE:", mse)
print("MAE:", mae)
print("PSNR:", psnr)
print("SSIM:", sample_ssim)


HLS Autoencoder
MSE: 0.005293928
MAE: 0.027990932
PSNR: 22.76221991668944
SSIM: 0.7182971090078354
