# Data Prep

In [1]:
%load_ext autoreload
%autoreload 2

import gc
import os
import pandas as pd
import tensorflow as tf
import keras
import torch
import numpy as np
import json

# Render every column and row of a DataFrame (None disables truncation).
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

# One shared seed, applied to each framework imported above.
seed_num = 1337
np.random.seed(seed_num)
keras.utils.set_random_seed(seed_num)
tf.config.experimental.enable_op_determinism()
torch.manual_seed(seed_num)

2026-01-19 17:53:27.639421: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-01-19 17:53:27.747287: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-01-19 17:53:28.618168: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


<torch._C.Generator at 0x7fc793f54d30>

In [2]:
from rule4ml.parsers.data_parser import (
    default_board_map,
    default_hls4ml_map,
    default_layer_type_map,
    default_strategy_map,
    default_vivado_map,
    read_from_json,
    json_to_df,
)

In [3]:
# Parent of the current working directory; every dataset path hangs off it.
base_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
data_path = os.path.join(base_path, "datasets", "huggingface", "wa-hls4ml")

# One sub-directory per dataset split, keyed by split name.
paths = {
    split: os.path.join(data_path, split)
    for split in ("train", "val", "test", "exemplar")
}

# Lookup tables for the categorical columns, taken from rule4ml's defaults.
global_categorical_maps = dict(
    strategy=default_strategy_map,
    board=default_board_map,
    hls4ml_version=default_hls4ml_map,
    vivado_version=default_vivado_map,
)
sequential_categorical_maps = dict(layer_type=default_layer_type_map)

In [None]:
def _decode_sequential_inputs(serialized):
    """Turn one JSON string from the feather cache back into its in-memory form.

    A decoded list becomes a DataFrame; any other decoded value is passed
    through ``list()``.
    """
    decoded = json.loads(serialized)
    if isinstance(decoded, list):
        return pd.DataFrame(decoded)
    return list(decoded)


def _encode_sequential_inputs(layers):
    """Turn one ``sequential_inputs`` cell into a JSON string for feather storage."""
    if isinstance(layers, pd.DataFrame):
        layers = layers.to_dict(orient="records")
    return json.dumps(layers)


# Rows missing any of these columns are dropped from every split.
required_columns = ["bram", "dsp", "ff", "lut", "cycles", "interval"]

# File-name patterns read once per split.
base_patterns = [
    "*2_20_merged.json",
    "*2layer_merged.json",
    "*3layer_merged.json",
    "*latency_merged.json",
    "*resource_merged.json",
    "*exemplar_models.json",
]
# File-name patterns that are listed `repeat` times for the train/val splits.
# NOTE(review): presumably this oversamples the conv models; confirm how
# read_from_json treats a pattern that appears more than once.
conv_patterns = ["*conv1d_merged.json", "*conv2d_merged.json"]

dataframes = {}
for key, path in paths.items():
    feather_file = os.path.join(path, "data.feather")

    # Fast path: reuse the cache written by a previous run of this cell.
    if os.path.exists(feather_file):
        df = pd.read_feather(feather_file)
        df["sequential_inputs"] = df["sequential_inputs"].apply(_decode_sequential_inputs)
        dataframes[key] = df.dropna(subset=required_columns)
        continue

    # Slow path: parse the raw JSON files for this split.
    repeat = 5 if key in ("train", "val") else 1
    json_data = read_from_json(
        [os.path.join(path, pattern) for pattern in base_patterns + conv_patterns * repeat],
        batch_size=128,
        max_workers=16,
    )
    if not len(json_data):
        print(f"No data found for {key}. Skipping.")
        continue
    print(f"Loaded {len(json_data)} records from {key} set.")

    normalize = key in ("train", "val")
    df = json_to_df(
        json_data,
        global_categorical_maps,
        sequential_categorical_maps,
        normalize=normalize,
        max_workers=16,
    )
    if not df.empty:
        # The column is stored as JSON strings in the feather file, so it is
        # encoded in place before writing ...
        df["sequential_inputs"] = df["sequential_inputs"].apply(_encode_sequential_inputs)
        df.to_feather(feather_file)

        # ... and decoded again afterwards, which leaves the in-memory frame in
        # the same form a later cache hit would produce.
        df["sequential_inputs"] = df["sequential_inputs"].apply(_decode_sequential_inputs)

    dataframes[key] = df.dropna(subset=required_columns)
    del json_data
    gc.collect()

Loaded 147192 records from test set.


  return pd.concat(dataframes, axis=0).reset_index(drop=True)


Loaded 887 records from exemplar set.


In [5]:
# Columns fed to the models as per-network ("global") features.
global_feature_labels = [
    # categorical columns (the keys of global_categorical_maps)
    "strategy",
    "board",
    "hls4ml_version",
    "vivado_version",
    # "clock_period",
    "bit_width",
    "reuse_mean",
    # dense_* columns
    "dense_inputs_mean",
    "dense_outputs_mean",
    "dense_parameters_mean",
    "dense_reuse_mean",
    "dense_count",
    # conv1d_* columns
    "conv1d_inputs_mean",
    "conv1d_outputs_mean",
    "conv1d_parameters_mean",
    "conv1d_filters_mean",
    "conv1d_kernel_size_mean",
    "conv1d_strides_mean",
    "conv1d_reuse_mean",
    "conv1d_count",
    # conv2d_* columns
    "conv2d_inputs_mean",
    "conv2d_outputs_mean",
    "conv2d_parameters_mean",
    "conv2d_filters_mean",
    "conv2d_kernel_size_mean",
    "conv2d_strides_mean",
    "conv2d_reuse_mean",
    "conv2d_count",
    # batchnormalization_* columns
    "batchnormalization_inputs_mean",
    "batchnormalization_outputs_mean",
    "batchnormalization_parameters_mean",
    "batchnormalization_count",
    # *_count columns for the remaining layer / activation types
    "add_count",
    "concatenate_count",
    "dropout_count",
    "relu_count",
    "sigmoid_count",
    "tanh_count",
    # softmax_* columns
    "softmax_inputs_mean",
    "softmax_outputs_mean",
    "softmax_count",
    # total_* columns
    "total_add",
    "total_mult",
    "total_lookup",
    "total_logical",
]

# Columns kept from each row's per-layer "sequential_inputs" table.
sequential_feature_labels = [
    # categorical column (the key of sequential_categorical_maps)
    "layer_type",
    "layer_input_size",
    "layer_output_size",
    "layer_parameter_count",
    "layer_trainable_parameter_count",
    "layer_filters",
    "layer_kernel_height",
    "layer_kernel_width",
    "layer_stride_height",
    "layer_stride_width",
    "layer_reuse",
    "layer_op_add",
    "layer_op_mult",
    "layer_op_lookup",
    "layer_op_logical",
]

# Every prediction target available in the loaded frames.
target_columns = ["bram", "dsp", "ff", "lut", "cycles", "interval"]


def _select_sequential_features(layers_df):
    """Restrict one per-layer table to the columns in sequential_feature_labels."""
    return layers_df[sequential_feature_labels]


# Split every loaded frame into model inputs and targets, keyed by split name.
inputs_dfs, targets_dfs = {}, {}
for split_name, split_df in dataframes.items():
    split_inputs = split_df[global_feature_labels].copy()
    if sequential_feature_labels:
        split_inputs["sequential_inputs"] = split_df["sequential_inputs"].apply(
            _select_sequential_features
        )
    inputs_dfs[split_name] = split_inputs
    targets_dfs[split_name] = split_df[target_columns].copy()

# Training

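### MLP-specific

This cell and the GNN-specific cell below both assign `target_labels`, `torch_model` and `torch_wrapper`, so whichever of the two is run last determines the model trained in the "Common" section.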

In [None]:
from rule4ml.models.architectures import (
    MLPSettings,
    TorchMLP
)
from rule4ml.models.wrappers import TorchModelWrapper
from rule4ml.models.utils import torch_weights_init

# Target column(s) this model predicts; also used to build the model name.
target_labels = ["dsp"]

# (None, feature count) for the inputs and (None, target count) for the outputs.
output_shape = (None, len(target_labels))
global_input_shape = (None, len(global_feature_labels))

mlp_settings = MLPSettings(
    # one entry of 16 per categorical map
    embedding_layers=[16] * len(global_categorical_maps),
    numerical_dense_layers=[64],
    dense_layers=[128, 64, 32],
    dense_dropouts=[0.2, 0.2, 0.1],
)

# Device selection is a manual switch: flip to True to train on the GPU.
cuda_available = False
if cuda_available:
    device = "cuda"
    print("Using GPU for training.")
    torch.cuda.manual_seed(seed_num)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    device = "cpu"
    print("Using CPU for training.")

import torch  # already imported in the first cell; re-importing is a no-op

target_tag = "-".join(label.upper() for label in target_labels)
torch_model = TorchMLP(
    settings=mlp_settings,
    input_shape=global_input_shape,
    output_shape=output_shape,
    categorical_maps=global_categorical_maps,
    name=f"{target_tag}_MLP",
    device=device,
)

# The wrapper is what the "Common" cells build datasets on, fit, and save.
torch_wrapper = TorchModelWrapper()
torch_wrapper.set_model(torch_model)

### GNN-specific

In [None]:
from rule4ml.models.architectures import (
    GNNSettings,
    TorchGNN,
)
from rule4ml.models.wrappers import (
    TorchModelWrapper,
)
from rule4ml.models.utils import torch_weights_init

# Target column(s) this model predicts. The "_"-joined labels select an entry
# of network_settings below, and the "-"-joined upper-cased labels name the model.
target_labels = ["dsp"]

# (None, feature count) for both input kinds and (None, target count) for the output.
global_input_shape = (None, len(global_feature_labels))
sequential_input_shape = (None, len(sequential_feature_labels))
output_shape = (None, len(target_labels))

# One GNN configuration per supported target. Only "bram", "dsp", "ff" and
# "lut" have an entry; any other target raises KeyError at the lookup below.
network_settings = {
    "bram": GNNSettings(
        global_embedding_layers=[8, 32, 16, 8],
        seq_embedding_layers=[8],
        numerical_dense_layers=[32, 32],
        gconv_layers=[64, 128, 16, 16],
        dense_layers=[32, 16],
        dense_dropouts=[0.1, 0.0],
    ),
    "dsp": GNNSettings(
        global_embedding_layers=[32, 16, 8, 32],
        seq_embedding_layers=[8],
        numerical_dense_layers=[32, 16],
        gconv_layers=[128, 32, 32],
        dense_layers=[32, 64, 32, 128, 128],
        # One dropout rate per dense layer, as in every other entry. The list
        # previously had 4 rates for 5 layers; the fifth is an explicit 0.0
        # (no dropout).
        # NOTE(review): confirm against TorchGNN how a short list was handled.
        dense_dropouts=[0.0, 0.0, 0.1, 0.0, 0.0],
    ),
    "ff": GNNSettings(
        global_embedding_layers=[16, 8, 8, 16],
        seq_embedding_layers=[32],
        numerical_dense_layers=[32, 16],
        gconv_layers=[128, 16],
        dense_layers=[16, 64],
        dense_dropouts=[0.2, 0.2],
    ),
    "lut": GNNSettings(
        global_embedding_layers=[16, 32, 32, 32],
        seq_embedding_layers=[16],
        numerical_dense_layers=[],
        gconv_layers=[128, 64],
        dense_layers=[128, 128, 16, 128],
        bayesian_dense=[False, False, False, False],
        dense_dropouts=[0.1, 0.2, 0.0, 0.3],
        bayesian_output=False,
    )
}
gnn_settings = network_settings["_".join(target_labels)]

# Device selection is a manual switch: flip to True to train on the GPU.
cuda_available = False
device = "cuda" if cuda_available else "cpu"
if cuda_available:
    print("Using GPU for training.")
    torch.cuda.manual_seed(seed_num)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
else:
    print("Using CPU for training.")

import torch  # already imported in the first cell; re-importing is a no-op
torch_model = TorchGNN(
    settings=gnn_settings,
    global_input_shape=global_input_shape,
    sequential_input_shape=sequential_input_shape,
    output_shape=output_shape,
    global_categorical_maps=global_categorical_maps,
    sequential_categorical_maps=sequential_categorical_maps,
    name=f"{'-'.join([x.upper() for x in target_labels])}_GNN",
    device=device,
)

# The wrapper is what the "Common" cells build datasets on, fit, and save.
torch_wrapper = TorchModelWrapper()
torch_wrapper.set_model(torch_model)

### Common

In [None]:
from rule4ml.models.callbacks import (
    EarlyStopping,
    ModelCheckpoint,
    ReduceLROnPlateau
)

# All three callbacks watch the same quantity and treat lower as better.
monitored = {"monitor": "val_loss", "mode": "min"}
checkpoint_dir = os.path.join(base_path, "notebooks", "models")

callbacks = [
    EarlyStopping(patience=20, min_delta=0.0, restore_best=True, **monitored),
    ModelCheckpoint(dirpath=checkpoint_dir, **monitored),
    # patience=10 here is half of the early-stopping patience above
    ReduceLROnPlateau(factor=0.5, patience=10, min_delta=0.0, min_lr=1e-6, **monitored),
]

In [None]:
from rule4ml.models.wrappers import TrainSettings
from rule4ml.models.metrics import TorchParametricSMAPE, TorchParametricR2
from rule4ml.models.utils import get_optimizer_from_str

# No target scaling: the same None is passed to the metrics and to build_dataset.
scaler = None

# One SMAPE and one R2 metric per target, SMAPE entries first.
smape_metrics = [
    TorchParametricSMAPE(idx, name=f"smape_{label}", eps=1e-6, scaler=scaler, device=device)
    for idx, label in enumerate(target_labels)
]
r2_metrics = [
    TorchParametricR2(idx, name=f"r2_{label}", eps=1e-6, scaler=scaler, device=device)
    for idx, label in enumerate(target_labels)
]
metrics = smape_metrics + r2_metrics

# The per-target training configurations differ only in batch size.
batch_size_by_target = {"bram": 32, "dsp": 64, "ff": 64, "lut": 32}
global_settings = {
    target: TrainSettings(
        num_epochs=50,
        batch_size=batch_size,
        learning_rate=1e-4,
        loss_function="msle",
        optimizer="adam",
        metrics=metrics,
    )
    for target, batch_size in batch_size_by_target.items()
}
train_settings = global_settings["_".join(target_labels)]

# Hand the train/val frames for the selected target(s) to the wrapper.
torch_wrapper.build_dataset(
    inputs_dfs["train"],
    targets_dfs["train"][target_labels],
    train_settings.batch_size,
    val_inputs_df=inputs_dfs["val"],
    val_targets_df=targets_dfs["val"][target_labels],
    train_repeats=1,
    shuffle=True,
    scaler=scaler,
)

In [None]:
# Train with the selected settings; the returned history feeds the plots and
# the summary printed in the cells below.
fit_history = torch_wrapper.fit(train_settings, callbacks=callbacks, verbose=1)

In [None]:
import matplotlib.pyplot as plt

def plot_fit_history(fit_history, metric_name):
    """Plot one metric's per-epoch history for the train (and, if present, val) split.

    Args:
        fit_history: Mapping with a "train" entry and an optional "val" entry,
            each mapping metric names to a sequence of values.
        metric_name: Key of the metric to plot; also used in the title and as
            the y-axis label.
    """
    curves = [("train", "Train")]
    if "val" in fit_history:
        curves.append(("val", "Validation"))

    plt.figure(figsize=(10, 5))
    for split, legend_label in curves:
        plt.plot(fit_history[split][metric_name], label=legend_label)
    plt.title(f"Model {metric_name} history")
    plt.xlabel("Epoch")
    plt.ylabel(metric_name)
    plt.legend()
    plt.show()

# One figure each for the loss and for the first target's SMAPE and R2.
for metric_name in ("loss", f"smape_{target_labels[0]}", f"r2_{target_labels[0]}"):
    plot_fit_history(fit_history, metric_name)

In [None]:
def _print_final_metrics(title, split_history):
    """Print the last recorded loss, SMAPE and R2 of one split.

    Args:
        title: Prefix for each printed line ("Train" or "Val").
        split_history: Mapping from metric name to its per-epoch values.
    """
    primary_target = target_labels[0]
    reported = (
        ("Loss", "loss"),
        ("SMAPE", f"smape_{primary_target}"),
        ("R2", f"r2_{primary_target}"),
    )
    for label, metric_name in reported:
        print(f"{title} {label}: {split_history[metric_name][-1]}")


# NOTE(review): these are the values of the last epoch that ran. EarlyStopping
# is configured with restore_best=True, so the weights kept after fit() may
# belong to an earlier epoch; confirm which epoch should be reported.
_print_final_metrics("Train", fit_history["train"])

# Validation history is optional, as in plot_fit_history, so only report it
# when it exists instead of raising KeyError.
if "val" in fit_history:
    print("=" * 40)
    _print_final_metrics("Val", fit_history["val"])

In [None]:
# Save the wrapped model under "./models", a path relative to the kernel's
# current working directory.
# NOTE(review): ModelCheckpoint is given <base_path>/notebooks/models; the two
# locations match only when the notebook runs from <base_path>/notebooks. Confirm.
torch_wrapper.save("./models")