# Evaluation

Copyright 2025 Universitat Politècnica de Catalunya

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
import numpy as np
from utils import prepare_targets_and_mask, load_dataset, seg_to_global_reshape
from models import RouteNetGauss

import pickle


2025-01-15 12:10:21.013425: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-15 12:10:21.031249: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-15 12:10:21.031268: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-15 12:10:21.031787: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-15 12:10:21.034776: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-01-15 12:10:21.034914: I tensorflow/core/platform/cpu_feature_guard.cc:1

## Load datasets

In [2]:
def load_and_format_dataset(path:str, metric:str) -> tf.data.Dataset:
    """Load a dataset partition and prepare its targets and mask for evaluation.

    The raw dataset returned by ``load_dataset`` is mapped through
    ``prepare_targets_and_mask`` using five target fields (average and the
    50th/90th/95th/99th percentiles of ``metric``) and the ``flow_has_{metric}``
    mask field.

    Parameters
    ----------
    path : str
        Name of the dataset and partition [training/validation/test] to load, in
        format '{name}/{partition}'.
    metric : str
        Metric whose targets are selected [delay, jitter]. It is interpolated
        verbatim into the field names, so it must match their casing.

    Returns
    -------
    tf.data.Dataset
        The loaded dataset with the formatting map applied.
    """
    aggregations = ("avg", "p50", "p90", "p95", "p99")
    target_fields = [f"flow_{agg}_{metric}" for agg in aggregations]
    mask_field = f"flow_has_{metric}"

    raw_ds = load_dataset(path)
    formatter = prepare_targets_and_mask(target_fields, mask_field)
    return raw_ds.map(formatter)

def ds_to_dict(ds:tf.data.Dataset) -> dict:
    """Index the samples of a dataset by their "sample_idx" feature.

    Parameters
    ----------
    ds : tf.data.Dataset
        Dataset to convert. Each element must unpack into a ``(x, y)`` pair where
        ``x["sample_idx"]`` exposes ``.numpy()``.

    Returns
    -------
    dict
        Mapping from ``x["sample_idx"].numpy()`` to the ``(x, y)`` pair. If the
        same index appears more than once, the last occurrence is kept.
    """
    samples_by_idx = {}
    for features, targets in ds:
        sample_key = features["sample_idx"].numpy()
        samples_by_idx[sample_key] = (features, targets)
    return samples_by_idx

In [3]:
# Test partitions used in the evaluation below. For every scenario (MAWI pcaps,
# TREX multiburst, TREX synthetic) four datasets are loaded: delay and jitter
# targets from the base dataset, and delay and jitter targets from its
# "_simulated" counterpart.
ds_mawi_pcaps_delay = load_and_format_dataset("mawi_pcaps/test", "delay")
ds_mawi_pcaps_jitter = load_and_format_dataset("mawi_pcaps/test", "jitter")
ds_mawi_pcaps_simulated_delay = load_and_format_dataset("mawi_pcaps_simulated/test", "delay")
ds_mawi_pcaps_simulated_jitter = load_and_format_dataset("mawi_pcaps_simulated/test", "jitter")

ds_trex_multiburst_delay = load_and_format_dataset("trex_multiburst/test", "delay")
ds_trex_multiburst_jitter = load_and_format_dataset("trex_multiburst/test", "jitter")
ds_trex_multiburst_simulated_delay = load_and_format_dataset("trex_multiburst_simulated/test", "delay")
ds_trex_multiburst_simulated_jitter = load_and_format_dataset("trex_multiburst_simulated/test", "jitter")

ds_trex_synthetic_delay = load_and_format_dataset("trex_synthetic/test", "delay")
ds_trex_synthetic_jitter = load_and_format_dataset("trex_synthetic/test", "jitter")
ds_trex_synthetic_simulated_delay = load_and_format_dataset("trex_synthetic_simulated/test", "delay")
ds_trex_synthetic_simulated_jitter = load_and_format_dataset("trex_synthetic_simulated/test", "jitter")

2025-01-15 12:10:22.144386: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2025-01-15 12:10:22.144424: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: pluto
2025-01-15 12:10:22.144431: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: pluto
2025-01-15 12:10:22.144496: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 515.65.1
2025-01-15 12:10:22.144514: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 515.65.1
2025-01-15 12:10:22.144519: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:241] kernel version seems to match DSO: 515.65.1


## Load models

In [4]:
def load_model(
    model_id: str,
    checkpoint: str,
    metric: str,
    inference_mode:bool=True,
) -> RouteNetGauss:
    """Build a RouteNetGauss model and restore its weights from a checkpoint.

    The normalization statistics are read from
    ``normalization/{model_id}/z_scores.pkl`` and the weights from
    ``ckpt/{model_id}/{checkpoint}``.

    Parameters
    ----------
    model_id : str
        Experiment identifier; used as the sub-directory for both the
        normalization file and the checkpoint.
    checkpoint : str
        Checkpoint to load. To be loaded with tf.keras.Model.load_weights.
    metric : str
        Metric the model predicts [delay, jitter]. Selects the mask field
        ``flow_has_{metric}`` and enables ``use_trans_delay`` only for "delay".
    inference_mode : bool, optional
        Forwarded to RouteNetGauss as ``inference_mode``, by default True.

    Returns
    -------
    RouteNetGauss
        Compiled model with the checkpoint weights loaded.
    """
    adam = tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0)
    mape_loss = tf.keras.losses.MeanAbsolutePercentageError()

    # NOTE: pickle executes arbitrary code on load; only use normalization files
    # that come from a trusted source.
    z_scores_path = f"normalization/{model_id}/z_scores.pkl"
    with open(z_scores_path, "rb") as z_scores_file:
        z_scores = pickle.load(z_scores_file)

    net = RouteNetGauss(
        output_dim=5,
        mask_field=f"flow_has_{metric}",
        inference_mode=inference_mode,
        use_trans_delay=(metric == "delay"),
        z_scores=z_scores,
    )
    net.compile(optimizer=adam, loss=mape_loss)

    # expect_partial() acknowledges that not every value stored in the checkpoint
    # has to be matched by the freshly built model.
    weights_path = f"ckpt/{model_id}/{checkpoint}"
    net.load_weights(weights_path).expect_partial()
    return net

In [5]:
# One model per (scenario, metric) pair. The arguments are the experiment
# identifier, the checkpoint name, and the metric.
# NOTE(review): checkpoint names look like "{epoch}-{validation loss}" — confirm
# against the training script.
model_mawi_pcaps_delay = load_model(
    "paper_weights/mawi_pcaps/RouteNetGauss/delay",
    "178-22.4136",
    "delay",
)
model_mawi_pcaps_jitter = load_model(
    "paper_weights/mawi_pcaps/RouteNetGauss/jitter",
    "183-16.1764",
    "jitter",
)

# The TREX delay models are read from "*_filtered" experiment directories,
# while the TREX jitter models use the unfiltered ones.
model_trex_multiburst_delay = load_model(
    "paper_weights/trex_multiburst_filtered/RouteNetGauss/delay",
    "214-4.7122",
    "delay",
)
model_trex_multiburst_jitter = load_model(
    "paper_weights/trex_multiburst/RouteNetGauss/jitter",
    "242-11.5459",
    "jitter",
)

model_trex_synthetic_delay = load_model(
    "paper_weights/trex_synthetic_filtered/RouteNetGauss/delay",
    "221-2.8343",
    "delay",
)

model_trex_synthetic_jitter = load_model(
    "paper_weights/trex_synthetic/RouteNetGauss/jitter",
    "244-9.8195",
    "jitter",
)

## Evaluation results

In [6]:
def mape(y_true, y_pred):
    """Return the mean absolute percentage error formatted as ``"{value:.3f}%"``.

    The error is computed relative to ``y_true``; entries of ``y_true`` equal to
    zero therefore lead to a division by zero.
    """
    relative_error = np.abs((y_true - y_pred) / y_true)
    percentage = np.mean(relative_error) * 100
    return f"{percentage:.3f}%"


def mae(y_true, y_pred):
    """Return the mean absolute error, scaled by 1e6, formatted as ``"{value:.3f}μs"``.

    Each absolute error is multiplied by 1e6 before averaging (presumably to
    convert seconds into microseconds — confirm the unit of the inputs).
    """
    scaled_abs_error = np.abs((y_true - y_pred)) * 1e6
    mean_error = np.mean(scaled_abs_error)
    return f"{mean_error:.3f}μs"


def r2(y_true, y_pred):
    """Return the coefficient of determination formatted as ``"{value:.3f}"``.

    Computed as one minus the ratio between the residual sum of squares and the
    total sum of squares of ``y_true`` around its mean.
    """
    residual_ss = np.sum(np.square(y_true - y_pred))
    total_ss = np.sum(np.square(y_true - np.mean(y_true)))
    return f"{1 - residual_ss / total_ss:.3f}"


def concatenate_ds(ds: tf.data.Dataset) -> np.ndarray:
    """Collect the targets of every dataset element into one numpy array.

    Used for evaluation.

    Parameters
    ----------
    ds : tf.data.Dataset
        Dataset to transform. Each element must unpack into a pair whose second
        item exposes ``.numpy()``; the first item is ignored.

    Returns
    -------
    np.ndarray
        The per-element targets concatenated along axis 0.
    """
    target_arrays = []
    for _features, targets in ds:
        target_arrays.append(targets.numpy())
    return np.concatenate(target_arrays, axis=0)


def concatenate_ds_with_donor_mask(
    ds: tf.data.Dataset, mask_ds: tf.data.Dataset, metric: str = "jitter"
) -> np.ndarray:
    """Transforms dataset into a numpy array, selecting entries with a donor mask.

    For every sample in ``ds`` the mask is not taken from the sample itself but
    from the entry of ``mask_ds`` that shares its "sample_idx". Used for
    evaluation.

    Parameters
    ----------
    ds : tf.data.Dataset
        Dataset to transform. The target values are read from the features (first
        item) of each element; the second item is ignored.
    mask_ds : tf.data.Dataset
        Donor of the masks. Despite the annotation, it is indexed as
        ``mask_ds[sample_idx][0][f"flow_has_{metric}"]``, i.e. it is expected to
        be a mapping such as the one returned by ``ds_to_dict``.
    metric : str, optional
        Metric whose targets and mask are selected [delay, jitter], by default
        "jitter".

    Returns
    -------
    np.ndarray
        Numpy array with the concatenated targets, one column per target field
        (avg, p50, p90, p95, p99). Values less than or equal to zero are set to 0.
    """
    target_fields = [
        f"flow_{agg}_{metric}" for agg in ("avg", "p50", "p90", "p95", "p99")
    ]
    mask_key = f"flow_has_{metric}"

    per_sample = []
    for features, _ in ds:
        # Look up the donor sample by index and take the mask from its features.
        donor_features = mask_ds[features["sample_idx"].numpy()][0]
        donor_mask = tf.expand_dims(
            seg_to_global_reshape(donor_features[mask_key], num_dims=2), 1
        )

        # One column per target field, keeping only the entries the mask selects.
        columns = []
        for field in target_fields:
            selected = tf.boolean_mask(
                seg_to_global_reshape(features[field]), donor_mask
            )
            columns.append(tf.reshape(selected, (-1, 1)))
        per_sample.append(tf.concat(columns, axis=1).numpy())

    stacked = np.concatenate(per_sample, axis=0)
    # Clamp non-positive values to zero.
    stacked[stacked <= 0] = 0
    return stacked


def evaluate_model_vs_sim(
    true_ds: tf.data.Dataset,
    simulated_ds: tf.data.Dataset,
    model: RouteNetGauss,
    metric: str,
) -> None:
    """Print a summary report comparing the model's predictions against the
    simulator's.

    For each aggregation (average, median, 90th/95th/99th percentile) and each
    error function (mape, mae, r2), one line is printed with the score of the
    model and the score of the simulator, both measured against ``true_ds``.

    Parameters
    ----------
    true_ds : tf.data.Dataset
        Dataset representing the ground truth (testbed).
    simulated_ds : tf.data.Dataset
        Dataset representing the simulator's prediction of the ground truth.
    model : RouteNetGauss
        Trained RouteNet-Gauss model.
    metric : str
        Name of performance metric [delay, jitter] to evaluate. It is printed
        as given and lower-cased before being used to select dataset fields.
    """
    ground_truth = concatenate_ds(true_ds)
    model_output = model.predict(true_ds)
    # The simulated samples are masked with the masks of the matching true samples.
    simulator_output = concatenate_ds_with_donor_mask(
        simulated_ds, ds_to_dict(true_ds), metric.lower()
    )

    aggregations = (
        "Average",
        "Median",
        "90th Percentile",
        "95th Percentile",
        "99th Percentile",
    )
    error_functions = (mape, mae, r2)

    for column, aggregation in enumerate(aggregations):
        truth_column = ground_truth[:, column]
        for error_fn in error_functions:
            model_score = error_fn(truth_column, model_output[:, column])
            simulator_score = error_fn(truth_column, simulator_output[:, column])
            print(
                f"{aggregation} {metric} ({error_fn.__name__}):",
                f"RouteNet-Gauss {model_score}",
                f"OMNeT++ {simulator_score}",
            )
        print()
        print()

### TREX Synthetic

In [7]:
# TREX synthetic, delay: model vs. simulator on the test partition. "Delay" is
# the label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_trex_synthetic_delay,
    ds_trex_synthetic_simulated_delay,
    model_trex_synthetic_delay,
    "Delay",
)

Average Delay (mape): RouteNet-Gauss 2.604% OMNeT++ 53.684%
Average Delay (mae): RouteNet-Gauss 3.128μs OMNeT++ 63.486μs
Average Delay (r2): RouteNet-Gauss 0.941 OMNeT++ -4.337

Median Delay (mape): RouteNet-Gauss 2.790% OMNeT++ 54.325%
Median Delay (mae): RouteNet-Gauss 3.305μs OMNeT++ 63.714μs
Median Delay (r2): RouteNet-Gauss 0.939 OMNeT++ -4.389

90th Percentile Delay (mape): RouteNet-Gauss 2.418% OMNeT++ 47.311%
90th Percentile Delay (mae): RouteNet-Gauss 3.136μs OMNeT++ 61.355μs
90th Percentile Delay (r2): RouteNet-Gauss 0.942 OMNeT++ -3.741

95th Percentile Delay (mape): RouteNet-Gauss 2.591% OMNeT++ 45.609%
95th Percentile Delay (mae): RouteNet-Gauss 3.442μs OMNeT++ 60.859μs
95th Percentile Delay (r2): RouteNet-Gauss 0.935 OMNeT++ -3.553

99th Percentile Delay (mape): RouteNet-Gauss 3.121% OMNeT++ 42.814%
99th Percentile Delay (mae): RouteNet-Gauss 4.351μs OMNeT++ 60.112μs
99th Percentile Delay (r2): RouteNet-Gauss 0.909 OMNeT++ -3.184



In [8]:
# TREX synthetic, jitter: model vs. simulator on the test partition. "Jitter" is
# the label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_trex_synthetic_jitter,
    ds_trex_synthetic_simulated_jitter,
    model_trex_synthetic_jitter,
    "Jitter",
)

Average Jitter (mape): RouteNet-Gauss 9.447% OMNeT++ 24.999%
Average Jitter (mae): RouteNet-Gauss 0.792μs OMNeT++ 2.122μs
Average Jitter (r2): RouteNet-Gauss 0.757 OMNeT++ -0.584

Median Jitter (mape): RouteNet-Gauss 11.881% OMNeT++ 27.549%
Median Jitter (mae): RouteNet-Gauss 0.814μs OMNeT++ 1.919μs
Median Jitter (r2): RouteNet-Gauss 0.711 OMNeT++ -0.509

90th Percentile Jitter (mape): RouteNet-Gauss 9.347% OMNeT++ 25.221%
90th Percentile Jitter (mae): RouteNet-Gauss 1.610μs OMNeT++ 4.447μs
90th Percentile Jitter (r2): RouteNet-Gauss 0.754 OMNeT++ -0.799

95th Percentile Jitter (mape): RouteNet-Gauss 9.074% OMNeT++ 24.172%
95th Percentile Jitter (mae): RouteNet-Gauss 1.866μs OMNeT++ 5.118μs
95th Percentile Jitter (r2): RouteNet-Gauss 0.748 OMNeT++ -0.702

99th Percentile Jitter (mape): RouteNet-Gauss 9.609% OMNeT++ 23.097%
99th Percentile Jitter (mae): RouteNet-Gauss 2.558μs OMNeT++ 6.418μs
99th Percentile Jitter (r2): RouteNet-Gauss 0.700 OMNeT++ -0.482



### TREX Multiburst

In [9]:
# TREX multiburst, delay: model vs. simulator on the test partition. "Delay" is
# the label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_trex_multiburst_delay,
    ds_trex_multiburst_simulated_delay,
    model_trex_multiburst_delay,
    "Delay",
)

Average Delay (mape): RouteNet-Gauss 2.277% OMNeT++ 56.122%
Average Delay (mae): RouteNet-Gauss 2.809μs OMNeT++ 67.807μs
Average Delay (r2): RouteNet-Gauss 0.921 OMNeT++ -4.508

Median Delay (mape): RouteNet-Gauss 2.480% OMNeT++ 57.353%
Median Delay (mae): RouteNet-Gauss 3.006μs OMNeT++ 68.646μs
Median Delay (r2): RouteNet-Gauss 0.924 OMNeT++ -4.689

90th Percentile Delay (mape): RouteNet-Gauss 2.289% OMNeT++ 48.504%
90th Percentile Delay (mae): RouteNet-Gauss 3.119μs OMNeT++ 64.347μs
90th Percentile Delay (r2): RouteNet-Gauss 0.833 OMNeT++ -3.311

95th Percentile Delay (mape): RouteNet-Gauss 2.529% OMNeT++ 46.355%
95th Percentile Delay (mae): RouteNet-Gauss 3.523μs OMNeT++ 63.354μs
95th Percentile Delay (r2): RouteNet-Gauss 0.826 OMNeT++ -3.095

99th Percentile Delay (mape): RouteNet-Gauss 3.188% OMNeT++ 43.007%
99th Percentile Delay (mae): RouteNet-Gauss 4.614μs OMNeT++ 61.940μs
99th Percentile Delay (r2): RouteNet-Gauss 0.814 OMNeT++ -2.784



In [10]:
# TREX multiburst, jitter: model vs. simulator on the test partition. "Jitter" is
# the label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_trex_multiburst_jitter,
    ds_trex_multiburst_simulated_jitter,
    model_trex_multiburst_jitter,
    "Jitter",
)

Average Jitter (mape): RouteNet-Gauss 10.711% OMNeT++ 37.435%
Average Jitter (mae): RouteNet-Gauss 0.967μs OMNeT++ 3.244μs
Average Jitter (r2): RouteNet-Gauss 0.529 OMNeT++ -1.980

Median Jitter (mape): RouteNet-Gauss 13.680% OMNeT++ 41.358%
Median Jitter (mae): RouteNet-Gauss 1.022μs OMNeT++ 2.926μs
Median Jitter (r2): RouteNet-Gauss 0.431 OMNeT++ -1.790

90th Percentile Jitter (mape): RouteNet-Gauss 10.934% OMNeT++ 39.311%
90th Percentile Jitter (mae): RouteNet-Gauss 1.949μs OMNeT++ 7.005μs
90th Percentile Jitter (r2): RouteNet-Gauss 0.625 OMNeT++ -2.488

95th Percentile Jitter (mape): RouteNet-Gauss 10.913% OMNeT++ 37.906%
95th Percentile Jitter (mae): RouteNet-Gauss 2.281μs OMNeT++ 8.055μs
95th Percentile Jitter (r2): RouteNet-Gauss 0.632 OMNeT++ -2.230

99th Percentile Jitter (mape): RouteNet-Gauss 11.824% OMNeT++ 35.792%
99th Percentile Jitter (mae): RouteNet-Gauss 3.112μs OMNeT++ 9.818μs
99th Percentile Jitter (r2): RouteNet-Gauss 0.633 OMNeT++ -1.657



### MAWI PCAPS

In [11]:
# MAWI pcaps, delay: model vs. simulator on the test partition. "Delay" is the
# label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_mawi_pcaps_delay,
    ds_mawi_pcaps_simulated_delay,
    model_mawi_pcaps_delay,
    "Delay",
)

Average Delay (mape): RouteNet-Gauss 14.749% OMNeT++ 93.075%
Average Delay (mae): RouteNet-Gauss 47.217μs OMNeT++ 126.088μs
Average Delay (r2): RouteNet-Gauss 0.055 OMNeT++ -0.991

Median Delay (mape): RouteNet-Gauss 12.372% OMNeT++ 82.611%
Median Delay (mae): RouteNet-Gauss 27.522μs OMNeT++ 90.462μs
Median Delay (r2): RouteNet-Gauss 0.045 OMNeT++ -1.017

90th Percentile Delay (mape): RouteNet-Gauss 20.122% OMNeT++ 131.792%
90th Percentile Delay (mae): RouteNet-Gauss 134.366μs OMNeT++ 283.835μs
90th Percentile Delay (r2): RouteNet-Gauss 0.011 OMNeT++ -0.961

95th Percentile Delay (mape): RouteNet-Gauss 24.316% OMNeT++ 147.495%
95th Percentile Delay (mae): RouteNet-Gauss 187.471μs OMNeT++ 368.629μs
95th Percentile Delay (r2): RouteNet-Gauss -0.005 OMNeT++ -0.934

99th Percentile Delay (mape): RouteNet-Gauss 32.889% OMNeT++ 158.056%
99th Percentile Delay (mae): RouteNet-Gauss 289.536μs OMNeT++ 503.644μs
99th Percentile Delay (r2): RouteNet-Gauss -0.037 OMNeT++ -0.770



In [12]:
# MAWI pcaps, jitter: model vs. simulator on the test partition. "Jitter" is the
# label printed in the report; it is lower-cased for dataset field lookups.
evaluate_model_vs_sim(
    ds_mawi_pcaps_jitter,
    ds_mawi_pcaps_simulated_jitter,
    model_mawi_pcaps_jitter,
    "Jitter",
)

Average Jitter (mape): RouteNet-Gauss 14.445% OMNeT++ 46.439%
Average Jitter (mae): RouteNet-Gauss 2.335μs OMNeT++ 6.590μs
Average Jitter (r2): RouteNet-Gauss 0.590 OMNeT++ -0.959

Median Jitter (mape): RouteNet-Gauss 21.518% OMNeT++ 84.287%
Median Jitter (mae): RouteNet-Gauss 1.484μs OMNeT++ 4.897μs
Median Jitter (r2): RouteNet-Gauss 0.701 OMNeT++ -1.096

90th Percentile Jitter (mape): RouteNet-Gauss 14.994% OMNeT++ 41.095%
90th Percentile Jitter (mae): RouteNet-Gauss 6.047μs OMNeT++ 14.681μs
90th Percentile Jitter (r2): RouteNet-Gauss 0.468 OMNeT++ -0.912

95th Percentile Jitter (mape): RouteNet-Gauss 13.621% OMNeT++ 38.560%
95th Percentile Jitter (mae): RouteNet-Gauss 8.106μs OMNeT++ 19.653μs
95th Percentile Jitter (r2): RouteNet-Gauss 0.405 OMNeT++ -0.868

99th Percentile Jitter (mape): RouteNet-Gauss 16.493% OMNeT++ 48.971%
99th Percentile Jitter (mae): RouteNet-Gauss 20.527μs OMNeT++ 43.000μs
99th Percentile Jitter (r2): RouteNet-Gauss 0.271 OMNeT++ -0.764

