# Evaluation
In this notebook you can load the trained checkpoints and evaluate them on the validation and test datasets.

## License


Copyright 2024 Universitat Politècnica de Catalunya

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

In [1]:
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
import pickle
from utils import prepare_targets_and_mask
from models import RouteNet_temporal_delay

# Set RUN_EAGERLY to True to execute tf.functions eagerly, which is useful for debugging.
# The same flag is also forwarded to `model.compile(run_eagerly=...)` in `load_model`.
RUN_EAGERLY = False
tf.config.run_functions_eagerly(RUN_EAGERLY)

2024-11-23 14:00:23.116293: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-23 14:00:23.135444: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-11-23 14:00:23.135463: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-11-23 14:00:23.136012: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-23 14:00:23.139218: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-23 14:00:23.139551: I tensorflow/core/platform/cpu_feature_guard.cc:1

## Datasets

In [2]:
def load_dataset(
    dir_path: str,
    target_transform_mapping=None,
    partitions=("validation", "test"),
):
    """
    Loads one or more partitions of a dataset stored under the ``data/`` directory.

    Parameters
    ----------
    dir_path: str
        Path to the dataset directory, relative to ``data/``.
    target_transform_mapping: Optional[Callable]
        Map function applied to every dataset element to preprocess the target data.
        Defaults to ``prepare_targets_and_mask(["flow_avg_delay_per_seg"],
        "flow_has_delay")``, the default preprocess for paper experiments.
    partitions: Optional[Union[str, Sequence[str]]]
        Names of the partitions (sub-directories of the dataset directory) to load.
        A single name may be given as a plain string. Defaults to the validation and
        test partitions.

    Returns
    -------
    Union[tf.data.Dataset, Tuple[tf.data.Dataset, ...]]
        The requested partitions, in the order given. If a single partition is
        requested it is returned directly; otherwise a tuple is returned.
    """
    # Build the default transform lazily instead of at function-definition time
    if target_transform_mapping is None:
        target_transform_mapping = prepare_targets_and_mask(
            ["flow_avg_delay_per_seg"], "flow_has_delay"
        )
    # A bare string would otherwise be iterated character by character
    if isinstance(partitions, str):
        partitions = [partitions]

    dataset_root = "data/" + dir_path
    ds_list = [
        (
            tf.data.Dataset.load(
                os.path.join(dataset_root, partition), compression="GZIP"
            )
            .map(target_transform_mapping)
            # Prefetch goes last so that the already-mapped elements are buffered
            .prefetch(tf.data.AUTOTUNE)
        )
        for partition in partitions
    ]
    if len(ds_list) == 1:
        return ds_list[0]
    return tuple(ds_list)

In [3]:
# Poisson-only and On-Off-only datasets: load the default validation and test partitions
ds_poisson_only_topo_val, ds_poisson_only_topo_test = load_dataset(
    dir_path="data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4",
)
ds_on_off_only_topo_val, ds_on_off_only_topo_test = load_dataset(
    dir_path="data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4",
)
# Simulated Poisson + On-Off dataset: only the validation partition is loaded
ds_sim_poisson_on_off_val = load_dataset(
    dir_path="data_seg_poisson_on_off_simulated_0_4_100",
    partitions=["validation"],
)

2024-11-23 14:00:24.239440: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2024-11-23 14:00:24.239467: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:129] retrieving CUDA diagnostic information for host: pluto
2024-11-23 14:00:24.239471: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:136] hostname: pluto
2024-11-23 14:00:24.239522: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:159] libcuda reported version is: 515.65.1
2024-11-23 14:00:24.239535: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:163] kernel reported version is: 515.65.1
2024-11-23 14:00:24.239538: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:241] kernel version seems to match DSO: 515.65.1


## Models

In [4]:
def load_model(
    model_class: "type[tf.keras.Model]",
    model_params=None,
):
    """
    Instantiates and compiles a model with the specified hyperparameters.

    Parameters
    ----------
    model_class: Type[tf.keras.Model]
        Model class to instantiate. It must expose a ``z_scores_fields`` attribute
        and accept a ``z_scores`` keyword argument in its constructor.
    model_params: Optional[Dict]
        Hyperparameters forwarded as keyword arguments to the model initialization
        function. Defaults to no extra hyperparameters.

    Returns
    -------
    tf.keras.Model
        Returns a compiled model object
    """
    # Dummy z_scores only for model initialization, will be replaced by the actual
    # z_scores in evaluation
    dummy_z_scores = {kk: [0, 1] for kk in model_class.z_scores_fields}
    model = model_class(
        z_scores=dummy_z_scores,
        # None (the default) means "no extra hyperparameters"
        **(model_params or {}),
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001, clipnorm=1.0),
        loss=tf.keras.losses.MeanAbsolutePercentageError(),
        run_eagerly=RUN_EAGERLY,
    )
    return model


def evaluate_model(model, experiment_path, ckpt, ds_list):
    """Restores a checkpoint into the model and prints its score on each dataset.

    The z-scores stored for the experiment are set on the model before the
    checkpoint weights are loaded.

    Parameters
    ----------
    model : tf.keras.Model
        Instance of the model to evaluate. It is modified in place.
    experiment_path : str
        Experiment path, relative to both the ``normalization`` and ``ckpt``
        directories.
    ckpt : str
        Checkpoint file name. Should be included in the experiment path
    ds_list : List[Tuple[tf.data.Dataset, str]]
        Datasets to evaluate. Each element is a tuple with the dataset instance and
        the name printed next to its result.
    """
    z_scores_file = os.path.join("normalization", experiment_path, "z_scores.pkl")
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(z_scores_file, "rb") as handle:
        stored_z_scores = pickle.load(handle)
    model.set_z_scores(stored_z_scores)

    weights_file = os.path.join("ckpt", experiment_path, ckpt)
    # expect_partial() presumably silences warnings about checkpoint values that
    # the evaluation-only model never uses (e.g. optimizer state) - TODO confirm
    model.load_weights(weights_file).expect_partial()

    for dataset, label in ds_list:
        score = model.evaluate(dataset)
        print(f"{label} MAPE: {score:.3f}")

In [5]:
# Single model instance, reused by every evaluation cell below
model_fermi_temporal_delay = load_model(
    model_class=RouteNet_temporal_delay,
    model_params=dict(output_dim=1, mask_field="flow_has_delay"),
)

## Experiments
Evaluating the validation dataset is useful to verify that the checkpoint has been loaded correctly. Nonetheless, stick to the test partition for accurately evaluating the model's performance.

### Simulator + full data baseline

In [6]:
# Simulated-data baseline checkpoint, scored on its own validation split and on
# both the Poisson and On-Off test partitions
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="baselines/data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/500_steps/avg_delay",
    ckpt="120-0.0132",
    ds_list=[
        (ds_sim_poisson_on_off_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ],
)

Validation MAPE: 3.785
Poisson topo_5_10_2_SP_k_4 test MAPE: 60.905
On-Off topo_5_10_2_SP_k_4 test MAPE: 57.143


### Poisson

#### Baseline

In [7]:
# Poisson baseline checkpoint, scored on the Poisson validation and test partitions
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="baselines/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/RouteNet_temporal_delay/500_steps/avg_delay",
    ckpt="79-0.0066",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

Validation MAPE: 4.861
Poisson topo_5_10_2_SP_k_4 test MAPE: 6.152


#### Fine-tune Results

In [8]:
# Poisson fine-tuning: encoding frozen, MPA frozen, readout fine-tuned
print("ENCODING FREEZE, MPA FREEZE, READOUT FINE-TUNE")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding_mp_window/finetune_readout/all_samples/avg_delay",
    ckpt="475-0.0011",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FREEZE, MPA FREEZE, READOUT FINE-TUNE
Validation MAPE: 2.678
Poisson topo_5_10_2_SP_k_4 test MAPE: 3.425


In [9]:
# Poisson fine-tuning: encoding frozen, MPA frozen, readout re-trained
print("ENCODING FREEZE, MPA FREEZE, READOUT RE-TRAIN")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding_mp_window/retrain_readout/all_samples/avg_delay",
    ckpt="943-0.0068",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FREEZE, MPA FREEZE, READOUT RE-TRAIN
Validation MAPE: 5.487
Poisson topo_5_10_2_SP_k_4 test MAPE: 6.499


In [10]:
# Poisson fine-tuning: encoding frozen, MPA fine-tuned, readout fine-tuned
print("ENCODING FREEZE, MPA FINE-TUNE, READOUT FINE-TUNE")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/finetune_mp_window_readout/all_samples/avg_delay",
    ckpt="131-0.0006",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FREEZE, MPA FINE-TUNE, READOUT FINE-TUNE
Validation MAPE: 1.868
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.682


In [11]:
# Poisson fine-tuning: encoding frozen, MPA fine-tuned, readout re-trained
print("ENCODING FREEZE, MPA FINE-TUNE, READOUT RE-TRAIN")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/finetune_mp_window/retrain_readout/all_samples/avg_delay",
    ckpt="787-0.0012",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FREEZE, MPA FINE-TUNE, READOUT RE-TRAIN
Validation MAPE: 2.010
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.105


In [12]:
# Poisson fine-tuning: encoding frozen, MPA re-trained, readout re-trained
print("ENCODING FREEZE, MPA RE-TRAIN, READOUT RE-TRAIN")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/retrain_mp_window_readout/all_samples/avg_delay",
    ckpt="784-0.0012",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FREEZE, MPA RE-TRAIN, READOUT RE-TRAIN
Validation MAPE: 2.011
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.106


In [13]:
# Poisson fine-tuning: encoding, MPA and readout all fine-tuned
print("ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT FINE-TUNE")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding_mp_window_readout/all_samples/avg_delay",
    ckpt="131-0.0006",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT FINE-TUNE


Validation MAPE: 1.811
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.742


In [14]:
# Poisson fine-tuning: encoding fine-tuned, MPA fine-tuned, readout re-trained
print("ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT RE-TRAIN")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding_mp_window/retrain_readout/all_samples/avg_delay/",
    ckpt="491-0.0022",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT RE-TRAIN
Validation MAPE: 3.350
Poisson topo_5_10_2_SP_k_4 test MAPE: 2.919


In [15]:
# Poisson fine-tuning: encoding fine-tuned, MPA re-trained, readout re-trained
print("ENCODING FINE-TUNE, MPA RE-TRAIN, READOUT RE-TRAIN")
evaluate_model(
    model=model_fermi_temporal_delay,
    experiment_path="fine_tuning/data_seg_poisson_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding/retrain_mp_window_readout/all_samples/avg_delay",
    ckpt="491-0.0022",
    ds_list=[
        (ds_poisson_only_topo_val, "Validation"),
        (ds_poisson_only_topo_test, "Poisson topo_5_10_2_SP_k_4 test"),
    ],
)

ENCODING FINE-TUNE, MPA RE-TRAIN, READOUT RE-TRAIN
Validation MAPE: 3.350
Poisson topo_5_10_2_SP_k_4 test MAPE: 2.919


### On-off

#### Baseline

In [16]:
# On-Off baseline checkpoint, scored on the On-Off validation and test partitions.
# The test split is the On-Off dataset, so it is labelled accordingly.
evaluate_model(
    model_fermi_temporal_delay,
    "baselines/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/RouteNet_temporal_delay/500_steps/avg_delay",
    "43-0.0021",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

Validation MAPE: 2.819
Poisson topo_5_10_2_SP_k_4 test MAPE: 3.182


#### Fine-tune Results

In [17]:
# On-Off fine-tuning: encoding frozen, MPA frozen, readout fine-tuned.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FREEZE, MPA FREEZE, READOUT FINE-TUNE")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding_mp_window/finetune_readout/all_samples/avg_delay",
    "447-0.0021",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FREEZE, MPA FREEZE, READOUT FINE-TUNE
Validation MAPE: 3.411
Poisson topo_5_10_2_SP_k_4 test MAPE: 3.171


In [18]:
# On-Off fine-tuning: encoding frozen, MPA frozen, readout re-trained.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FREEZE, MPA FREEZE, READOUT RE-TRAIN")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding_mp_window/retrain_readout/all_samples/avg_delay",
    "666-0.0022",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FREEZE, MPA FREEZE, READOUT RE-TRAIN
Validation MAPE: 3.669
Poisson topo_5_10_2_SP_k_4 test MAPE: 4.241


In [19]:
# On-Off fine-tuning: encoding frozen, MPA fine-tuned, readout fine-tuned.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FREEZE, MPA FINE-TUNE, READOUT FINE-TUNE")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/finetune_mp_window_readout/all_samples/avg_delay",
    "124-0.0005",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FREEZE, MPA FINE-TUNE, READOUT FINE-TUNE
Validation MAPE: 1.529
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.425


In [20]:
# On-Off fine-tuning: encoding frozen, MPA fine-tuned, readout re-trained.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FREEZE, MPA FINE-TUNE, READOUT RE-TRAIN")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/finetune_mp_window/retrain_readout/all_samples/avg_delay",
    "548-0.0002",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FREEZE, MPA FINE-TUNE, READOUT RE-TRAIN
Validation MAPE: 0.965
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.292


In [21]:
# On-Off fine-tuning: encoding frozen, MPA re-trained, readout re-trained.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FREEZE, MPA RE-TRAIN, READOUT RE-TRAIN")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/freeze_encoding/retrain_mp_window_readout/all_samples/avg_delay",
    "548-0.0002",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FREEZE, MPA RE-TRAIN, READOUT RE-TRAIN
Validation MAPE: 0.965
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.292


In [22]:
# On-Off fine-tuning: encoding, MPA and readout all fine-tuned.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT FINE-TUNE")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding_mp_window_readout/all_samples/avg_delay",
    "116-0.0005",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT FINE-TUNE
Validation MAPE: 1.505
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.380


In [23]:
# On-Off fine-tuning: encoding fine-tuned, MPA fine-tuned, readout re-trained.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT RE-TRAIN")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding_mp_window/retrain_readout/all_samples/avg_delay",
    "456-0.0008",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FINE-TUNE, MPA FINE-TUNE, READOUT RE-TRAIN
Validation MAPE: 1.860
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.955


In [24]:
# On-Off fine-tuning: encoding fine-tuned, MPA re-trained, readout re-trained.
# The test split is the On-Off dataset, so it is labelled accordingly.
print("ENCODING FINE-TUNE, MPA RE-TRAIN, READOUT RE-TRAIN")
evaluate_model(
    model_fermi_temporal_delay,
    "fine_tuning/data_seg_on_off_0_4_100_v2/topo_5_10_2_SP_k_4/og_ds_data_seg_poisson_on_off_simulated_0_4_100/RouteNet_temporal_delay/finetune_encoding/retrain_mp_window_readout/all_samples/avg_delay",
    "456-0.0008",
    [
        (ds_on_off_only_topo_val, "Validation"),
        (ds_on_off_only_topo_test, "On-Off topo_5_10_2_SP_k_4 test"),
    ]
)

ENCODING FINE-TUNE, MPA RE-TRAIN, READOUT RE-TRAIN
Validation MAPE: 1.860
Poisson topo_5_10_2_SP_k_4 test MAPE: 1.955
