In [1]:
import os
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

2024-12-13 22:19:58.850135: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1734146398.861521 3973113 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1734146398.864626 3973113 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-12-13 22:19:58.876834: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --- Notebook configuration: every tunable value lives in this cell. ---
DATA_DIR = "./network_dataset"  # directory searched for dataset_part_*.npz shards
MAX_DEPTH = 10                  # sequence length: number of node slots per sample (padded)
ATTR_DIM = 3                    # number of attribute features per node
D_MODEL = 64                    # embedding width used throughout the encoder
NUM_HEADS = 4                   # attention heads per TransformerBlock
ENCODER_LAYERS = 2              # number of stacked TransformerBlock layers
FF_DIM = 128                    # hidden width of each block's feed-forward network
DROPOUT = 0.1                   # dropout rate inside attention and after each sub-layer
BATCH_SIZE = 64                 # batch size for both fit() and evaluate()
EPOCHS = 10                     # number of training epochs
VAL_SPLIT = 0.1                 # fraction of samples held out for validation
TEST_SPLIT = 0.1                # fraction of samples held out for the final test

In [3]:
def load_npz_files(data_dir):
    """Load every ``dataset_part_*.npz`` shard in ``data_dir`` and stack them.

    Shards are read in sorted filename order and concatenated along axis 0.

    Args:
        data_dir: Directory containing the ``dataset_part_*.npz`` files. Each
            file must provide the arrays ``adjacency``, ``attributes`` and
            ``latencies``.

    Returns:
        A tuple ``(adjacency_all, attributes_all, latencies_all)`` of NumPy
        arrays, each being the axis-0 concatenation of the per-shard arrays.

    Raises:
        FileNotFoundError: If no file in ``data_dir`` matches the shard pattern.
        KeyError: If a shard is missing one of the three expected arrays.
    """
    pattern = os.path.join(data_dir, "dataset_part_*.npz")
    file_list = sorted(glob.glob(pattern))
    if not file_list:
        # Fail here with a clear message instead of letting np.concatenate
        # raise "need at least one array to concatenate" further down.
        raise FileNotFoundError(f"No dataset shards found matching {pattern!r}")

    adjacency_parts = []
    attribute_parts = []
    latency_parts = []

    for path in file_list:
        # np.load on an .npz returns an NpzFile that keeps the archive open;
        # the context manager closes it once the arrays have been read out.
        with np.load(path) as data:
            adjacency_parts.append(data['adjacency'])
            attribute_parts.append(data['attributes'])
            latency_parts.append(data['latencies'])

    adjacency_all = np.concatenate(adjacency_parts, axis=0)
    attributes_all = np.concatenate(attribute_parts, axis=0)
    latencies_all = np.concatenate(latency_parts, axis=0)
    return adjacency_all, attributes_all, latencies_all

In [4]:
def create_masks(attributes):
    """Build a per-node boolean mask from the attribute array.

    A node slot counts as existing when at least one of its attribute values
    is non-zero; a slot whose attributes are all zero is treated as padding.

    Args:
        attributes: Array whose last axis holds the per-node attributes
            (the caller passes shape ``(N, 10, 3)``).

    Returns:
        Boolean array with the last axis reduced away (``(N, 10)`` for the
        caller's input): True for existing nodes, False for padded slots.
    """
    nonzero_entries = attributes != 0
    # Collapse the attribute axis: any non-zero feature marks a real node.
    return np.any(nonzero_entries, axis=-1)

In [5]:
class PositionalEmbedding(layers.Layer):
    """Adds a learned positional embedding to a sequence of token embeddings.

    The layer owns a single trainable weight of shape
    ``(1, max_length, d_model)`` that is broadcast over the batch axis and
    added to the input.

    Args:
        max_length: Number of sequence positions the embedding table covers.
        d_model: Embedding width; must match the last axis of the input.
        **kwargs: Standard Keras layer arguments (``name``, ``trainable``,
            ``dtype``), forwarded to ``layers.Layer``. Accepting them is what
            lets ``from_config`` rebuild the layer when a saved model is
            loaded.
    """

    def __init__(self, max_length, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor arguments.
        self.max_length = max_length
        self.d_model = d_model
        # Pass add_weight arguments by keyword to avoid positional conflicts.
        self.pos_embedding = self.add_weight(
            name="pos_embedding",
            shape=(1, max_length, d_model),
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
            trainable=True
        )

    def call(self, x):
        """Return ``x`` with the positional embedding added (broadcast over batch)."""
        return x + self.pos_embedding

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({
            "max_length": self.max_length,
            "d_model": self.d_model,
        })
        return config

In [6]:
class TransformerBlock(layers.Layer):
    """Single Transformer encoder block: self-attention followed by a feed-forward net.

    Each sub-layer is wrapped as ``LayerNorm(residual + Dropout(sublayer(x)))``,
    i.e. normalisation is applied after the residual addition.

    Args:
        embed_dim: Width of the input/output embeddings. Also passed as
            ``key_dim`` to ``MultiHeadAttention``.
            NOTE(review): ``key_dim`` is the per-head size, so every head
            projects to ``embed_dim`` rather than ``embed_dim // num_heads`` -
            confirm this is intended.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate used inside attention and after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``trainable``,
            ``dtype``), forwarded to ``layers.Layer`` so that ``from_config``
            can rebuild the block when a saved model is loaded.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can report the constructor arguments.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, dropout=dropout)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation='relu'),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, inputs, mask=None, training=False):
        """Run the block on ``inputs``.

        Args:
            inputs: Sequence embeddings; used as both query and value.
            mask: Optional per-position mask of shape ``(batch, seq_len)``;
                when given it is reshaped to ``(batch, 1, 1, seq_len)`` and
                passed to attention as ``attention_mask``.
            training: Whether dropout is active.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        if mask is not None:
            # Expand mask for MultiHeadAttention: (batch, 1, 1, seq_len)
            mha_mask = tf.expand_dims(tf.expand_dims(mask, 1), 1)
        else:
            mha_mask = None

        attn_output = self.att(inputs, inputs, attention_mask=mha_mask, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so the block can be re-created on load."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
        })
        return config

In [7]:
def create_transformer_model(max_depth=MAX_DEPTH, attr_dim=ATTR_DIM, d_model=D_MODEL, num_heads=NUM_HEADS, ff_dim=FF_DIM, num_layers=ENCODER_LAYERS, dropout=DROPOUT):
    """Build and compile the masked Transformer regressor.

    The model takes two inputs - node attributes of shape
    ``(max_depth, attr_dim)`` and a boolean node mask of shape
    ``(max_depth,)`` - projects the attributes to ``d_model``, adds a learned
    positional embedding, applies ``num_layers`` TransformerBlock layers,
    mean-pools over the unmasked positions and maps the result to one scalar.

    Args:
        max_depth: Sequence length (number of node slots).
        attr_dim: Number of attribute features per node.
        d_model: Embedding width.
        num_heads: Attention heads per block.
        ff_dim: Feed-forward hidden width per block.
        num_layers: Number of stacked TransformerBlock layers.
        dropout: Dropout rate handed to every block.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer, MSE loss and
        an MAE metric.
    """
    inputs = layers.Input(shape=(max_depth, attr_dim))
    mask_input = layers.Input(shape=(max_depth,), dtype=tf.bool, name="mask_input")

    # Project raw attributes to the model width, then inject position information.
    projected = layers.Dense(d_model)(inputs)
    hidden = PositionalEmbedding(max_depth, d_model)(projected)

    for _layer in range(num_layers):
        encoder_block = TransformerBlock(d_model, num_heads, ff_dim, dropout)
        hidden = encoder_block(hidden, mask=mask_input)

    # Masked mean pooling. Raw tf ops are wrapped in Lambda layers rather
    # than being applied to the symbolic tensors directly.
    to_float = layers.Lambda(lambda flags: tf.cast(flags, tf.float32))
    mask_float = to_float(mask_input)

    zero_padded = layers.Lambda(lambda pair: pair[0] * tf.expand_dims(pair[1], -1))
    x_masked = zero_padded([hidden, mask_float])

    sum_over_nodes = layers.Lambda(lambda seq: tf.reduce_sum(seq, axis=1))
    masked_sum = sum_over_nodes(x_masked)

    # The small epsilon keeps the division finite when every position is masked.
    count_nodes = layers.Lambda(lambda flags: tf.reduce_sum(flags, axis=1, keepdims=True) + 1e-9)
    denom = count_nodes(mask_float)

    divide = layers.Lambda(lambda pair: pair[0] / pair[1])
    pooled = divide([masked_sum, denom])

    outputs = layers.Dense(1)(pooled)

    regressor = tf.keras.Model(inputs=[inputs, mask_input], outputs=outputs)
    regressor.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return regressor

In [8]:
if __name__ == "__main__":
    # Seed every source of randomness so the train/val/test split and the
    # weight initialisation - and therefore the reported metrics - can be
    # reproduced on a fresh kernel.
    SEED = 42
    tf.keras.utils.set_random_seed(SEED)  # seeds Python `random`, NumPy and TensorFlow
    split_rng = np.random.default_rng(SEED)

    # adjacency_all is loaded alongside the other arrays but is not fed to the model.
    adjacency_all, attributes_all, latencies_all = load_npz_files(DATA_DIR)
    masks = create_masks(attributes_all)

    N = len(latencies_all)
    if N == 0:
        raise ValueError(f"Dataset in {DATA_DIR!r} contains no samples")

    # Shuffle sample indices once, then carve out contiguous train/val/test slices.
    idxs = split_rng.permutation(N)

    val_size = int(VAL_SPLIT * N)
    test_size = int(TEST_SPLIT * N)
    train_size = N - val_size - test_size

    train_idxs = idxs[:train_size]
    val_idxs = idxs[train_size:train_size+val_size]
    test_idxs = idxs[train_size+val_size:]
    # The three slices must cover every sample exactly once.
    assert len(train_idxs) + len(val_idxs) + len(test_idxs) == N

    X_train = attributes_all[train_idxs]
    X_train_mask = masks[train_idxs]
    y_train = latencies_all[train_idxs]

    X_val = attributes_all[val_idxs]
    X_val_mask = masks[val_idxs]
    y_val = latencies_all[val_idxs]

    X_test = attributes_all[test_idxs]
    X_test_mask = masks[test_idxs]
    y_test = latencies_all[test_idxs]

    model = create_transformer_model()
    model.summary()

    model.fit(
        [X_train, X_train_mask],
        y_train,
        validation_data=([X_val, X_val_mask], y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE
    )

    # NOTE(review): the .h5 extension selects the HDF5 format, and the model
    # contains custom layers plus Lambda layers - reloading presumably needs
    # custom_objects; confirm before relying on this artifact.
    model.save("transformer_latency_model.h5")
    print("Model saved as transformer_latency_model.h5")

    # The test split is only touched here, after training has finished.
    test_loss, test_mae = model.evaluate([X_test, X_test_mask], y_test, batch_size=BATCH_SIZE)
    print(f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")

W0000 00:00:1734146400.560272 3973113 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 21ms/step - loss: 0.0494 - mae: 0.0682 - val_loss: 1.8020e-04 - val_mae: 0.0073
Epoch 2/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 21ms/step - loss: 2.5833e-04 - mae: 0.0108 - val_loss: 4.2966e-04 - val_mae: 0.0190
Epoch 3/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 20ms/step - loss: 2.2227e-04 - mae: 0.0093 - val_loss: 1.8773e-04 - val_mae: 0.0087
Epoch 4/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 20ms/step - loss: 2.0228e-04 - mae: 0.0084 - val_loss: 1.7242e-04 - val_mae: 0.0067
Epoch 5/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 21ms/step - loss: 1.8784e-04 - mae: 0.0080 - val_loss: 1.8743e-04 - val_mae: 0.0067
Epoch 6/10
[1m3375/3375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 20ms/step - loss: 1.7625e-04 - mae: 0.0076 - val_loss: 1.5223e-04 - val_mae: 0.0067
Epoch 7/10
[1m3375/



Model saved as transformer_latency_model.h5
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 9.6587e-05 - mae: 0.0043
Test Loss: 0.0001, Test MAE: 0.0044
