In [1]:
import os
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

In [2]:
# --- Data ---------------------------------------------------------------
# Directory passed to load_npz_files(); it is searched for dataset_part_*.npz.
DATA_DIR = "./network_dataset"
# Number of node slots per sample (sequence length of the model input).
MAX_DEPTH = 10
# Number of attribute values per node (last axis of the model input).
ATTR_DIM = 3

# --- Model --------------------------------------------------------------
# Width of the per-node embedding used throughout the transformer blocks.
D_MODEL = 64
# Attention heads in each TransformerBlock.
NUM_HEADS = 4
# Number of TransformerBlock layers stacked in the model.
ENCODER_LAYERS = 2
# Hidden width of the feed-forward sub-network inside each block.
FF_DIM = 128
# Dropout rate used inside attention and after each sub-layer.
DROPOUT = 0.1

# --- Training -----------------------------------------------------------
# Batch size for both fit() and evaluate().
BATCH_SIZE = 64
# Number of training epochs.
EPOCHS = 20
# Fractions of the dataset held out for validation and for testing.
VAL_SPLIT = 0.1
TEST_SPLIT = 0.1

In [3]:
def load_npz_files(data_dir):
    """Load and concatenate every ``dataset_part_*.npz`` shard in ``data_dir``.

    Each shard must contain the arrays ``adjacency``, ``attributes`` and
    ``latencies``. Shards are read in sorted (lexicographic) filename order and
    stacked along the first axis.

    Args:
        data_dir: Directory that holds the ``dataset_part_*.npz`` files.

    Returns:
        Tuple ``(adjacency_all, attributes_all, latencies_all)`` of NumPy
        arrays, each the concatenation of the corresponding per-shard arrays.

    Raises:
        ValueError: If no matching shard exists in ``data_dir``.
    """
    file_list = sorted(glob.glob(os.path.join(data_dir, "dataset_part_*.npz")))
    if not file_list:
        # np.concatenate([]) would raise a ValueError with an unhelpful message;
        # keep the exception type but say what is actually wrong.
        raise ValueError(
            f"No files matching 'dataset_part_*.npz' found in {data_dir!r}"
        )

    adjacency_parts = []
    attribute_parts = []
    latency_parts = []

    for path in file_list:
        # np.load on an .npz returns a lazy NpzFile that keeps the archive
        # open; indexing materialises each array, and the context manager
        # closes the file handle afterwards.
        with np.load(path) as data:
            adjacency_parts.append(data['adjacency'])    # expected shape (N, 10, 10)
            attribute_parts.append(data['attributes'])   # expected shape (N, 10, 3)
            latency_parts.append(data['latencies'])      # expected shape (N,)

    adjacency_all = np.concatenate(adjacency_parts, axis=0)
    attributes_all = np.concatenate(attribute_parts, axis=0)
    latencies_all = np.concatenate(latency_parts, axis=0)
    return adjacency_all, attributes_all, latencies_all

In [4]:
def create_masks(attributes):
    """Flag which node slots carry data.

    A slot counts as present when at least one value along the last axis of
    ``attributes`` is non-zero.

    Args:
        attributes: NumPy array whose last axis holds the per-node attributes.

    Returns:
        Boolean array with the last axis of ``attributes`` reduced away.
    """
    has_nonzero_feature = attributes != 0
    return np.any(has_nonzero_feature, axis=-1)

In [5]:
class PositionalEmbedding(layers.Layer):
    """Adds a learned positional embedding to a sequence of feature vectors.

    Args:
        max_length: Number of sequence positions covered by the embedding.
        d_model: Size of the embedding vector at each position.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, max_length, d_model, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can describe how to rebuild the layer.
        self.max_length = max_length
        self.d_model = d_model
        # `name` is passed by keyword: the first positional parameter of
        # add_weight is the name in Keras 2 but the shape in Keras 3.
        self.pos_embedding = self.add_weight(
            name="pos_embedding",
            shape=(1, max_length, d_model),
            initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
        )

    def call(self, x):
        """Return ``x`` plus the embedding (leading size-1 axis broadcasts)."""
        return x + self.pos_embedding

    def get_config(self):
        """Return the constructor arguments so a saved model can be reloaded."""
        config = super().get_config()
        config.update({"max_length": self.max_length, "d_model": self.d_model})
        return config

In [6]:
class TransformerBlock(layers.Layer):
    """Post-norm transformer encoder block: self-attention then feed-forward.

    Each sub-layer is followed by dropout, a residual connection and layer
    normalization.

    Args:
        embed_dim: Width of the block's input and output features. Also used
            as ``key_dim`` of the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward sub-network.
        dropout: Dropout rate used inside attention and after each sub-layer.
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can describe how to rebuild the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.dropout_rate = dropout

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, dropout=dropout)
        self.ffn = models.Sequential([
            layers.Dense(ff_dim, activation='relu'),
            layers.Dense(embed_dim),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(dropout)
        self.dropout2 = layers.Dropout(dropout)

    def call(self, inputs, mask=None, training=False):
        """Apply the block to ``inputs``.

        Args:
            inputs: Tensor of shape (batch, seq_len, embed_dim).
            mask: Optional tensor of shape (batch, seq_len); positions that
                are False are excluded as attention keys/values.
            training: Whether dropout is active.

        Returns:
            Tensor with the same shape as ``inputs``.
        """
        if mask is not None:
            # (batch, seq_len) -> (batch, 1, 1, seq_len): the two inserted
            # axes let one per-key mask be shared by every head and every
            # query position.
            mha_mask = tf.expand_dims(tf.expand_dims(mask, 1), 1)
        else:
            mha_mask = None

        # Self-attention sub-layer (query == value == inputs).
        attn_output = self.att(inputs, inputs, attention_mask=mha_mask, training=training)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)

        # Position-wise feed-forward sub-layer.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

    def get_config(self):
        """Return the constructor arguments so a saved model can be reloaded."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "dropout": self.dropout_rate,
        })
        return config

In [7]:
class _MaskedMeanPooling(layers.Layer):
    """Average sequence features over the positions whose mask entry is True."""

    def call(self, inputs):
        # inputs is [features (batch, seq_len, dim), mask (batch, seq_len)].
        features, node_mask = inputs
        mask_float = tf.cast(node_mask, tf.float32)
        masked_sum = tf.reduce_sum(features * tf.expand_dims(mask_float, -1), axis=1)
        # The small constant avoids division by zero when every position is masked.
        denom = tf.reduce_sum(mask_float, axis=1, keepdims=True) + 1e-9
        return masked_sum / denom


def create_transformer_model(max_depth=MAX_DEPTH, attr_dim=ATTR_DIM, d_model=D_MODEL, num_heads=NUM_HEADS, ff_dim=FF_DIM, num_layers=ENCODER_LAYERS, dropout=DROPOUT):
    """Build and compile the transformer regression model.

    The model takes two inputs, node attributes of shape
    ``(max_depth, attr_dim)`` and a boolean mask of shape ``(max_depth,)``,
    and outputs a single scalar per sample.

    Args:
        max_depth: Number of node slots per sample.
        attr_dim: Number of attribute values per node.
        d_model: Width of the per-node embedding.
        num_heads: Attention heads per transformer block.
        ff_dim: Hidden width of each block's feed-forward sub-network.
        num_layers: Number of stacked transformer blocks.
        dropout: Dropout rate used inside every block.

    Returns:
        A ``tf.keras.Model`` compiled with the Adam optimizer, MSE loss and
        an MAE metric.
    """
    inputs = layers.Input(shape=(max_depth, attr_dim))
    mask_input = layers.Input(shape=(max_depth,), dtype=tf.bool, name="mask_input")

    # Project raw attributes to the model width, then add position information.
    x = layers.Dense(d_model)(inputs)
    x = PositionalEmbedding(max_depth, d_model)(x)

    for _ in range(num_layers):
        x = TransformerBlock(d_model, num_heads, ff_dim, dropout)(x, mask=mask_input)

    # The pooling lives in a Keras layer instead of raw tf.* calls on the
    # symbolic tensors: Keras 3 rejects TensorFlow functions applied to
    # KerasTensors, and a named layer also serializes cleanly.
    pooled = _MaskedMeanPooling(name="masked_mean_pooling")([x, mask_input])

    outputs = layers.Dense(1)(pooled)

    model = tf.keras.Model(inputs=[inputs, mask_input], outputs=outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [8]:
if __name__ == "__main__":
    # Seed Python, NumPy and TensorFlow so the split, weight initialization
    # and dropout are reproducible across kernel restarts.
    SEED = 42
    tf.keras.utils.set_random_seed(SEED)

    adjacency_all, attributes_all, latencies_all = load_npz_files(DATA_DIR)
    masks = create_masks(attributes_all)

    # Shuffle sample indices with a dedicated, seeded generator.
    N = len(latencies_all)
    idxs = np.random.default_rng(SEED).permutation(N)

    val_size = int(VAL_SPLIT * N)
    test_size = int(TEST_SPLIT * N)
    train_size = N - val_size - test_size
    if min(train_size, val_size, test_size) <= 0:
        # With very few samples int() truncates a split to zero, which would
        # otherwise surface later as an opaque Keras error.
        raise ValueError(
            f"Dataset with {N} samples is too small for "
            f"VAL_SPLIT={VAL_SPLIT} and TEST_SPLIT={TEST_SPLIT}"
        )

    # Contiguous slices of the shuffled indices: train | val | test.
    train_idxs = idxs[:train_size]
    val_idxs = idxs[train_size:train_size+val_size]
    test_idxs = idxs[train_size+val_size:]

    X_train = attributes_all[train_idxs]
    X_train_mask = masks[train_idxs]
    y_train = latencies_all[train_idxs]

    X_val = attributes_all[val_idxs]
    X_val_mask = masks[val_idxs]
    y_val = latencies_all[val_idxs]

    X_test = attributes_all[test_idxs]
    X_test_mask = masks[test_idxs]
    y_test = latencies_all[test_idxs]

    model = create_transformer_model()
    model.summary()

    model.fit(
        [X_train, X_train_mask],
        y_train,
        validation_data=([X_val, X_val_mask], y_val),
        epochs=EPOCHS,
        batch_size=BATCH_SIZE
    )

    model.save("transformer_latency_model.h5")
    print("Model saved as transformer_latency_model.h5")

    # The held-out test split is only touched here, after training is finished.
    test_loss, test_mae = model.evaluate([X_test, X_test_mask], y_test, batch_size=BATCH_SIZE)
    print(f"Test Loss: {test_loss:.4f}, Test MAE: {test_mae:.4f}")

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 10, 3)]      0           []                               
                                                                                                  
 dense (Dense)                  (None, 10, 64)       256         ['input_1[0][0]']                
                                                                                                  
 positional_embedding (Position  (None, 10, 64)      640         ['dense[0][0]']                  
 alEmbedding)                                                                                     
                                                                                                  
 mask_input (InputLayer)        [(None, 10)]         0           []                           

2024-12-11 11:09:48.167232: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model saved as transformer_latency_model.h5
Test Loss: 0.0003, Test MAE: 0.0163
