In [2]:
import os
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split



In [3]:
nodeset = pd.read_csv(f"../out/features.csv")
df = pd.read_csv(f"../out/edges.csv")

In [4]:
nodeset["label"] = nodeset["node"].str.contains(r"(sbox|mixcolumn)",
                                          case=False, na=False).astype(int)
nodeset

  nodeset["label"] = nodeset["node"].str.contains(r"(sbox|mixcolumn)",


Unnamed: 0,node,Degree,Hamming distance,Node,Noise,Paths,and,mux,node_number,or,xor,label
0,dut.260:BL,2,1451,"dut.260:BL\n{ state[0], state[1], state[2], st...",0,0,0,0,0,0,0,0
1,dut.InvMixColumn2.730:AS,1,0,dut.InvMixColumn2.730:AS\nm3c = mul4x8({ 4'hb ...,1,0,0,0,1,0,1,1
2,dut.169:NS,2,0,"dut.169:NS\n{ w[0], w[1], w[2], w[3] } <= key_in;",0,1,0,0,2,0,0,0
3,dut.Leaf_208:AL,16,0,dut.Leaf_208:AL,0,0,0,0,3,0,0,0
4,dut.130:NS,2,2,dut.130:NS\nbusy <= 1'b0;,1,31,0,0,4,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...
222,dut.InvSubBytes.544:AS,8,0,dut.InvSubBytes.544:AS\nb21 = is_box(a21);,0,0,0,0,222,0,0,0
223,dut.209:BL,2,0,"dut.209:BL\n{ iw[0], iw[1], iw[2], iw[3] } <= ...",1,0,0,0,223,0,0,0
224,dut.355:IF,3,0,dut.355:IF,0,0,0,0,224,0,0,0
225,dut.99:BS,2,8,dut.99:BS\nnext_state = 3'h2;,0,51,0,0,225,0,0,0


In [5]:
label = "label"
# In the paper, nodes belonging to the Sbox and Mixcolumns modules are labeled as 1

class_values = sorted(nodeset[label].unique())
class_idx = {name: id for id, name in enumerate(class_values)}
paper_idx = {name: idx for idx, name in enumerate(sorted(nodeset["node_number"].unique()))}

nodeset["node_number"] = nodeset["node_number"].apply(lambda name: paper_idx[name])
df["source"] = df["source"].apply(lambda name: paper_idx[name])
df["target"] = df["target"].apply(lambda name: paper_idx[name])
nodeset[label] = nodeset[label].apply(lambda value: class_idx[value])

print(nodeset.label.value_counts())

label
0    195
1     32
Name: count, dtype: int64


In [6]:
train_data = nodeset.iloc[0:196]
test_data = nodeset.iloc[196:]

In [7]:
hidden_units = [32, 32]
learning_rate = 0.0001
dropout_rate = 0.3
num_epochs = 32
batch_size = 20

In [8]:
def run_experiment(model, x_train, y_train):
    # Compile the model.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate),
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=[keras.metrics.BinaryAccuracy(name="acc"),
keras.metrics.Precision(), keras.metrics.Recall()],
    )
    # Create an early stopping callback.
    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_acc", patience=50, restore_best_weights=True
    )
    # Fit the model.
    history = model.fit(
        x=x_train,
        y=y_train,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_split=0.10,
        callbacks=[early_stopping],
    )

    return history

In [9]:
def display_learning_curves(history):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

    ax1.plot(history.history["loss"])
    ax1.plot(history.history["val_loss"])
    ax1.legend(["train", "test"], loc="upper right")
    ax1.set_xlabel("Epochs")
    ax1.set_ylabel("Loss")

    ax2.plot(history.history["acc"])
    ax2.plot(history.history["val_acc"])
    ax2.legend(["train", "test"], loc="upper right")
    ax2.set_xlabel("Epochs")
    ax2.set_ylabel("Accuracy")
    plt.show()

In [10]:
def create_ffn(hidden_units, dropout_rate, name=None):
    fnn_layers = []

    for units in hidden_units:
        fnn_layers.append(layers.BatchNormalization())
        fnn_layers.append(layers.Dropout(dropout_rate))
        fnn_layers.append(layers.Dense(units, activation=tf.nn.relu))

    return keras.Sequential(fnn_layers, name=name)

In [11]:
feature_names = set(nodeset.columns) - {"node_number", "Node", "node", label}
# feature_names = feature_names - {"and", "or", "xor", "mux"}
# feature_names = feature_names - {"Hamming distance"}
# feature_names = feature_names - {"Paths"}
# feature_names = feature_names - {"Degree"}
feature_names = feature_names - {"Noise"}
num_features = len(feature_names)
num_classes = len(class_idx)

print(train_data[list(feature_names)])
# Create train and test features as a numpy array.
x_train = train_data[list(feature_names)].to_numpy()
x_test = test_data[list(feature_names)].to_numpy()
#print(x_train)
# Create train and test targets as a numpy array.
y_train = train_data[label]
y_test = test_data[label]

     or  Paths  mux  and  xor  Degree  Hamming distance
0     0      0    0    0    0       2              1451
1     0      0    0    0    1       1                 0
2     0      1    0    0    0       2                 0
3     0      0    0    0    0      16                 0
4     0     31    0    0    0       2                 2
..   ..    ...  ...  ...  ...     ...               ...
191   0      0    0    0    0       2                 0
192   0      0    0    0    1       1                 0
193   0      0    1    0    1      18               368
194   0    374    0    0    0       2                 0
195   0      0    0    0    0       2                 0

[196 rows x 7 columns]


In [12]:
def create_baseline_model(hidden_units, num_classes, dropout_rate=0.2):
    inputs = layers.Input(shape=(num_features,), name="input_features")
    x = create_ffn(hidden_units, dropout_rate, name=f"ffn_block1")(inputs)
    for block_idx in range(4):
        # Create an FFN block.
        x1 = create_ffn(hidden_units, dropout_rate, name=f"ffn_block{block_idx + 2}")(x)
        # Add skip connection.
        x = layers.Add(name=f"skip_connection{block_idx + 2}")([x, x1])
    # Compute logits.
    logits = layers.Dense(num_classes-1, name="logits")(x)
    # Create the model.
    return keras.Model(inputs=inputs, outputs=logits, name="baseline")


baseline_model = create_baseline_model(hidden_units, num_classes, dropout_rate)
baseline_model.summary()

history = run_experiment(baseline_model, x_train, y_train)

Epoch 1/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - acc: 0.3667 - loss: 9.0169 - precision: 0.1888 - recall: 0.9161 - val_acc: 0.4000 - val_loss: 5.5764 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 2/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - acc: 0.3693 - loss: 8.8664 - precision: 0.1386 - recall: 0.7767 - val_acc: 0.4500 - val_loss: 5.5819 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - acc: 0.3317 - loss: 8.9991 - precision: 0.1472 - recall: 0.7351 - val_acc: 0.4500 - val_loss: 4.2165 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 4/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - acc: 0.3756 - loss: 8.7993 - precision: 0.1500 - recall: 0.7758 - val_acc: 0.4000 - val_loss: 4.0573 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 5/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━

In [13]:
# Create an edges array (sparse adjacency matrix) of shape [2, num_edges].
edges = df[["source", "target"]].to_numpy().T
edge_weights = tf.ones(shape=edges.shape[1])

node_features = tf.cast(
    nodeset.sort_values("node_number")[list(feature_names)].to_numpy(), dtype=tf.dtypes.float32
)
# Create graph with node features, edges, and edge_weights.
graph_info = (node_features, edges, edge_weights)

print("Edges shape:", edges.shape)
print("Nodes shape:", node_features.shape)
graph_info

Edges shape: (2, 556)
Nodes shape: (227, 7)


(<tf.Tensor: shape=(227, 7), dtype=float32, numpy=
 array([[0.000e+00, 0.000e+00, 0.000e+00, ..., 0.000e+00, 2.000e+00,
         1.451e+03],
        [0.000e+00, 0.000e+00, 0.000e+00, ..., 1.000e+00, 1.000e+00,
         0.000e+00],
        [0.000e+00, 1.000e+00, 0.000e+00, ..., 0.000e+00, 2.000e+00,
         0.000e+00],
        ...,
        [0.000e+00, 0.000e+00, 0.000e+00, ..., 0.000e+00, 3.000e+00,
         0.000e+00],
        [0.000e+00, 5.100e+01, 0.000e+00, ..., 0.000e+00, 2.000e+00,
         8.000e+00],
        [0.000e+00, 5.100e+01, 0.000e+00, ..., 0.000e+00, 2.000e+00,
         8.000e+00]], dtype=float32)>,
 array([[ 71,  31, 119, ..., 220, 146, 133],
        [ 51, 101, 138, ..., 144,  63,  52]]),
 <tf.Tensor: shape=(556,), dtype=float32, numpy=
 array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1

In [14]:
class GraphConvLayer(layers.Layer):
    def __init__(
        self,
        hidden_units,
        dropout_rate=0.3,
        aggregation_type="mean",
        combination_type="concat",
        normalize=False,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        self.aggregation_type = aggregation_type
        self.combination_type = combination_type
        self.normalize = normalize

        self.ffn_prepare = create_ffn(hidden_units, dropout_rate)
        if self.combination_type == "gated":
            self.update_fn = layers.GRU(
                units=hidden_units,
                activation="tanh",
                recurrent_activation="sigmoid",
                dropout=dropout_rate,
                return_state=True,
                recurrent_dropout=dropout_rate,
            )
        else:
            self.update_fn = create_ffn(hidden_units, dropout_rate)

    def prepare(self, node_repesentations, weights=None):
        # node_repesentations shape is [num_edges, embedding_dim].
        messages = self.ffn_prepare(node_repesentations)
        if weights is not None:
            messages = messages * tf.expand_dims(weights, -1)
        return messages

    def aggregate(self, node_indices, neighbour_messages, node_repesentations):

        num_nodes = node_repesentations.shape[0]
        if self.aggregation_type == "sum":
            aggregated_message = tf.math.unsorted_segment_sum(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        elif self.aggregation_type == "mean":
            aggregated_message = tf.math.unsorted_segment_mean(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        elif self.aggregation_type == "max":
            aggregated_message = tf.math.unsorted_segment_max(
                neighbour_messages, node_indices, num_segments=num_nodes
            )
        else:
            raise ValueError(f"Invalid aggregation type: {self.aggregation_type}.")

        return aggregated_message

    def update(self, node_repesentations, aggregated_messages):

        if self.combination_type == "gru":
            # Create a sequence of two elements for the GRU layer.
            h = tf.stack([node_repesentations, aggregated_messages], axis=1)
        elif self.combination_type == "concat":
            # Concatenate the node_repesentations and aggregated_messages.
            h = tf.concat([node_repesentations, aggregated_messages], axis=1)
        elif self.combination_type == "add":
            # Add node_repesentations and aggregated_messages.
            h = node_repesentations + aggregated_messages
        else:
            raise ValueError(f"Invalid combination type: {self.combination_type}.")

        node_embeddings = self.update_fn(h)
        if self.combination_type == "gru":
            node_embeddings = tf.unstack(node_embeddings, axis=1)[-1]

        if self.normalize:
            node_embeddings = tf.nn.l2_normalize(node_embeddings, axis=-1)
        return node_embeddings

    def call(self, inputs):

        node_repesentations, edges, edge_weights = inputs

        node_indices, neighbour_indices = edges[0], edges[1]
        neighbour_repesentations = tf.gather(node_repesentations, neighbour_indices)
        neighbour_messages = self.prepare(neighbour_repesentations, edge_weights)

        aggregated_messages = self.aggregate(
            node_indices, neighbour_messages, node_repesentations
        )

        return self.update(node_repesentations, aggregated_messages)

In [15]:
class GNNNodeClassifier(tf.keras.Model):
    def __init__(
        self,
        graph_info,
        num_classes,
        hidden_units,
        aggregation_type="sum",
        combination_type="concat",
        dropout_rate=0.3,
        normalize=True,
        *args,
        **kwargs,
    ):
        super().__init__(*args, **kwargs)

        # Unpack graph_info to three elements: node_features, edges, and edge_weight.
        node_features, edges, edge_weights = graph_info
        self.node_features = node_features
        self.edges = edges
        self.edge_weights = edge_weights
        # Set edge_weights to ones if not provided.
        if self.edge_weights is None:
            self.edge_weights = tf.ones(shape=edges.shape[1])
        # Scale edge_weights to sum to 1.
        self.edge_weights = self.edge_weights / tf.math.reduce_sum(self.edge_weights)

        # Create a process layer.
        self.preprocess = create_ffn(hidden_units, dropout_rate, name="preprocess")
        # Create the first GraphConv layer.
        self.conv1 = GraphConvLayer(
            hidden_units,
            dropout_rate,
            aggregation_type,
            combination_type,
            normalize,
            name="graph_conv1",
        )
        # Create the second GraphConv layer.
        self.conv2 = GraphConvLayer(
            hidden_units,
            dropout_rate,
            aggregation_type,
            combination_type,
            normalize,
            name="graph_conv2",
        )
        # Create a postprocess layer.
        self.postprocess = create_ffn(hidden_units, dropout_rate, name="postprocess")
        # Create a compute logits layer.
        self.compute_logits = layers.Dense(units=num_classes,activation="softmax", name="logits")

    def call(self, input_node_indices):
        # Preprocess the node_features to produce node representations.
        x = self.preprocess(self.node_features)
        # Apply the first graph conv layer.
        x1 = self.conv1((x, self.edges, self.edge_weights))
        # Skip connection.
        x = x1 + x
        # Apply the second graph conv layer.
        x2 = self.conv2((x, self.edges, self.edge_weights))
        # Skip connection.
        x = x2 + x
        # Postprocess node embedding.
        x = self.postprocess(x)
        # Fetch node embeddings for the input node_indices.
        node_embeddings = tf.gather(x, input_node_indices)
        print(node_embeddings)
        # Compute logits
        return self.compute_logits(node_embeddings)

    def build(self, input_shape):
        super().build(input_shape)

In [16]:
gnn_model = GNNNodeClassifier(
    graph_info=graph_info,
    num_classes=num_classes,
    hidden_units=hidden_units,
    dropout_rate=dropout_rate,
    name="gnn_model",
)

print("GNN output shape:", gnn_model(tf.constant([[1, 10, 100]], dtype=tf.int32)))

gnn_model.summary()

tf.Tensor(
[[[0.02003971 0.         0.         0.         0.         0.
   0.         0.         0.         0.30862334 0.         0.07438131
   0.1022206  0.18588427 0.19887774 0.         0.28273135 0.
   0.         0.         0.         0.         0.06755412 0.
   0.         0.         0.12248891 0.         0.         0.
   0.         0.23446469]
  [0.         0.         0.         0.07277139 0.06619395 0.
   0.05428777 0.         0.         0.09035148 0.         0.
   0.5682612  0.41806152 0.32602882 0.         0.20732953 0.
   0.03104911 0.         0.         0.         0.15303479 0.24448371
   0.         0.         0.04766712 0.         0.17863433 0.
   0.         0.44059458]
  [2.8078716  0.         0.         0.         2.2051752  2.6997216
   0.         0.         0.36069405 1.2055695  0.         0.
   2.1635413  0.03383279 2.1975543  0.         3.0532079  1.7888418
   0.         0.         0.         1.1675535  0.         0.
   0.         1.8504066  1.1268985  0.         0.    

In [17]:
y_train1 = tf.keras.utils.to_categorical(
    y_train, num_classes=2)
y_test1 = tf.keras.utils.to_categorical(
    y_test, num_classes=2)

In [18]:
x_train = train_data.node_number.to_numpy()
history = run_experiment(gnn_model, x_train, y_train1)

Epoch 1/32
Tensor("gnn_model_1/GatherV2:0", shape=(None, 32), dtype=float32)
Tensor("gnn_model_1/GatherV2:0", shape=(None, 32), dtype=float32)
[1m1/9[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m18s[0m 2s/step - acc: 0.8500 - loss: 0.3863 - precision_1: 0.8500 - recall_1: 0.8500Tensor("gnn_model_1/GatherV2:0", shape=(20, 32), dtype=float32)
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - acc: 0.8579 - loss: 0.3423 - precision_1: 0.8579 - recall_1: 0.8579 - val_acc: 0.8500 - val_loss: 0.3188 - val_precision_1: 0.8500 - val_recall_1: 0.8500
Epoch 2/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - acc: 0.8492 - loss: 0.3253 - precision_1: 0.8492 - recall_1: 0.8492 - val_acc: 0.8500 - val_loss: 0.3018 - val_precision_1: 0.8500 - val_recall_1: 0.8500
Epoch 3/32
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - acc: 0.8701 - loss: 0.2919 - precision_1: 0.8701 - recall_1: 0.8701 - val_acc: 0.8500 - val_loss: 0.287

In [19]:
from keras import backend as K

x_test = test_data.node_number.to_numpy()
_, test_accuracy, precision, recall = gnn_model.evaluate(x=x_test, y=y_test1, verbose=0)
print(f"Test accuracy: {round(test_accuracy * 100, 2)}%")
print(f"Test precision: {(precision* 100)}%")
print(f"Test recall: {(recall * 100)}%")

Tensor("gnn_model_1/GatherV2:0", shape=(None, 32), dtype=float32)
Test accuracy: 93.55%
Test precision: 93.54838728904724%
Test recall: 93.54838728904724%


In [20]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

y_pred= gnn_model.predict(x_test)
y_pred = np.argmax(y_pred, axis=1)
y_testt = np.argmax(y_test1, axis=1)
cm = confusion_matrix(y_testt, y_pred)
print(cm)
print(classification_report(y_testt,y_pred))

Tensor("gnn_model_1/GatherV2:0", shape=(31, 32), dtype=float32)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
[[28  0]
 [ 2  1]]
              precision    recall  f1-score   support

           0       0.93      1.00      0.97        28
           1       1.00      0.33      0.50         3

    accuracy                           0.94        31
   macro avg       0.97      0.67      0.73        31
weighted avg       0.94      0.94      0.92        31



In [21]:
!python Feature_Extract.py AES_PPRM1/AES_PPRM1 Kin AES_PPRM1
!python Feature_Extract.py AES_PPRM3/AES_PPRM3 Kin AES_PPRM3
!python Feature_Extract.py AES_TBL/AES_TBL Kin AES_TBL
!python Feature_Extract.py RSA/RSA Kin RSA

Generating LALR tables
[INFO] Features written to ../test/AES_PPRM1_features.csv
[INFO] Edges written to ../test/AES_PPRM1_edges.csv
Generating LALR tables
[INFO] Features written to ../test/AES_PPRM3_features.csv
[INFO] Edges written to ../test/AES_PPRM3_edges.csv
Generating LALR tables
[INFO] Features written to ../test/AES_TBL_features.csv
[INFO] Edges written to ../test/AES_TBL_edges.csv
Generating LALR tables
[INFO] Features written to ../test/RSA_features.csv
[INFO] Edges written to ../test/RSA_edges.csv


In [36]:
import os, glob
import numpy as np
import pandas as pd
import tensorflow as tf

TEST_DIR = "../test"

feature_files = sorted(glob.glob(os.path.join(TEST_DIR, "*_features.csv")))
paired = []
for fpath in feature_files:
    base = os.path.basename(fpath).replace("_features.csv", "")
    epath = os.path.join(TEST_DIR, f"{base}_edges.csv")
    if os.path.exists(epath):
        paired.append((base, fpath, epath))

if not paired:
    print("No dataset pairs found under ./test (expect *_features.csv + *_edges.csv)")
else:
    print(f"Found {len(paired)} dataset(s):", [b for b,_,_ in paired])

leaky_module = {
    "AES_PPRM1": ["SBOX", "Mixcolumns"],
    "AES_PPRM3": ["SBOX", "Mixcolumns"],
    "AES_TBL": ["SBOX", "Mixcolumns"],
}

for base, ffeat, fedge in paired:
    print(f"\n=== [{base}] ===")
    test_nodeset = pd.read_csv(ffeat)
    test_edge = pd.read_csv(fedge)
    keywords = leaky_module.get(base, [])
    def contains_any(value, keywords):
        return any(kw in str(value) for kw in keywords)
    test_nodeset["label"] = test_nodeset["node"].apply(
        lambda x: 1 if contains_any(x, keywords) else 0
    )

    test_feature = test_nodeset.node_number.to_numpy()
    test_target = tf.keras.utils.to_categorical(test_nodeset["label"], num_classes=2)

        # ===== Chunked evaluation (avoid indices out of training bound) =====
    # 1) Automatically detect training upper bound (max index = TRAIN_N - 1)
    def _detect_train_N(model):
        # Try to get from Embedding layer
        for layer in model.layers:
            try:
                from tensorflow.keras.layers import Embedding
            except Exception:
                Embedding = tf.keras.layers.Embedding
            if isinstance(layer, Embedding):
                if getattr(layer, "embeddings", None) is not None and layer.embeddings.shape.rank == 2:
                    return int(layer.embeddings.shape[0])
                if getattr(layer, "input_dim", None) is not None:
                    return int(layer.input_dim)
        # Try common variable names (if custom feature table exists)
        for attr in ["feature_table", "node_features", "features"]:
            if hasattr(model, attr):
                var = getattr(model, attr)
                try:
                    return int(var.shape[0])
                except Exception:
                    pass
        return None

    TRAIN_N = _detect_train_N(gnn_model)
    if TRAIN_N is None:
        TRAIN_N = 227  # fallback
        print(f"[{base}] Warning: cannot detect train_N from model; fallback TRAIN_N={TRAIN_N}")

    # 2) Split test set into chunks of at most TRAIN_N samples
    CHUNK = TRAIN_N
    n_total = test_feature.shape[0]

    # Ensure dtype and shape
    test_feature = np.asarray(test_feature, dtype=np.int32)
    test_target  = np.asarray(test_target,  dtype=np.float32)

    total_samples = 0
    acc_sum = 0.0
    prec_sum = 0.0
    rec_sum = 0.0

    for start in range(0, n_total, CHUNK):
        end = min(start + CHUNK, n_total)
        idx_chunk = test_feature[start:end]
        y_chunk   = test_target[start:end]

        # Filter out indices >= TRAIN_N (would cause Gather out of range)
        unique_idx = np.unique(idx_chunk)
        remap = {old: new for new, old in enumerate(unique_idx)}
        idx_valid = np.array([remap[i] for i in idx_chunk], dtype=np.int32)
        y_valid   = y_chunk

        # Evaluate this chunk
        loss, acc, prec, rec = gnn_model.evaluate(x=idx_valid, y=y_valid, verbose=0)

        m = idx_valid.shape[0]
        total_samples += m
        acc_sum  += float(acc)  * m
        prec_sum += float(prec) * m
        rec_sum  += float(rec)  * m

    if total_samples == 0:
        print(f"[{base}] Warning: no valid indices (< {TRAIN_N}) in test set; skipped.")
    else:
        test_accuracy = acc_sum / total_samples
        precision     = prec_sum / total_samples
        recall        = rec_sum / total_samples
        print(f"Test accuracy: {acc_sum} / {total_samples} = {round(test_accuracy * 100, 2)}%")
        # Optional: print precision/recall as well
        # print(f"Test precision: {round(precision * 100, 2)}%")
        # print(f"Test recall:    {round(recall * 100, 2)}%")


Found 4 dataset(s): ['AES_PPRM1', 'AES_PPRM3', 'AES_TBL', 'RSA']

=== [AES_PPRM1] ===
Test accuracy: 55.00000011920929 / 58 = 94.83%

=== [AES_PPRM3] ===
Test accuracy: 387.9999958872795 / 418 = 92.82%

=== [AES_TBL] ===
Test accuracy: 55.00000011920929 / 58 = 94.83%

=== [RSA] ===
Test accuracy: 409.00000435113907 / 441 = 92.74%
