In [None]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import warnings
import time

# -------------------------
# Setup dummy dataset files
# -------------------------
dataset_dir = "dataset"

# -------------------------
# Refactored GAT Code
# -------------------------
warnings.filterwarnings("ignore")
np.random.seed(2)
tf.random.set_seed(2)

print("TensorFlow Version:", tf.__version__)
gpus = tf.config.list_physical_devices("GPU")
if gpus:
    try:
        tf.config.set_visible_devices(gpus[0], "GPU")
        print("Using GPU:", gpus[0])
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected. Training will run on CPU.")

start_time = time.time()


# Data loading functions
def find_all_scene_ids(dataset_dir):
    scene_ids = []
    for file in os.listdir(dataset_dir):
        if file.endswith(".edges"):
            scene_id = file.split(".")[0]
            scene_ids.append(scene_id)
    return scene_ids


def load_all_subgraphs(dataset_dir):
    scene_ids = find_all_scene_ids(dataset_dir)
    scenes = []
    for scene_id in scene_ids:
        edges_file = os.path.join(dataset_dir, f"{scene_id}.edges")
        nodes_file = os.path.join(dataset_dir, f"{scene_id}.nodes")
        if not os.path.exists(edges_file) or not os.path.exists(nodes_file):
            print(f"Skipping scene ID {scene_id}: Missing files.")
            continue

        # Load edges and nodes.
        edges = pd.read_csv(edges_file, sep=",", header=None, names=["target", "source"])
        nodes = pd.read_csv(
            nodes_file,
            sep=",",
            header=None,
            names=["node_id", "current_x", "current_y", "previous_x", "previous_y", "future_x", "future_y"],
        )
        for col in nodes.columns:
            nodes[col] = pd.to_numeric(nodes[col], errors="coerce")
        if nodes.isnull().any().any():
            nan_nodes = nodes[nodes.isnull().any(axis=1)]
            nan_node_ids = nan_nodes["node_id"].tolist()
            print(f"Scene {scene_id}: Filtering {len(nan_node_ids)} nodes with NaN values.")
            edges = edges[~edges["source"].isin(nan_node_ids) & ~edges["target"].isin(nan_node_ids)]
            nodes = nodes.dropna(subset=["future_x", "future_y"])
        if (edges["source"] == -1).any() or (edges["target"] == -1).any():
            print(f"Scene {scene_id} contains -1 edges. Removing these edges.")
            edges = edges[(edges["source"] != -1) & (edges["target"] != -1)]
            connected_nodes = pd.unique(edges[["target", "source"]].values.ravel())
            nodes = nodes[nodes["node_id"].isin(connected_nodes)]
        scenes.append({"scene_id": scene_id, "edges": edges, "nodes": nodes})
    return scenes


# Define feature and target columns
feature_cols = ["current_x", "current_y", "previous_x", "previous_y"]
target_cols = ["future_x", "future_y"]


# def convert_scene_to_tensors(scene):
#     nodes_df = scene["nodes"].reset_index(drop=True)
#     edges_df = scene["edges"].reset_index(drop=True)
#     features = nodes_df[feature_cols].to_numpy().astype(np.float32)
#     targets = nodes_df[target_cols].to_numpy().astype(np.float32)
#     edges = edges_df.to_numpy().astype(np.int32)
#     return features, edges, targets


def convert_scene_to_tensors(scene):
    nodes_df = scene["nodes"].reset_index(drop=True)
    edges_df = scene["edges"].reset_index(drop=True)
    # Map node_id to local index
    node_id_to_idx = {nid: i for i, nid in enumerate(nodes_df["node_id"])}
    # Remap edges to local indices
    edges_df = edges_df.copy()
    edges_df["target"] = edges_df["target"].map(node_id_to_idx)
    edges_df["source"] = edges_df["source"].map(node_id_to_idx)
    # Drop edges with missing nodes (after mapping)
    edges_df = edges_df.dropna().astype(int)
    features = nodes_df[feature_cols].to_numpy().astype(np.float32)
    targets = nodes_df[target_cols].to_numpy().astype(np.float32)
    edges = edges_df.to_numpy().astype(np.int32)
    return features, edges, targets


def split_scenes(scenes, train_ratio=0.7, val_ratio=0.15):
    np.random.shuffle(scenes)
    n_total = len(scenes)
    n_train = int(n_total * train_ratio)
    n_val = int(n_total * val_ratio)
    train_scenes = scenes[:n_train]
    val_scenes = scenes[n_train : n_train + n_val]
    test_scenes = scenes[n_train + n_val :]
    return train_scenes, val_scenes, test_scenes


scenes = load_all_subgraphs(dataset_dir)
print(f"Loaded {len(scenes)} scenes.")
train_scenes, val_scenes, test_scenes = split_scenes(scenes, train_ratio=0.7, val_ratio=0.15)
print(f"Train scenes: {len(train_scenes)}, Val scenes: {len(val_scenes)}, Test scenes: {len(test_scenes)}")


def scene_generator(scene_list):
    for scene in scene_list:
        yield convert_scene_to_tensors(scene)


train_dataset = tf.data.Dataset.from_generator(
    lambda: scene_generator(train_scenes),
    output_signature=(
        tf.TensorSpec(shape=(None, len(feature_cols)), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 2), dtype=tf.int32),
        tf.TensorSpec(shape=(None, len(target_cols)), dtype=tf.float32),
    ),
)
val_dataset = tf.data.Dataset.from_generator(
    lambda: scene_generator(val_scenes),
    output_signature=(
        tf.TensorSpec(shape=(None, len(feature_cols)), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 2), dtype=tf.int32),
        tf.TensorSpec(shape=(None, len(target_cols)), dtype=tf.float32),
    ),
)
test_dataset = tf.data.Dataset.from_generator(
    lambda: scene_generator(test_scenes),
    output_signature=(
        tf.TensorSpec(shape=(None, len(feature_cols)), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 2), dtype=tf.int32),
        tf.TensorSpec(shape=(None, len(target_cols)), dtype=tf.float32),
    ),
)


def squeeze_batch(features, edges, targets):
    return tf.squeeze(features, axis=0), tf.squeeze(edges, axis=0), tf.squeeze(targets, axis=0)


train_dataset = train_dataset.shuffle(100).batch(1).map(squeeze_batch)
val_dataset = val_dataset.batch(1).map(squeeze_batch)
test_dataset = test_dataset.batch(1).map(squeeze_batch)


# -------------------------
# Define Model Components
# -------------------------
class GraphAttention(layers.Layer):
    def __init__(self, units, kernel_initializer="glorot_uniform", kernel_regularizer=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.kernel_initializer = keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)

    def build(self, input_shape):
        self.kernel = self.add_weight(
            shape=(input_shape[0][-1], self.units),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel",
        )
        self.kernel_attention = self.add_weight(
            shape=(self.units * 2, 1),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel_attention",
        )
        super().build(input_shape)

    def call(self, inputs):
        node_states, edges = inputs
        node_states_transformed = tf.matmul(node_states, self.kernel)
        target_states = tf.gather(node_states_transformed, edges[:, 0])
        source_states = tf.gather(node_states_transformed, edges[:, 1])
        concat_features = tf.concat([target_states, source_states], axis=-1)
        attention_scores = tf.nn.leaky_relu(tf.matmul(concat_features, self.kernel_attention))
        attention_scores = tf.squeeze(attention_scores, axis=-1)
        attention_scores = tf.exp(tf.clip_by_value(attention_scores, -2, 2))
        num_nodes = tf.shape(node_states)[0]
        attention_sum = tf.math.unsorted_segment_sum(attention_scores, segment_ids=edges[:, 0], num_segments=num_nodes)
        normalized_attention = attention_scores / tf.gather(attention_sum, edges[:, 0])
        node_states_neighbors = tf.gather(node_states_transformed, edges[:, 1])
        out = tf.math.unsorted_segment_sum(
            data=node_states_neighbors * normalized_attention[:, tf.newaxis],
            segment_ids=edges[:, 0],
            num_segments=num_nodes,
        )
        return out


class MultiHeadGraphAttention(layers.Layer):
    def __init__(self, units, num_heads=8, merge_type="concat", **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.merge_type = merge_type
        self.attention_layers = [GraphAttention(units) for _ in range(num_heads)]

    def call(self, inputs):
        node_features, edges = inputs
        outputs = [attn([node_features, edges]) for attn in self.attention_layers]
        if self.merge_type == "concat":
            out = tf.concat(outputs, axis=-1)
        else:
            out = tf.reduce_mean(tf.stack(outputs, axis=-1), axis=-1)
        return tf.nn.relu(out)


class GraphAttentionNetwork(keras.Model):
    def __init__(self, hidden_units, num_heads, num_layers, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.preprocess = layers.Dense(hidden_units * num_heads, activation="relu")
        self.attention_layers = [MultiHeadGraphAttention(hidden_units, num_heads) for _ in range(num_layers)]
        self.output_layer = layers.Dense(output_dim)

    def call(self, inputs, training=False):
        # Since our dataset is unbatched, node_features is expected to have shape [n_nodes, feature_dim]
        node_features, edges = inputs

        # Directly use inputs since no batch dimension is present.
        x = self.preprocess(node_features)
        for attn_layer in self.attention_layers:
            x_new = attn_layer([x, edges])
            x = x + x_new  # residual connection
        outputs = self.output_layer(x)
        return outputs

    def train_step(self, data):
        node_features, edges, targets = data
        with tf.GradientTape() as tape:
            outputs = self([node_features, edges], training=True)
            loss = self.compiled_loss(targets, outputs)
        grads = tape.gradient(loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.compiled_metrics.update_state(targets, outputs)
        return {m.name: m.result() for m in self.metrics}

    def predict_step(self, data):
        node_features, edges, _ = data
        outputs = self([node_features, edges], training=False)
        return outputs

    def test_step(self, data):
        node_features, edges, targets = data
        outputs = self([node_features, edges], training=False)
        loss = self.compiled_loss(targets, outputs)
        self.compiled_metrics.update_state(targets, outputs)
        return {m.name: m.result() for m in self.metrics}


# -------------------------
# Model Training & Evaluation
# -------------------------
HIDDEN_UNITS = 100
NUM_HEADS = 8
NUM_LAYERS = 3
OUTPUT_DIM = 2
NUM_EPOCHS = 100
LEARNING_RATE = 1e-4

def mean_euclidean_distance(y_true, y_pred):
    return tf.reduce_mean(tf.norm(y_true - y_pred, axis=-1))

loss_fn = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
metrics = [
    keras.metrics.MeanAbsoluteError(),
    keras.metrics.MeanSquaredError(),
    keras.metrics.RootMeanSquaredError(name="rmse"),
    keras.metrics.R2Score(),
    mean_euclidean_distance,
]

gat_model = GraphAttentionNetwork(
    hidden_units=HIDDEN_UNITS,
    num_heads=NUM_HEADS,
    num_layers=NUM_LAYERS,
    output_dim=OUTPUT_DIM,
)
gat_model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)

early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=1e-5, patience=5, verbose=1, restore_best_weights=True, start_from_epoch=5
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor="val_loss", factor=0.1, patience=10, verbose=1, min_delta=1e-4, min_lr=1e-6
)

# Add this after you define `targets_np`
print("Sample target values (future_x, future_y):", targets_np[:5])
print("Target mean:", np.mean(targets_np, axis=0), "Target std:", np.std(targets_np, axis=0))

print("Training...")
history = gat_model.fit(
    train_dataset,
    epochs=NUM_EPOCHS,
    validation_data=val_dataset,
    callbacks=[reduce_lr],
    verbose=2,
)


print("Evaluating on test dataset...")
results = gat_model.evaluate(test_dataset, verbose=2)
print("Test metrics:", results)

print("\nSample predictions for test scenes:")
for features, edges, targets in test_dataset.take(1):
    predictions = gat_model((features, edges), training=False)  # Use direct call, not .predict
    for i in range(min(5, predictions.shape[0])):
        print(
            f"Node {i}: True future_x={targets[i, 0]:.1f}, future_y={targets[i, 1]:.1f} | Predicted future_x={predictions[i, 0]:.1f}, future_y={predictions[i, 1]:.1f}"
        )

# After predictions = gat_model((features, edges), training=False)
plt.scatter(targets[:20, 0], targets[:20, 1], label="True", c="g")
plt.scatter(predictions[:20, 0], predictions[:20, 1], label="Pred", c="r", marker="x")
plt.legend()
plt.xlabel("future_x")
plt.ylabel("future_y")
plt.title("True vs Predicted Future Positions")
plt.show()

end_time = time.time()
running_time = end_time - start_time
hours = int(running_time // 3600)
minutes = int((running_time % 3600) // 60)
seconds = int(running_time % 60)
print(f"Running time: {hours} hours, {minutes} minutes, {seconds} seconds")

TensorFlow Version: 2.18.0
No GPU detected. Training will run on CPU.
Scene 1352890817715: Filtering 1 nodes with NaN values.
Scene 1352890817715 contains -1 edges. Removing these edges.
Scene 1352890814428: Filtering 1 nodes with NaN values.
Scene 1352890814428 contains -1 edges. Removing these edges.
Scene 1352890802323 contains -1 edges. Removing these edges.
Scene 1352890800322: Filtering 1 nodes with NaN values.
Scene 1352890800322 contains -1 edges. Removing these edges.
Scene 1352890875617 contains -1 edges. Removing these edges.
Scene 1352890804562: Filtering 1 nodes with NaN values.
Scene 1352890804562 contains -1 edges. Removing these edges.
Scene 1352890841688: Filtering 1 nodes with NaN values.
Scene 1352890841688 contains -1 edges. Removing these edges.
Scene 1352890837555 contains -1 edges. Removing these edges.
Scene 1352890825684 contains -1 edges. Removing these edges.
Scene 1352890801553: Filtering 1 nodes with NaN values.
Scene 1352890801553 contains -1 edges. Removi

In [8]:
"""
Title: GAT Regression for Pedestrian Future Position Prediction
Description:
    This script demonstrates how to use a Graph Attention Network (GAT)
    for a regression task over pedestrian trajectory data.

    Each scene is treated as a separate graph. The nodes represent
    pedestrians with features (e.g. current position, previous motion, etc.)
    and the edges represent interactions (or connectivity) between them.

    The model learns to predict the pedestrian's future position, namely
    future_x and future_y one second ahead.

Author: Your Name
Date: 2025-04-13
"""

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
import warnings

warnings.filterwarnings("ignore")
np.random.seed(2)

# ------------------------------------------------------------------------------
# Data Loading and Preprocessing
# ------------------------------------------------------------------------------


# Define the dataset directory
dataset_dir = "dataset"


# Function to find all scene IDs in the dataset directory
def find_all_scene_ids(dataset_dir):
    scene_ids = []
    for file in os.listdir(dataset_dir):
        if file.endswith(".edges"):
            scene_id = file.split(".")[0]
            scene_ids.append(scene_id)
    return scene_ids


# Function to load all subgraphs for the found scene IDs
def load_all_subgraphs(dataset_dir):
    scene_ids = find_all_scene_ids(dataset_dir)
    scenes = []

    for scene_id in scene_ids:

        edges_file = os.path.join(dataset_dir, f"{scene_id}.edges")
        nodes_file = os.path.join(dataset_dir, f"{scene_id}.nodes")

        # Check if both files exist
        if not os.path.exists(edges_file) or not os.path.exists(nodes_file):
            print(f"Skipping scene ID {scene_id}: Missing files.")
            continue

        # Load edges
        edges = pd.read_csv(edges_file, sep=",", header=None, names=["target", "source"])

        # Load nodes
        nodes = pd.read_csv(
            nodes_file,
            sep=",",
            header=None,
            names=["node_id", "current_x", "current_y", "previous_x", "previous_y", "future_x", "future_y"],
        )

        for col in nodes.columns:
            nodes[col] = pd.to_numeric(nodes[col], errors="coerce")

        if nodes.isnull().any().any():
            # Step 1: Identify rows with NaN values in nodes_df
            nan_nodes = nodes[nodes.isnull().any(axis=1)]

            # Step 2: Extract the node_id values of those rows
            nan_node_ids = nan_nodes["node_id"].tolist()

            # Step 3: Filter out edges in edges_df where source or target is in nan_node_ids
            # Display the filtered edges
            print(f"Original edges count: {len(edges)}")
            print(f"Original nodes count: {len(nodes)}")
            edges = edges[~edges["source"].isin(nan_node_ids) & ~edges["target"].isin(nan_node_ids)]

            print(f"Filtered edges count: {len(edges)}")
            nodes = nodes.dropna(subset=["future_x", "future_y"])
            print(f"Filtered nodes count: {len(nodes)}")

        # # Filter out edges with -1 as source value
        # edges = edges[edges["source"] != -1]

        # Check if there are any -1 edges
        if (edges["source"] == -1).any() or (edges["target"] == -1).any():
            print(f"Scene ID {scene_id} contains -1 edges. Processing...")

            # Remove edges with -1 as source or target
            edges = edges[(edges["source"] != -1) & (edges["target"] != -1)]

            # Get unique node IDs from the remaining edges
            connected_nodes = pd.unique(edges[["target", "source"]].values.ravel())

            # Filter nodes to keep only those that are connected
            nodes = nodes[nodes["node_id"].isin(connected_nodes)]

        # Store the subgraph
        scenes.append(
            {"scene_id": scene_id, "edges": edges, "nodes": nodes},
        )

    return scenes


# Example usage
scenes = load_all_subgraphs(dataset_dir)
print(f"Loaded {len(scenes)} scenes.")

Original edges count: 15
Original nodes count: 13
Filtered edges count: 14
Filtered nodes count: 13
Scene ID 1352890817715 contains -1 edges. Processing...
Original edges count: 16
Original nodes count: 14
Filtered edges count: 15
Filtered nodes count: 13
Scene ID 1352890814428 contains -1 edges. Processing...
Scene ID 1352890802323 contains -1 edges. Processing...
Original edges count: 23
Original nodes count: 12
Filtered edges count: 22
Filtered nodes count: 11
Scene ID 1352890800322 contains -1 edges. Processing...
Scene ID 1352890875617 contains -1 edges. Processing...
Original edges count: 16
Original nodes count: 13
Filtered edges count: 13
Filtered nodes count: 13
Scene ID 1352890804562 contains -1 edges. Processing...
Original edges count: 14
Original nodes count: 10
Filtered edges count: 13
Filtered nodes count: 9
Scene ID 1352890841688 contains -1 edges. Processing...
Scene ID 1352890837555 contains -1 edges. Processing...
Scene ID 1352890825684 contains -1 edges. Processing.

In [9]:
def aggregate_scenes(scenes):
    nodes_list = []
    edges_list = []
    scene_node_indices = {}
    node_offset = 0

    for scene in scenes:
        scene_id = scene["scene_id"]
        nodes_df = scene["nodes"].copy().reset_index(drop=True)
        edges_df = scene["edges"].copy().reset_index(drop=True)
        num_nodes = nodes_df.shape[0]
        scene_node_indices[scene_id] = np.arange(node_offset, node_offset + num_nodes)

        # Map original node_id to aggregated index.
        node_id_to_index = dict(zip(nodes_df["node_id"], range(node_offset, node_offset + num_nodes)))
        edges_df["target"] = edges_df["target"].apply(lambda x: node_id_to_index.get(x, -1))
        edges_df["source"] = edges_df["source"].apply(lambda x: node_id_to_index.get(x, -1))
        edges_df = edges_df[(edges_df["target"] != -1) & (edges_df["source"] != -1)]
        nodes_list.append(nodes_df)
        edges_list.append(edges_df)
        node_offset += num_nodes

    all_nodes = pd.concat(nodes_list, ignore_index=True)
    all_edges = pd.concat(edges_list, ignore_index=True).to_numpy().astype(np.int32)
    return all_nodes, all_edges, scene_node_indices


def scene_based_split(scene_node_indices, train_ratio=0.5):
    scene_ids = np.array(list(scene_node_indices.keys()))
    np.random.shuffle(scene_ids)
    n_train = int(len(scene_ids) * train_ratio)
    train_scenes = scene_ids[:n_train]
    test_scenes = scene_ids[n_train:]
    train_indices = np.concatenate([scene_node_indices[sid] for sid in train_scenes])
    test_indices = np.concatenate([scene_node_indices[sid] for sid in test_scenes])
    return train_indices, test_indices


def create_train_val_split(train_indices, val_ratio=0.1):
    # Randomly split the train_indices into training and validation sets.
    np.random.shuffle(train_indices)
    n_val = int(len(train_indices) * val_ratio)
    val_indices = train_indices[:n_val]
    train_indices_new = train_indices[n_val:]
    return train_indices_new, val_indices

In [10]:
def graph_generator(graphs):
    for graph in graphs:
        yield (
            graph["node_features"],
            graph["edges"],
            graph["targets"],
        )


def create_graph_dataset(graphs, batch_size=1, shuffle=True):
    ds = tf.data.Dataset.from_generator(
        lambda: graph_generator(graphs),
        output_signature=(
            tf.TensorSpec(shape=(None, 4), dtype=tf.float32),  # node features
            tf.TensorSpec(shape=(None, 2), dtype=tf.int32),  # edges
            tf.TensorSpec(shape=(None, 2), dtype=tf.float32),  # targets
        ),
    )
    if shuffle:
        ds = ds.shuffle(buffer_size=len(graphs))
    return ds.batch(batch_size)  # batch of scenes, NOT nodes

In [19]:
scenes[0]["nodes"]

Unnamed: 0,node_id,current_x,current_y,previous_x,previous_y,future_x,future_y
1,19585800,27320.0,-17405.0,26571.0,-16671.0,28033.0,-17874.0
2,19590700,27689.0,-16188.0,26793.0,-15576.0,28276.0,-16736.0
3,19591900,21413.0,-13728.0,20600.0,-13198.0,22412.0,-14133.0
4,19592201,23532.0,-14909.0,22635.0,-14512.0,24269.0,-15188.0
7,19595300,21494.0,-13198.0,20730.0,-12614.0,22476.0,-13642.0
8,19595800,22586.0,-14574.0,21735.0,-14220.0,23468.0,-14789.0
9,20000200,18815.0,-12050.0,17424.0,-11269.0,19831.0,-12739.0
10,20000300,18783.0,-11503.0,17802.0,-10627.0,20195.0,-12241.0
11,20000700,17579.0,-13447.0,16969.0,-12547.0,18601.0,-14030.0


In [11]:
class GraphAttentionLayer(layers.Layer):
    def __init__(self, units):
        super().__init__()
        self.units = units
        self.dense = layers.Dense(units)
        self.attn_dense = layers.Dense(1)

    def call(self, node_features, edges):
        h = self.dense(node_features)  # [N, units]
        src = tf.gather(h, edges[:, 0])
        tgt = tf.gather(h, edges[:, 1])
        concat = tf.concat([src, tgt], axis=-1)
        e = tf.nn.leaky_relu(self.attn_dense(concat))  # [E, 1]
        e = tf.squeeze(e, axis=1)

        # Normalize using softmax per target node
        attn_weights = tf.exp(e)
        attn_sum = tf.math.unsorted_segment_sum(attn_weights, edges[:, 0], tf.shape(node_features)[0])
        attn_norm = attn_weights / tf.gather(attn_sum, edges[:, 0])

        neighbor_feats = tf.gather(h, edges[:, 1]) * attn_norm[:, None]
        aggregated = tf.math.unsorted_segment_sum(neighbor_feats, edges[:, 0], tf.shape(node_features)[0])
        return tf.nn.relu(aggregated)


class GATRegressor(Model):
    def __init__(self, hidden_units=64, output_dim=2, num_layers=2):
        super().__init__()
        self.input_proj = layers.Dense(hidden_units, activation="relu")
        self.attn_layers = [GraphAttentionLayer(hidden_units) for _ in range(num_layers)]
        self.out_proj = layers.Dense(output_dim)

    def call(self, node_features, edges):
        x = self.input_proj(node_features)
        for attn in self.attn_layers:
            x = attn(x, edges) + x  # residual
        return self.out_proj(x)  # [N, 2]

In [12]:
def train_model(model, dataset, optimizer, loss_fn, epochs=10):
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        epoch_loss = []
        for step, (x, edge_index, y_true) in enumerate(dataset):
            x = tf.squeeze(x, axis=0)
            edge_index = tf.squeeze(edge_index, axis=0)
            y_true = tf.squeeze(y_true, axis=0)

            with tf.GradientTape() as tape:
                y_pred = model(x, edge_index)
                loss = loss_fn(y_true, y_pred)

            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            epoch_loss.append(loss.numpy())

        print(f"Loss: {np.mean(epoch_loss):.4f}")

In [14]:
scenes[8:]

[{'scene_id': '1352890825684',
  'edges':       target    source
  0   19585800  19590700
  1   19591900  19592201
  2   19591900  19595300
  3   19591900  19595800
  4   19591900  20000200
  5   19591900  20000300
  6   19591900  20001800
  7   19592201  19595300
  8   19592201  19595800
  9   19592201  20000200
  10  19592201  20000300
  11  19592201  20001800
  12  19595300  19595800
  13  19595300  20000200
  14  19595300  20000300
  15  19595300  20001800
  16  19595800  20000200
  17  19595800  20000300
  18  19595800  20001800
  19  20000200  20000300
  20  20000200  20000700
  21  20000300  20000700,
  'nodes':      node_id  current_x  current_y  previous_x  previous_y  future_x  future_y
  1   19585800    34509.0   -19885.0     33787.0    -19581.0   35325.0  -20296.0
  2   19590700    34195.0   -19289.0     33719.0    -18845.0   34602.0  -19701.0
  3   19591900    29432.0   -16554.0     28544.0    -16355.0   30360.0  -16811.0
  4   19592201    30123.0   -17672.0     29218.0   

In [15]:
# Assuming you have a `mock_graphs` list from earlier

train_ds = create_graph_dataset(scenes[:8])
val_ds = create_graph_dataset(scenes[8:], shuffle=False)

model = GATRegressor(hidden_units=64, num_layers=2)
optimizer = tf.keras.optimizers.Adam(1e-3)
loss_fn = tf.keras.losses.MeanSquaredError()

train_model(model, train_ds, optimizer, loss_fn, epochs=20)

2025-04-14 21:14:55.089413: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
2025-04-14 21:14:55.218776: W tensorflow/core/framework/op_kernel.cc:1829] UNKNOWN: KeyError: 'node_features'
Traceback (most recent call last):

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/tmp/ipykernel_2842518/2017478153.py", line 4, in graph_gene


Epoch 1/20


UnknownError: {{function_node __wrapped__IteratorGetNext_output_types_3_device_/job:localhost/replica:0/task:0/device:CPU:0}} KeyError: 'node_features'
Traceback (most recent call last):

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/ops/script_ops.py", line 269, in __call__
    ret = func(*args)
          ^^^^^^^^^^^

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^

  File "/home/joel/.local/lib/python3.12/site-packages/tensorflow/python/data/ops/from_generator_op.py", line 198, in generator_py_func
    values = next(generator_state.get_iterator(iterator_id))
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/tmp/ipykernel_2842518/2017478153.py", line 4, in graph_generator
    graph["node_features"],
    ~~~~~^^^^^^^^^^^^^^^^^

KeyError: 'node_features'


	 [[{{node PyFunc}}]] [Op:IteratorGetNext] name: 

In [5]:
# ------------------------------------------------------------------------------
# GAT Model Components for Regression
# ------------------------------------------------------------------------------


class GraphAttention(layers.Layer):
    def __init__(self, units, kernel_initializer="glorot_uniform", kernel_regularizer=None, **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.kernel_initializer = keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)

    def build(self, input_shape):
        self.kernel = self.add_weight(
            shape=(input_shape[0][-1], self.units),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel",
        )
        self.kernel_attention = self.add_weight(
            shape=(self.units * 2, 1),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel_attention",
        )
        self.built = True

    def call(self, inputs):
        node_states, edges = inputs

        # Linearly transform node states
        node_states_transformed = tf.matmul(node_states, self.kernel)

        # (1) Compute pair-wise attention scores
        target_states = tf.gather(node_states_transformed, edges[:, 0])
        source_states = tf.gather(node_states_transformed, edges[:, 1])
        concat_features = tf.concat([target_states, source_states], axis=-1)
        attention_scores = tf.nn.leaky_relu(tf.matmul(concat_features, self.kernel_attention))
        attention_scores = tf.squeeze(attention_scores, axis=-1)

        # (2) Normalize attention scores
        attention_scores = tf.exp(tf.clip_by_value(attention_scores, -2, 2))
        attention_scores_sum = tf.math.unsorted_segment_sum(
            data=attention_scores, 
            segment_ids=edges[:, 0],
            # num_segments=tf.shape(node_states)[0],
            num_segments=tf.reduce_max(edges[:, 0]) + 1,
        )
        attention_scores_sum = tf.repeat(attention_scores_sum, tf.math.bincount(tf.cast(edges[:, 0], "int32")))
        attention_scores_norm = attention_scores / attention_scores_sum

        # (3) Gather node states of neighbors, apply attention scores and aggregate
        node_states_neighbors = tf.gather(node_states_transformed, edges[:, 1])
        out = tf.math.unsorted_segment_sum(
            data=node_states_neighbors * attention_scores_norm[:, tf.newaxis],
            segment_ids=edges[:, 0],
            num_segments=tf.shape(node_states)[0],
        )
        return out


class MultiHeadGraphAttention(layers.Layer):
    def __init__(self, units, num_heads=8, merge_type="concat", **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.merge_type = merge_type
        self.attention_layers = [GraphAttention(units) for _ in range(num_heads)]

    def call(self, inputs):
        atom_features, pair_indices = inputs

        # Obtain outputs from each attention head
        outputs = [attention_layer([atom_features, pair_indices]) for attention_layer in self.attention_layers]
        # Concatenate or average the node states from each head
        if self.merge_type == "concat":
            outputs = tf.concat(outputs, axis=-1)
        else:
            outputs = tf.reduce_mean(tf.stack(outputs, axis=-1), axis=-1)
        # Activate and return node states
        return tf.nn.relu(outputs)


class GraphAttentionNetwork(keras.Model):
    def __init__(self, node_states, edges, hidden_units, num_heads, num_layers, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.node_states = node_states
        self.edges = edges
        self.preprocess = layers.Dense(hidden_units * num_heads, activation="relu")
        self.attention_layers = [MultiHeadGraphAttention(hidden_units, num_heads) for _ in range(num_layers)]
        self.output_layer = layers.Dense(output_dim)

    def call(self, inputs):
        node_states, edges = inputs
        x = self.preprocess(node_states)
        for attention_layer in self.attention_layers:
            x = attention_layer([x, edges]) + x
        outputs = self.output_layer(x)
        return outputs

    def train_step(self, data):
        indices, labels = data

        with tf.GradientTape() as tape:
            # Forward pass
            outputs = self([self.node_states, self.edges])
            # Compute loss
            loss = self.compiled_loss(labels, tf.gather(outputs, indices))
        # Compute gradients
        grads = tape.gradient(loss, self.trainable_weights)
        # Apply gradients (update weights)
        optimizer.apply_gradients(zip(grads, self.trainable_weights))
        # Update metric(s)
        self.compiled_metrics.update_state(labels, tf.gather(outputs, indices))

        return {m.name: m.result() for m in self.metrics}

    def predict_step(self, data):
        indices = data
        # Forward pass
        outputs = self([self.node_states, self.edges])
        # Compute probabilities
        return tf.nn.softmax(tf.gather(outputs, indices))

    def test_step(self, data):
        indices, labels = data
        # Forward pass
        outputs = self([self.node_states, self.edges])
        # Compute loss
        loss = self.compiled_loss(labels, tf.gather(outputs, indices))
        # Update metric(s)
        self.compiled_metrics.update_state(labels, tf.gather(outputs, indices))

        return {m.name: m.result() for m in self.metrics}

In [9]:
# ------------------------------------------------------------------------------
# Main: Data Preparation, Model Training, and Evaluation
# ------------------------------------------------------------------------------

if __name__ == "__main__":
    all_nodes, all_edges, scene_node_indices = aggregate_scenes(scenes)
    train_indices, test_indices = scene_based_split(scene_node_indices, train_ratio=0.8)
    train_indices, val_indices = create_train_val_split(train_indices, val_ratio=0.1)

    # Define features and targets
    feature_cols = [col for col in all_nodes.columns if col not in ["node_id", "future_x", "future_y"]]
    target_cols = ["future_x", "future_y"]
    node_features_np = all_nodes[feature_cols].to_numpy().astype(np.float32)
    targets_np = all_nodes[target_cols].to_numpy().astype(np.float32)

    print("Aggregated nodes shape:", node_features_np.shape)
    print("Aggregated edges shape:", all_edges.shape)
    print(
        "Training nodes:",
        train_indices.shape,
        "Validation nodes:",
        val_indices.shape,
        "Test nodes:",
        test_indices.shape,
    )

    node_features_tensor = tf.convert_to_tensor(node_features_np)
    edges_tensor = tf.convert_to_tensor(all_edges)

    # Define hyper-parameters
    HIDDEN_UNITS = 100
    NUM_HEADS = 8
    NUM_LAYERS = 3
    OUTPUT_DIM = 2
    NUM_EPOCHS = 100
    BATCH_SIZE = 16
    LEARNING_RATE = 1e-2

    loss_fn = keras.losses.MeanSquaredError()
    
    optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    
    metrics = [
        keras.metrics.MeanAbsoluteError(),
        keras.metrics.MeanSquaredError(),
        keras.metrics.R2Score(),
    ]

    early_stopping = keras.callbacks.EarlyStopping(
        monitor="val_loss",
        patience=15,
        restore_best_weights=True,
        verbose=1,
    )

    # Build the model
    gat_model = GraphAttentionNetwork(
        node_states=node_features_tensor,
        edges=edges_tensor,
        hidden_units=HIDDEN_UNITS,
        num_heads=NUM_HEADS,
        num_layers=NUM_LAYERS,
        output_dim=OUTPUT_DIM,
    )

    # Compile the model with MSE for loss and MAE as a metric.
    gat_model.compile(
        optimizer=optimizer,
        loss=loss_fn,
        metrics=metrics,
    )

    # Prepare tf.data.Datasets for training, validation, and testing.
    train_dataset = tf.data.Dataset.from_tensor_slices((train_indices, targets_np[train_indices]))
    train_dataset = train_dataset.shuffle(buffer_size=len(train_indices)).batch(BATCH_SIZE)

    val_dataset = tf.data.Dataset.from_tensor_slices((val_indices, targets_np[val_indices]))
    val_dataset = val_dataset.batch(BATCH_SIZE)

    test_dataset = tf.data.Dataset.from_tensor_slices((test_indices, targets_np[test_indices]))
    test_dataset = test_dataset.batch(BATCH_SIZE)

    # Set up callbacks: ReduceLROnPlateau and EarlyStopping.
    reduce_lr = keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss",
        factor=0.1,
        patience=10,
        verbose=1,
        mode="auto",
        min_delta=0.0001,
        cooldown=0,
        min_lr=0.0,
    )

    print("Training...")
    gat_model.fit(
        train_dataset,
        epochs=NUM_EPOCHS,
        validation_data=val_dataset,
        callbacks=[reduce_lr, early_stopping],
        verbose=2,
    )

Aggregated nodes shape: (1669, 4)
Aggregated edges shape: (2721, 2)
Training nodes: (1215,) Validation nodes: (135,) Test nodes: (319,)
Training...
Epoch 1/100
76/76 - 31s - 403ms/step - mean_absolute_error: 2284.4680 - mean_squared_error: 17410654.0000 - r2_score: 0.7833 - loss: 5324.3389 - val_loss: 4744.1011 - learning_rate: 0.0100
Epoch 2/100
76/76 - 12s - 164ms/step - mean_absolute_error: 1030.7981 - mean_squared_error: 1948120.0000 - r2_score: 0.9856 - loss: 5881.7021 - val_loss: 5401.6279 - learning_rate: 0.0100
Epoch 3/100
76/76 - 11s - 151ms/step - mean_absolute_error: 848.1266 - mean_squared_error: 2726958.7500 - r2_score: 0.9734 - loss: 5860.3389 - val_loss: 6459.4458 - learning_rate: 0.0100
Epoch 4/100
76/76 - 12s - 157ms/step - mean_absolute_error: 757.8577 - mean_squared_error: 2847388.7500 - r2_score: 0.9796 - loss: 5848.6724 - val_loss: 6395.3423 - learning_rate: 0.0100
Epoch 5/100
76/76 - 12s - 157ms/step - mean_absolute_error: 788.7696 - mean_squared_error: 1140395.25

KeyboardInterrupt: 

In [None]:
print("Evaluating on test set...")
results = gat_model.evaluate(test_dataset, verbose=2)
print(f"\nTest Loss (MSE): {results[0]:.4f}, Test MAE: {results[1]["mean_absolute_error"]:.4f}")

# Run predictions on test nodes
print("\nSample predictions for test nodes:")
predictions = gat_model.predict(tf.convert_to_tensor(test_indices))
for i, idx in enumerate(test_indices[:5]):
    print(
        f"Node {idx}: True future_x={targets_np[idx,0]:.1f}, future_y={targets_np[idx,1]:.1f} | Predicted future_x={predictions[i,0]:.1f}, future_y={predictions[i,1]:.1f}"
    )

Evaluating on test set...
21/21 - 1s - 34ms/step - mean_absolute_error: 294.7548 - mean_squared_error: 143900.9375 - r2_score: 0.9984 - loss: 57632.1484

Test Loss (MSE): 57632.1484, Test MAE: 294.7548

Sample predictions for test nodes:
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Node 684: True future_x=18656.0, future_y=-12362.0 | Predicted future_x=18322.5, future_y=-12147.2
Node 685: True future_x=18171.0, future_y=-11549.0 | Predicted future_x=18022.2, future_y=-11483.6
Node 686: True future_x=12779.0, future_y=-7350.0 | Predicted future_x=12806.2, future_y=-7281.3
Node 687: True future_x=9263.0, future_y=1645.0 | Predicted future_x=9479.1, future_y=749.9
Node 688: True future_x=20974.0, future_y=-13035.0 | Predicted future_x=20800.4, future_y=-13002.7
