Assignment 2 -Task 1

Import Packges

In [118]:
import tensorflow as tf
from tensorflow import keras
from keras import layers  
import numpy as np
import pandas as pd
import os
import warnings
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")
pd.set_option("display.max_columns", 6)
pd.set_option("display.max_rows", 6)
np.random.seed(2)

 Load and Preprocess the Dataset

In [119]:
#  Path to your dataset folder
data_dir = r"D:\DL_Assignment2\DL_Assignment2\dataset (1)\dataset"

def read_edge_file(path):
    df = pd.read_csv(path, header=None)
    df.columns = ["source", "target"]
    return df

# Function to read node information files
def read_node_file(path):
    df = pd.read_csv(path, header=None)
    df.columns = ["node_id", "current_x", "current_y", "previous_x", "previous_y", "future_x", "future_y"]
    return df

edge_paths = [os.path.join(data_dir, fname) for fname in os.listdir(data_dir) if fname.endswith(".edges")]
all_edges = pd.DataFrame()
for path in edge_paths:
    all_edges = pd.concat([all_edges, read_edge_file(path)], ignore_index=True)


node_paths = [os.path.join(data_dir, fname) for fname in os.listdir(data_dir) if fname.endswith(".nodes")]
all_nodes = pd.DataFrame()
for path in node_paths:
    all_nodes = pd.concat([all_nodes, read_node_file(path)], ignore_index=True)

all_nodes.replace(to_replace='_', value=np.nan, inplace=True)

required_fields = ["future_x", "future_y", "previous_x", "previous_y"]
clean_nodes = all_nodes.dropna(subset=required_fields).reset_index(drop=True)

all_edges.replace(-1, np.nan, inplace=True)
all_edges.dropna(inplace=True)

valid_node_ids = set(clean_nodes["node_id"].unique())
edge_nodes = set(all_edges["source"]).union(set(all_edges["target"]))
missing_nodes = edge_nodes.difference(valid_node_ids)

filtered_edges = all_edges[
    ~all_edges["source"].isin(missing_nodes) & ~all_edges["target"].isin(missing_nodes)
]

reversed_edges = filtered_edges.rename(columns={"source": "target", "target": "source"})
edges = pd.concat([filtered_edges, reversed_edges], ignore_index=True)

nodes = clean_nodes

print("Processed Edge Data:")
print(edges.head())

print("\nProcessed Node Data:")
print(nodes.head())

Processed Edge Data:
       source      target
0  19585800.0  19590700.0
1  19585800.0  19595200.0
2  19590700.0  19592400.0
3  19590700.0  19595200.0
4  19591900.0  19592201.0

Processed Node Data:
    node_id  current_x  current_y  ... previous_y future_x  future_y
0  19502500    40972.0   -16957.0  ...   -16957.0  41185.0  -16480.0
1  19585800    12688.0    -6816.0  ...    -6816.0  13381.0   -7427.0
2  19590700    12888.0    -6249.0  ...    -6249.0  13540.0   -6865.0
3  19591900     8934.0    -3797.0  ...    -3797.0   9006.0   -4048.0
4  19592201    10095.0    -5080.0  ...    -5080.0  10712.0   -5615.0

[5 rows x 7 columns]


Node ID Mapping and Feature Normalization

In [120]:
node_id_map = {original_id: new_index for new_index, original_id in enumerate(sorted(nodes["node_id"].unique()))}

nodes["node_id"] = nodes["node_id"].map(node_id_map)
edges["source"] = edges["source"].map(node_id_map)
edges["target"] = edges["target"].map(node_id_map)

features_to_scale = ["current_x", "current_y", "previous_x", "previous_y"]

# Copy raw targets first
raw_targets = nodes[["future_x", "future_y"]].copy()  # <-- Must be before any scaling

# Fit target scaler first using raw targets
target_scaler = MinMaxScaler()
target_scaler.fit(raw_targets)

# Now scale only future_x, future_y using the same scaler
nodes[["future_x", "future_y"]] = target_scaler.transform(raw_targets)

# Apply feature scaling separately
features_to_scale = ["current_x", "current_y", "previous_x", "previous_y"]
feature_scaler = MinMaxScaler()
nodes[features_to_scale] = feature_scaler.fit_transform(nodes[features_to_scale])


print("Scaled Node Feature Values:")
print(nodes[features_to_scale].head())


Scaled Node Feature Values:
   current_x  current_y  previous_x  previous_y
0   0.925234   0.172395    0.927256    0.174749
1   0.581439   0.405838    0.586045    0.413002
2   0.583870   0.418890    0.588457    0.426323
3   0.535809   0.475334    0.540757    0.483930
4   0.549921   0.445800    0.554763    0.453787


Split into Train and Test Sets

In [121]:
# Obtain random indices
random_indices = np.random.permutation(range(nodes.shape[0]))

# 50/50 split
train_data = nodes.iloc[random_indices[: len(random_indices) // 2]]
test_data = nodes.iloc[random_indices[len(random_indices) // 2 :]]


Prepare the Graph data


In [122]:
train_indices = train_data["node_id"].to_numpy()
test_indices = test_data["node_id"].to_numpy()

train_labels = train_data[["future_x", "future_y"]].astype(float).to_numpy()
test_labels = test_data[["future_x", "future_y"]].astype(float).to_numpy()
edges = tf.convert_to_tensor(edges[["source", "target"]].to_numpy(), dtype=tf.int32)
node_states = tf.convert_to_tensor(nodes.sort_values("node_id").iloc[:, 1:-2].to_numpy(), dtype=tf.float32)

print("Edges shape:\t\t", edges.shape)
print("Node features shape:", node_states.shape)

Edges shape:		 (5840, 2)
Node features shape: (2171, 4)


Build the Model

In [123]:
class GraphAttention(layers.Layer):
    def __init__(
        self,
        units,
        kernel_initializer="glorot_uniform",
        kernel_regularizer=None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.units = units
        self.kernel_initializer = keras.initializers.get(kernel_initializer)
        self.kernel_regularizer = keras.regularizers.get(kernel_regularizer)

    def build(self, input_shape):

        self.kernel = self.add_weight(
            shape=(input_shape[0][-1], self.units),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel",
        )
        self.kernel_attention = self.add_weight(
            shape=(self.units * 2, 1),
            trainable=True,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            name="kernel_attention",
        )
        self.built = True

    def call(self, inputs):
        node_states, edges = inputs

        node_states_transformed = tf.matmul(node_states, self.kernel)
        node_states_expanded = tf.gather(node_states_transformed, edges)
        node_states_expanded = tf.reshape(
            node_states_expanded, (tf.shape(edges)[0], -1)
        )
        attention_scores = tf.nn.leaky_relu(
            tf.matmul(node_states_expanded, self.kernel_attention)
        )
        attention_scores = tf.squeeze(attention_scores, -1)
        attention_scores = tf.math.exp(tf.clip_by_value(attention_scores, -2, 2))
        attention_scores_sum = tf.math.unsorted_segment_sum(
            data=attention_scores,
            segment_ids=edges[:, 0],
            num_segments=tf.reduce_max(edges[:, 0]) + 1,
        )
        attention_scores_sum = tf.repeat(
            attention_scores_sum, tf.math.bincount(tf.cast(edges[:, 0], "int32"))
        )
        attention_scores_norm = attention_scores / attention_scores_sum

        node_states_neighbors = tf.gather(node_states_transformed, edges[:, 1])
        out = tf.math.unsorted_segment_sum(
            data=node_states_neighbors * attention_scores_norm[:, tf.newaxis],
            segment_ids=edges[:, 0],
            num_segments=tf.shape(node_states)[0],
        )
        return out




Multi-Head attention Layer

In [124]:
class MultiHeadGraphAttention(layers.Layer):
    def __init__(self, units, num_heads=8, merge_type="concat", **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.merge_type = merge_type
        self.attention_layers = [GraphAttention(units) for _ in range(num_heads)]

    def call(self, inputs):
        atom_features, pair_indices = inputs

        outputs = [
            attention_layer([atom_features, pair_indices])
            for attention_layer in self.attention_layers
        ]
        if self.merge_type == "concat":
            outputs = tf.concat(outputs, axis=-1)
        else:
            outputs = tf.reduce_mean(tf.stack(outputs, axis=-1), axis=-1)
        return tf.nn.relu(outputs)

In [125]:
class GraphAttentionNetwork(keras.Model):
    def __init__(
        self,
        node_states,
        edges,
        hidden_units,
        num_heads,
        num_layers,
        output_dim,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.node_states = node_states
        self.edges = edges
        self.preprocess = layers.Dense(hidden_units * num_heads, activation="relu")
        self.attention_layers = [
            MultiHeadGraphAttention(hidden_units, num_heads) for _ in range(num_layers)
        ]
        self.output_layer = layers.Dense(output_dim, activation=None)

    def call(self, inputs):
        node_states, edges = inputs
        x = self.preprocess(node_states)
        for attention_layer in self.attention_layers:
            x = attention_layer([x, edges]) + x
        outputs = self.output_layer(x)
        return outputs
    
    def train_step(self, data):
        indices, labels = data

        with tf.GradientTape() as tape:
            outputs = self([self.node_states, self.edges])
            loss = self.compiled_loss(labels, tf.gather(outputs, indices))
        grads = tape.gradient(loss, self.trainable_weights)
        optimizer.apply_gradients(zip(grads, self.trainable_weights))
        self.compiled_metrics.update_state(labels, tf.gather(outputs, indices))

        return {m.name: m.result() for m in self.metrics}


    def predict_step(self, data):
        indices = data
        outputs = self([self.node_states, self.edges])
        return tf.gather(outputs, indices)

    def test_step(self, data):
        indices, labels = data
        outputs = self([self.node_states, self.edges])
        loss = self.compiled_loss(labels, tf.gather(outputs, indices))
        self.compiled_metrics.update_state(labels, tf.gather(outputs, indices))

        return {m.name: m.result() for m in self.metrics}

Train and Evaluate

In [126]:
# Define hyper-parameters
HIDDEN_UNITS = 100
NUM_HEADS = 8
NUM_LAYERS = 3
OUTPUT_DIM = 2
NUM_EPOCHS = 100
BATCH_SIZE = 256
VALIDATION_SPLIT = 0.1
LEARNING_RATE = 1e-4
MOMENTUM = 0.9

def euclidean_dist(y_true, y_pred):
    return tf.reduce_mean(tf.norm(y_true - y_pred, axis=-1))

loss_fn = keras.losses.MeanSquaredError()
optimizer = keras.optimizers.SGD(LEARNING_RATE, momentum=MOMENTUM, clipnorm=1.0)
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss", min_delta=1e-5, patience=5, restore_best_weights=True
)

gat_model = GraphAttentionNetwork(
    node_states, edges, HIDDEN_UNITS, NUM_HEADS, NUM_LAYERS, OUTPUT_DIM
)

gat_model.compile(loss=loss_fn, optimizer=optimizer, metrics=[euclidean_dist])

print('Training started: Task1')

x = gat_model.fit(
    x=train_indices,
    y=train_labels,
    validation_split=VALIDATION_SPLIT,
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    callbacks=[early_stopping],
    verbose=2,
)

results = gat_model.evaluate(x=test_indices, y=test_labels, verbose=0, return_dict=True)

# Predict on test data
y_pred_scaled = gat_model.predict(test_indices)
y_true_scaled = test_labels

# Inverse transform to get original values
y_pred_denorm = target_scaler.inverse_transform(y_pred_scaled)
y_true_denorm = target_scaler.inverse_transform(y_true_scaled)

# Calculate Euclidean distance in original units
euclidean_dist = np.linalg.norm(y_true_denorm - y_pred_denorm, axis=1).mean()
print(f"\nTest Euclidean Distance (in meters): {euclidean_dist / 1000:.2f} m")


Training started: Task1
Epoch 1/100
4/4 - 26s - 6s/step - euclidean_dist: 1.1544 - loss: 0.8250 - val_loss: 0.7712
Epoch 2/100
4/4 - 1s - 278ms/step - euclidean_dist: 0.9651 - loss: 0.7129 - val_loss: 0.6052
Epoch 3/100
4/4 - 1s - 281ms/step - euclidean_dist: 0.7471 - loss: 0.5258 - val_loss: 0.3957
Epoch 4/100
4/4 - 1s - 293ms/step - euclidean_dist: 0.6168 - loss: 0.3159 - val_loss: 0.1789
Epoch 5/100
4/4 - 1s - 247ms/step - euclidean_dist: 0.7027 - loss: 0.1345 - val_loss: 0.0874
Epoch 6/100
4/4 - 1s - 268ms/step - euclidean_dist: 0.6590 - loss: 0.0944 - val_loss: 0.1240
Epoch 7/100
4/4 - 1s - 273ms/step - euclidean_dist: 0.5772 - loss: 0.1605 - val_loss: 0.2304
Epoch 8/100
4/4 - 1s - 258ms/step - euclidean_dist: 0.5918 - loss: 0.2633 - val_loss: 0.3052
Epoch 9/100
4/4 - 1s - 190ms/step - euclidean_dist: 0.5653 - loss: 0.3054 - val_loss: 0.2935
Epoch 10/100
4/4 - 1s - 171ms/step - euclidean_dist: 0.5301 - loss: 0.2724 - val_loss: 0.2383
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37