# ⚠️ GNN Delay/Risk Prediction Model

This notebook sets up a Graph Neural Network to predict high-risk (delayed) shipment routes.

In [2]:
# Install PyTorch Geometric (torch_geometric) + dependencies
import torch
torch_version = torch.__version__.split("+")[0]

!pip install torch-scatter -f https://data.pyg.org/whl/torch-{torch_version}.html
!pip install torch-sparse -f https://data.pyg.org/whl/torch-{torch_version}.html
!pip install torch-geometric


Looking in links: https://data.pyg.org/whl/torch-2.6.0.html
Collecting torch-scatter
  Downloading torch_scatter-2.1.2.tar.gz (108 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.0/108.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torch-scatter
  Building wheel for torch-scatter (setup.py) ... [?25l[?25hdone
  Created wheel for torch-scatter: filename=torch_scatter-2.1.2-cp311-cp311-linux_x86_64.whl size=547368 sha256=f1f1f9b1730fd2830a8496a6a2bb035c9e91d925ddaa2feaeb32a819b3b9f8d4
  Stored in directory: /root/.cache/pip/wheels/b8/d4/0e/a80af2465354ea7355a2c153b11af2da739cfcf08b6c0b28e2
Successfully built torch-scatter
Installing collected packages: torch-scatter
Successfully installed torch-scatter-2.1.2
Looking in links: https://data.pyg.org/whl/torch-2.6.0.html
Collecting torch-sparse
  Downloading torch_sparse-0.6.18.tar.gz (209 kB)
[2K     [90m━━━━━━━━━━━

In [3]:
# 📚 Imports
import pandas as pd
import networkx as nx
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import numpy as np

In [4]:
# 📥 Load Data
edges_df = pd.read_csv("edges.csv")
nodes_df = pd.read_csv("nodes.csv")

In [8]:
# 🧱 Build Graph and Create Features
G = nx.DiGraph()
for _, row in nodes_df.iterrows():
    G.add_node(row['node_id'], node_type=row['node_type'])

# Edge attributes and labels (1 if delay > 4 hrs, else 0)
for _, row in edges_df.iterrows():
    label = 1 if row['avg_delay_hr'] > 4 else 0
    G.add_edge(row['from_node'], row['to_node'],
               distance=row['distance_km'],
               delay=row['avg_delay_hr'],
               label=label)


In [10]:
# 🔁 Convert to PyTorch Geometric
# Manually extract edge_index and edge_attr
import torch
from torch_geometric.data import Data
# Optional: map node labels to integer IDs if they are strings
node_map = {node: idx for idx, node in enumerate(G.nodes())}
G_int = nx.relabel_nodes(G, node_map)

edge_index = []
edge_attr = []

for u, v, attrs in G_int.edges(data=True):
    edge_index.append([u, v])
    edge_attr.append([float(attrs["distance"]), float(attrs["delay"])])

edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_attr = torch.tensor(edge_attr, dtype=torch.float)

# Build Data object manually
data = Data(edge_index=edge_index, edge_attr=edge_attr)
data.y = (edge_attr[:, 1] > 4).long()  # Label for high delay

In [11]:
# 🧠 Define Simple GNN Model
class GNNModel(torch.nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super(GNNModel, self).__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, 2)

    def forward(self, x, edge_index):
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

In [12]:
# 🚀 Train Model
model = GNNModel(in_channels=data.edge_attr.shape[1], hidden_channels=16)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

for epoch in range(50):
    model.train()
    optimizer.zero_grad()
    out = model(data.edge_attr, data.edge_index)
    loss = F.nll_loss(out, data.y)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

Epoch 0, Loss: 29.5592
Epoch 10, Loss: 10.6439
Epoch 20, Loss: 3.8028
Epoch 30, Loss: 4.3761
Epoch 40, Loss: 1.5397


In [13]:
_, predicted = model(data.edge_attr, data.edge_index).max(dim=1)
correct = predicted.eq(data.y).sum().item()
accuracy = correct / data.num_edges
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 77.27%
