## Graph Neural Network Model for Route Optimization



In [None]:
!pip install torch
!pip install osmnx
!pip install pandas numpy osmnx geopandas shapely matplotlib scikit-learn pathlib gymnasium torch torch_geometric torch_sparse torch_scatter

In [None]:
import glob
import time
import warnings

import pandas as pd
import numpy as np
import osmnx as ox
import networkx as nx
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torch_geometric.data import Data
from torch_geometric.nn import GINEConv
from torch.nn import Linear, ReLU, Dropout, LayerNorm, GRU
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence
from tqdm import tqdm

In [None]:
# Suppress the "invalid value encountered in cast" RuntimeWarning for the whole session.
warnings.filterwarnings(
    "ignore",
    message="invalid value encountered in cast",
    category=RuntimeWarning,
)

# Prefer the GPU when one is visible to torch.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f"Using device: {device}")

# Columns read from each CSV; the first two are identifiers, the rest are
# converted to numeric values by load_ev_data.
temp_cols = [
    'VehId',
    'Trip',
    'Timestamp(ms)',
    'Latitude[deg]',
    'Longitude[deg]',
    'Vehicle Speed[km/h]',
    'MAF[g/sec]',
    'Engine RPM[RPM]',
    'Absolute Load[%]',
    'Speed Limit[km/h]',
]
dtypes = {'VehId': 'int32', 'Trip': 'int32'}
chunksize = 200_000  # rows per pandas chunk

# Accumulators filled by load_ev_data: cleaned chunks and per-chunk coordinate extremes.
cleaned = []
lat_mins = []
lat_maxs = []
lon_mins = []
lon_maxs = []

def load_ev_data(path):
    """Read one CSV in chunks and append its cleaned rows to the module accumulators.

    For every chunk the measurement columns (``temp_cols[2:]``) are coerced to
    numbers, with unparseable values becoming 0. Rows whose latitude or
    longitude equals 0 are dropped. Each surviving chunk contributes its
    coordinate extremes to ``lat_mins``/``lat_maxs``/``lon_mins``/``lon_maxs``
    and is appended to ``cleaned`` sorted by vehicle, trip and timestamp.

    Args:
        path: Location of the CSV file to read.

    Returns:
        None. Results are accumulated in the module-level lists.
    """
    reader = pd.read_csv(
        path,
        usecols=temp_cols,
        dtype=dtypes,
        chunksize=chunksize,
        low_memory=False,
        on_bad_lines='warn',
    )
    for chunk in reader:
        # Coerce the measurement columns; timestamps stay integral, the rest become float32.
        for name in temp_cols[2:]:
            if name not in chunk.columns:
                continue
            values = pd.to_numeric(chunk[name], errors='coerce').fillna(0)
            if name == 'Timestamp(ms)':
                chunk[name] = values.astype('int64')
            else:
                chunk[name] = values.astype('float32')

        # Keep only rows where both coordinates are non-zero.
        has_position = (chunk['Latitude[deg]'] != 0) & (chunk['Longitude[deg]'] != 0)
        chunk = chunk[has_position]
        if chunk.empty:
            continue

        latitudes = chunk['Latitude[deg]']
        longitudes = chunk['Longitude[deg]']
        lat_mins.append(latitudes.min())
        lat_maxs.append(latitudes.max())
        lon_mins.append(longitudes.min())
        lon_maxs.append(longitudes.max())

        ordered = chunk.sort_values(['VehId', 'Trip', 'Timestamp(ms)'])
        cleaned.append(ordered)

# glob returns matches in arbitrary order; sort them so the files are always
# ingested in the same sequence from run to run.
file_paths = sorted(glob.glob('./eVED/*.csv'))
if not file_paths:
    raise FileNotFoundError("No CSV files found in ./eVED/")
for p in file_paths:
    load_ev_data(p)
# load_ev_data skips chunks with no usable coordinates, so `cleaned` can be
# empty even when files exist. pd.concat would then fail with an opaque
# "No objects to concatenate"; report the real cause instead.
if not cleaned:
    raise ValueError("No rows with non-zero coordinates found in ./eVED/*.csv")
ev_df = pd.concat(cleaned, ignore_index=True)
print(f"Loaded EV data: {ev_df.shape[0]} rows")

In [None]:
# Bounding box of all loaded samples, taken from the per-chunk extremes.
LAT_MIN = min(lat_mins)
LAT_MAX = max(lat_maxs)
LNG_MIN = min(lon_mins)
LNG_MAX = max(lon_maxs)


def normalize_coords(lat, lon):
    """Rescale a latitude/longitude pair into the unit square of the data's bounding box.

    Each coordinate is min-max scaled with the module-level bounds and then
    clipped to [0, 1]. An axis whose bounds coincide maps to 0.

    Args:
        lat: Latitude value(s).
        lon: Longitude value(s).

    Returns:
        Tuple ``(scaled_lat, scaled_lon)``.
    """
    if LAT_MAX != LAT_MIN:
        lat_unit = (lat - LAT_MIN) / (LAT_MAX - LAT_MIN)
    else:
        lat_unit = 0

    if LNG_MAX != LNG_MIN:
        lon_unit = (lon - LNG_MIN) / (LNG_MAX - LNG_MIN)
    else:
        lon_unit = 0

    return np.clip(lat_unit, 0, 1), np.clip(lon_unit, 0, 1)

In [None]:
def _maxspeed_kmh(raw):
    """Parse an OSM ``maxspeed`` value into km/h; return 0.0 when it is unusable.

    Accepts a bare number ("50"), a number with a unit ("50 km/h", "30 mph",
    "30mph") or a list of such values, in which case the first entry is used.
    Values tagged in mph are converted so every edge uses the same unit.
    """
    if isinstance(raw, list):
        raw = raw[0] if raw else 0
    text = str(raw).strip().lower()
    is_mph = text.endswith('mph')
    if is_mph:
        text = text[:-3]
    try:
        value = float(text.split()[0])
    except (IndexError, ValueError):
        # Empty or non-numeric tags (e.g. "none", "signals") carry no usable limit.
        return 0.0
    return value * 1.609344 if is_mph else value


# Download the drivable road network around the centre of the data's bounding box.
center_lat, center_lon = (LAT_MIN+LAT_MAX)/2, (LNG_MIN+LNG_MAX)/2
print("Downloading OSMnx graph...")
G_nx = ox.graph_from_point((center_lat, center_lon), dist=30000, network_type='drive')
# OSM node id -> contiguous row index used by the tensors below.
node_id_map = {nid: i for i, nid in enumerate(G_nx.nodes())}

# node features: normalized (latitude, longitude)
node_feats = []
for nid, data in G_nx.nodes(data=True):
    nl, ml = normalize_coords(data['y'], data['x'])
    node_feats.append([nl, ml])
x = torch.tensor(node_feats, dtype=torch.float)

# edge features: collect endpoints, lengths and speed limits in a single pass
# so all four lists are guaranteed to share the same edge order.
e_u, e_v = [], []
lengths, speeds = [], []
for u, v, data in G_nx.edges(data=True):
    e_u.append(node_id_map[u])
    e_v.append(node_id_map[v])
    lengths.append(data.get('length', 0.0))
    speeds.append(_maxspeed_kmh(data.get('maxspeed', 0)))

# Fall back to 1.0 so an all-zero length column does not divide by zero.
max_len = max(lengths) or 1.0
MAX_SP = 130.0  # km/h used to scale speed limits into [0, 1]
edge_feats = [
    [length / max_len, np.clip(speed / MAX_SP, 0, 1)]
    for length, speed in zip(lengths, speeds)
]

edge_index = torch.tensor([e_u, e_v], dtype=torch.long)
edge_attr = torch.tensor(edge_feats, dtype=torch.float)
graph = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
print(graph)

In [None]:
# Attach each sample to its nearest graph node, as an OSM id and as a tensor row index.
nearest_osm = ox.nearest_nodes(G_nx, X=ev_df['Longitude[deg]'], Y=ev_df['Latitude[deg]'])
ev_df['node_osm'] = nearest_osm
ev_df['node_idx'] = ev_df['node_osm'].map(node_id_map)
ev_df.dropna(subset=['node_idx'], inplace=True)
ev_df['node_idx'] = ev_df['node_idx'].astype(int)
ev_df.sort_values(['VehId', 'Trip', 'Timestamp(ms)'], inplace=True)

# Seconds elapsed since the previous sample of the same trip (0 for the first
# sample), then the per-step quantity MAF * duration.
trip_keys = ['VehId', 'Trip']
elapsed_ms = ev_df.groupby(trip_keys)['Timestamp(ms)'].diff().fillna(0)
ev_df['duration_s'] = elapsed_ms / 1000
ev_df['step_energy'] = ev_df['MAF[g/sec]'] * ev_df['duration_s']

# One record per trip: first node, last node and the summed step energy.
trips = []
for _, g in ev_df.groupby(trip_keys):
    if len(g) < 2:
        continue
    first_row = g.iloc[0]
    last_row = g.iloc[-1]
    s = int(first_row.node_idx)
    d = int(last_row.node_idx)
    e_sum = g.step_energy.sum()
    # Skip trips that start and end on the same node or have no positive energy.
    if s == d or not e_sum > 0:
        continue
    trips.append({'source': s, 'destination': d, 'true_energy': e_sum})

processed_trips_df = pd.DataFrame(trips)
print(f"Processed {len(processed_trips_df)} trips")

In [None]:
# Training configuration.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'
LEARNING_RATE = 0.0005
BATCH_SIZE = 16  # batch size of the training DataLoader
NUM_EPOCHS = 30
VALIDATION_SPLIT = 0.15  # fraction of trips held out for validation
MODEL_SAVE_PATH = './best_path_energy_predictor.pth'

class PathTripDataset(Dataset):
    """Trips represented as the edge sequence of their shortest path plus a log-energy target.

    Each item is ``(edge_idxs, energy_log)``: ``edge_idxs`` is a 1-D
    ``LongTensor`` of column positions into ``graph.edge_index`` along the
    length-weighted shortest path from the trip's source to its destination,
    and ``energy_log`` is ``log1p(true_energy)`` as a float32 scalar tensor.
    Paths have different lengths, so batches need a collate function that
    does not stack ``edge_idxs``.

    Args:
        trip_df: DataFrame with ``source``, ``destination`` (node indices)
            and ``true_energy`` columns.
        graph: Object exposing ``edge_index`` as a ``(2, E)`` integer tensor.
        G_nx: networkx graph keyed by OSM node id, used for routing.
        idx2osm: Mapping from node index to OSM node id.
    """

    def __init__(self, trip_df, graph, G_nx, idx2osm):
        self.trips = trip_df.reset_index(drop=True)
        self.graph = graph
        self.G_nx = G_nx
        self.idx2osm = idx2osm
        # Cache: (osmid_src, osmid_dst) -> torch.LongTensor of edge indices
        self._path_cache = {}
        # Inverse of idx2osm, so path nodes can be translated back to indices.
        self._osm2idx = {osm: idx for idx, osm in idx2osm.items()}
        # (src_idx, dst_idx) -> first position of that edge in edge_index.
        # Built once from plain ints: iterating the tensor yields 0-dim
        # tensors, which hash by identity and therefore never match the int
        # keys of a dict. setdefault keeps the first of any parallel edges.
        src, dst = graph.edge_index.tolist()
        self._edge_lookup = {}
        for pos, pair in enumerate(zip(src, dst)):
            self._edge_lookup.setdefault(pair, pos)

    def __len__(self):
        return len(self.trips)

    def _edges_for_path(self, path_nodes):
        """Translate a list of OSM node ids into positions in ``graph.edge_index``.

        Consecutive node pairs that have no matching edge are skipped.
        """
        idxs = []
        for u_osm, v_osm in zip(path_nodes[:-1], path_nodes[1:]):
            pair = (self._osm2idx.get(u_osm), self._osm2idx.get(v_osm))
            pos = self._edge_lookup.get(pair)
            if pos is not None:
                idxs.append(pos)
        return torch.tensor(idxs, dtype=torch.long)

    def __getitem__(self, i):
        r = self.trips.iloc[i]
        s, d = int(r.source), int(r.destination)
        os_s, os_d = self.idx2osm[s], self.idx2osm[d]
        key = (os_s, os_d)

        edge_idxs = self._path_cache.get(key)
        if edge_idxs is None:
            # An unroutable pair yields an empty path, and so an empty tensor.
            try:
                path_nodes = nx.shortest_path(self.G_nx, os_s, os_d, weight='length')
            except (nx.NetworkXNoPath, nx.NodeNotFound):
                path_nodes = []
            edge_idxs = self._edges_for_path(path_nodes)
            self._path_cache[key] = edge_idxs

        # Log-scale the target; tensors stay on CPU and the training loop
        # decides where to move them.
        energy_log = np.log1p(r.true_energy)
        return edge_idxs, torch.tensor(energy_log, dtype=torch.float32)

In [None]:
class GNNPathEnergyPredictor(nn.Module):
    """Predict one scalar per path from GNN node embeddings and a GRU over the path's edges.

    Node and edge features are first projected to ``hidden_dim``. A stack of
    residual GINEConv + LayerNorm + ReLU layers refines the node embeddings
    over the whole graph. Each path is then encoded as a sequence of
    ``[source_emb, edge_emb, dest_emb]`` vectors, summarized by a GRU, and
    the final hidden state is decoded into a single value.

    Args:
        node_dim: Number of input features per node.
        edge_dim: Number of input features per edge.
        hidden_dim: Width of the node/edge embeddings and the decoder.
        rnn_hidden: Hidden size of the GRU.
        num_layers: Number of GINEConv layers.
        dropout: Dropout probability in the decoder.
    """

    def __init__(self, node_dim, edge_dim, hidden_dim=128, rnn_hidden=128, num_layers=4, dropout=0.3):
        super().__init__()
        self.node_emb = Linear(node_dim, hidden_dim)
        self.edge_emb = Linear(edge_dim, hidden_dim)
        self.convs = nn.ModuleList()
        self.norms = nn.ModuleList()
        for _ in range(num_layers):
            emlp = nn.Sequential(Linear(hidden_dim, hidden_dim), ReLU(), Linear(hidden_dim, hidden_dim))
            self.convs.append(GINEConv(emlp, edge_dim=hidden_dim))
            self.norms.append(LayerNorm(hidden_dim))
        # Each GRU step consumes [source_emb, edge_emb, dest_emb].
        self.rnn = GRU(input_size=3*hidden_dim, hidden_size=rnn_hidden, batch_first=True)
        self.decoder = nn.Sequential(Linear(rnn_hidden, hidden_dim), ReLU(), Dropout(dropout), Linear(hidden_dim, 1))

    def forward(self, graph, paths):
        """Return one prediction per path.

        Args:
            graph: Object exposing ``x``, ``edge_index`` and ``edge_attr`` tensors.
            paths: Iterable of 1-D ``LongTensor``s, each holding positions
                into ``graph.edge_index``. A path may be empty.

        Returns:
            1-D tensor with ``len(paths)`` predictions. An empty path is
            decoded from an all-zero GRU state.
        """
        x = self.node_emb(graph.x)
        ea = self.edge_emb(graph.edge_attr)
        for conv, nrm in zip(self.convs, self.norms):
            residual = x
            x = F.relu(nrm(conv(x, graph.edge_index, ea)))
            x = x + residual
        emb = x

        seqs = []
        for eidx in paths:
            u, v = graph.edge_index[:, eidx]
            seqs.append(torch.cat([emb[u], ea[eidx], emb[v]], dim=1))

        # pack_padded_sequence rejects zero-length sequences, so only the
        # non-empty paths go through the GRU; the rest keep a zero state.
        lengths = [seq.size(0) for seq in seqs]
        keep = [i for i, n in enumerate(lengths) if n > 0]
        h_last = emb.new_zeros(len(seqs), self.rnn.hidden_size)
        if keep:
            packed = pack_padded_sequence(
                pad_sequence([seqs[i] for i in keep], batch_first=True),
                [lengths[i] for i in keep],
                batch_first=True,
                enforce_sorted=False,
            )
            _, h = self.rnn(packed)
            rows = torch.tensor(keep, dtype=torch.long, device=h.device)
            # Out-of-place scatter keeps the autograd graph intact.
            h_last = h_last.index_copy(0, rows, h.squeeze(0).to(h_last.dtype))

        preds = self.decoder(h_last).squeeze(1)
        return preds

In [None]:
def collate_paths(batch):
    """Collate ``(edge_idxs, energy)`` samples into ``(list_of_paths, energy_tensor)``.

    Paths differ in length, so they stay a Python list instead of being
    stacked the way the default DataLoader collate would; only the scalar
    targets are stacked into one 1-D tensor.
    """
    paths, energies = zip(*batch)
    return list(paths), torch.stack(energies)


print("Splitting trip data into training and validation sets...")
train_trips_df, val_trips_df = train_test_split(
    processed_trips_df,
    test_size=VALIDATION_SPLIT,
    random_state=42
)
print(f"Training trips: {len(train_trips_df)}, Validation trips: {len(val_trips_df)}")

# Create datasets and dataloaders
idx2osm = {v: k for k, v in node_id_map.items()}
train_dataset = PathTripDataset(train_trips_df, graph, G_nx, idx2osm)
val_dataset = PathTripDataset(val_trips_df, graph, G_nx, idx2osm)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate_paths)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE*2, shuffle=False, collate_fn=collate_paths)

# Initialize model, optimizer, and loss function
model = GNNPathEnergyPredictor(graph.x.shape[1], graph.edge_attr.shape[1]).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss_fn = nn.MSELoss()

# Prepare graph data (move to device once)
print(f"Moving graph data to device {DEVICE}...")
try:
    prepared_graph = graph.to(DEVICE)
except Exception as e:
    # Re-raise instead of calling exit(): in a notebook exit() shuts the
    # kernel down and discards the traceback along with all loaded data.
    print(f"Error moving graph data to device {DEVICE}: {e}")
    raise
print("Graph data successfully moved.")

# Training tracking variables
best_val_loss = float('inf')
history = {'train_loss': [], 'val_loss': []}

# Training loop
print(f"\n--- Starting Training for {NUM_EPOCHS} Epochs ---")
for epoch in range(NUM_EPOCHS):
    start_time = time.time()

    # --- Training Phase ---
    model.train()
    total_train_loss = 0.0
    train_batches = 0
    train_progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Train]", leave=False)

    for edge_idxs, energy in train_progress_bar:
        optimizer.zero_grad()

        # Move batch data to device; the batch of paths is a list of tensors,
        # so each path is moved individually.
        edge_idxs = [path.to(DEVICE) for path in edge_idxs]
        energy = energy.to(DEVICE)

        # Forward pass
        predictions = model(prepared_graph, edge_idxs)

        # Calculate loss
        loss = loss_fn(predictions, energy)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()
        train_batches += 1
        train_progress_bar.set_postfix({'loss': f"{loss.item():.4f}"})

    avg_train_loss = total_train_loss / train_batches if train_batches > 0 else 0.0
    history['train_loss'].append(avg_train_loss)

    # --- Validation Phase ---
    model.eval()
    total_val_loss = 0.0
    val_batches = 0
    val_progress_bar = tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} [Val]", leave=False)

    with torch.no_grad():
        for edge_idxs, energy in val_progress_bar:
            # Move batch data to device
            edge_idxs = [path.to(DEVICE) for path in edge_idxs]
            energy = energy.to(DEVICE)

            # Forward pass
            predictions = model(prepared_graph, edge_idxs)

            # Calculate loss
            loss = loss_fn(predictions, energy)
            total_val_loss += loss.item()
            val_batches += 1
            val_progress_bar.set_postfix({'loss': f"{loss.item():.4f}"})

    avg_val_loss = total_val_loss / val_batches if val_batches > 0 else 0.0
    history['val_loss'].append(avg_val_loss)
    end_time = time.time()
    epoch_duration = end_time - start_time

    # --- Epoch Summary and Model Saving ---
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Duration: {epoch_duration:.2f}s")
    print(f"  Train Loss: {avg_train_loss:.6f}")
    print(f"  Val Loss:   {avg_val_loss:.6f}")

    # Save the model if validation loss improved
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        # Save the model state dictionary
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
        print(f"  Validation loss improved. Saved model to {MODEL_SAVE_PATH}")
    else:
        print("  Validation loss did not improve.")

print("\n--- Training Complete ---")
print(f"Best Validation Loss: {best_val_loss:.6f}")
print(f"Best model saved to: {MODEL_SAVE_PATH}")

# Optional: Load best model for inference
best_model = GNNPathEnergyPredictor(graph.x.shape[1], graph.edge_attr.shape[1]).to(DEVICE)
# map_location lets a checkpoint written on a GPU machine load on a CPU-only one.
best_model.load_state_dict(torch.load(MODEL_SAVE_PATH, map_location=DEVICE))
# Switch to eval mode so the decoder's dropout is disabled at inference time.
best_model.eval()
print("Loaded best model for inference.")