In [1]:
!pip install transformers
!pip install torch
!pip install osmnx

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [1]:
import pandas as pd
import numpy as np
import osmnx as ox
import matplotlib.pyplot as plt
import math

In [2]:
# Load the eVED extract from a path relative to the notebook's working directory.
# NOTE(review): the file name suggests a one-week slice of the dataset — confirm.
ev_df = pd.read_csv("./eVED/eVED_181107_week.csv")
# Preview the first rows to check that the columns were parsed as expected.
ev_df.head()

Unnamed: 0,DayNum,VehId,Trip,Timestamp(ms),Latitude[deg],Longitude[deg],Vehicle Speed[km/h],MAF[g/sec],Engine RPM[RPM],Absolute Load[%],...,Energy_Consumption,Matchted Latitude[deg],Matched Longitude[deg],Match Type,Class of Speed Limit,Speed Limit[km/h],Speed Limit with Direction[km/h],Intersection,Bus Stops,Focus Points
0,372.882455,10.0,3263.0,0.0,42.295477,-83.705005,32.079998,,,,...,0.004811,42.295487,-83.704927,0,0.0,64,64.0,,,
1,372.882455,10.0,3263.0,900.0,42.295477,-83.705005,35.73,,,,...,0.004811,42.295487,-83.704927,1,0.0,64,64.0,,,
2,372.882455,10.0,3263.0,1100.0,42.295477,-83.705005,35.73,,,,...,0.005334,42.295487,-83.704927,1,0.0,64,64.0,,,
3,372.882455,10.0,3263.0,2000.0,42.295477,-83.705005,38.77,,,,...,0.005334,42.295487,-83.704927,1,0.0,64,64.0,,,
4,372.882455,10.0,3263.0,2100.0,42.295477,-83.705005,38.77,,,,...,0.005857,42.295487,-83.704927,1,0.0,64,64.0,,,


In [3]:
# One group per (vehicle, trip) pair; each group is turned into a single route below.
grouped = ev_df.groupby(['VehId', 'Trip'])

# Every route is stored as its ordered (lat, lng) samples plus the two endpoints.
route_sequences = []

for (vehid, trip), group in grouped:
    # Chronological order is required so the first/last samples are the endpoints.
    group = group.sort_values('Timestamp(ms)')

    # Pair up the raw latitude/longitude columns into (lat, lng) tuples.
    route = list(zip(group['Latitude[deg]'], group['Longitude[deg]']))
    source, destination = route[0], route[-1]

    route_sequences.append({
        'source': source,
        'destination': destination,
        'full_route': route,
        'vehicle': vehid,
        'trip': trip,
    })

# Exactly one entry is appended per group, so the list length is the route count.
routes_total = len(route_sequences)

print(routes_total)

170


In [4]:
def create_features(route_data):
    """Render the endpoints of every route as a tagged text prompt.

    Args:
        route_data: iterable of dicts, each holding a ``'source'`` and a
            ``'destination'`` entry that unpack into ``(lat, lng)``.

    Returns:
        A list of strings, one per input entry and in the same order, of the
        form ``"[SRC] <lat> <lng> [DST] <lat> <lng>"`` with every coordinate
        formatted to six decimal places.
    """
    def _as_prompt(entry):
        # Unpack source before destination, one pair at a time.
        src_lat, src_lng = entry['source']
        dst_lat, dst_lng = entry['destination']
        return f"[SRC] {src_lat:.6f} {src_lng:.6f} [DST] {dst_lat:.6f} {dst_lng:.6f}"

    return [_as_prompt(entry) for entry in route_data]


In [5]:
from torch.utils.data import Dataset, DataLoader

class RouteDataset(Dataset):
    """Dataset pairing a tokenized source/destination prompt with a fixed-length route.

    Each entry of ``route_data`` is a dict with ``'source'``, ``'destination'``
    and ``'full_route'`` keys, where ``'full_route'`` is a sequence of
    ``(lat, lng)`` pairs. Prompts are built once with ``create_features`` and
    routes are resampled/padded once with ``tokenize_routes``; tokenization of
    the prompt happens lazily in ``__getitem__``.

    Args:
        route_data: sequence of route dicts as described above.
        tokenizer: callable invoked as
            ``tokenizer(text, padding=..., truncation=..., max_length=..., return_tensors="pt")``
            returning a mapping with ``'input_ids'`` and ``'attention_mask'``.
        max_points: number of ``(lat, lng)`` points every target is brought to.
        max_length: token length the prompt is padded/truncated to.
    """

    def __init__(self, route_data, tokenizer, max_points=100, max_length=128):
        self.route_data = route_data
        self.tokenizer = tokenizer
        self.max_points = max_points
        self.max_length = max_length
        self.features = create_features(route_data)
        self.route_targets = self.tokenize_routes(route_data)

    def __len__(self):
        """Return the number of routes in the dataset."""
        return len(self.route_data)

    def __getitem__(self, idx):
        """Return the tokenized prompt and flat route target for route ``idx``.

        Returns:
            dict with ``'input_ids'`` and ``'attention_mask'`` (the tokenizer
            outputs with their leading batch axis removed) and
            ``'target_routes'``, a float32 tensor holding the flattened
            ``lat, lng, lat, lng, ...`` target.
        """
        encoded = self.tokenizer(
            self.features[idx],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        return {
            # squeeze(0) drops only the batch axis added by return_tensors="pt";
            # a bare squeeze() would also collapse any other size-1 axis.
            'input_ids': encoded['input_ids'].squeeze(0),
            'attention_mask': encoded['attention_mask'].squeeze(0),
            'target_routes': torch.tensor(self.route_targets[idx], dtype=torch.float32),
        }

    def tokenize_routes(self, route_data):
        """Bring every route to exactly ``max_points`` points and flatten it.

        Routes longer than ``max_points`` are subsampled at evenly spaced
        indices (always keeping the first and last point); shorter or
        equal-length routes are right-padded with ``(0, 0)``.

        Returns:
            list of flat lists ``[lat0, lng0, lat1, lng1, ...]``, one per route.
        """
        tokenized_routes = []
        for data in route_data:
            route = data['full_route']
            if len(route) > self.max_points:
                indices = np.linspace(0, len(route) - 1, self.max_points).astype(int)
                route = [route[i] for i in indices]
            else:
                # list(...) lets tuple-typed routes be padded as well.
                route = list(route) + [(0, 0)] * (self.max_points - len(route))

            tokenized_routes.append(
                [coord for lat, lng in route for coord in (lat, lng)]
            )
        return tokenized_routes


In [6]:
def get_dataloader(route_data, tokenizer, batch_size=8, shuffle=True, max_points=100):
    """Build a ``DataLoader`` over a ``RouteDataset`` created from ``route_data``.

    Args:
        route_data: route dicts forwarded to ``RouteDataset``.
        tokenizer: tokenizer forwarded to ``RouteDataset``.
        batch_size: number of routes per batch.
        shuffle: whether the loader reshuffles the routes.
        max_points: points per route target, forwarded to ``RouteDataset``.

    Returns:
        The ``DataLoader``; batches are loaded in the main process
        (``num_workers=0``).
    """
    return DataLoader(
        RouteDataset(route_data, tokenizer, max_points=max_points),
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=0,
    )

In [7]:
from transformers import BertModel, BertConfig, BertTokenizer
import torch.nn as nn
import torch

class RoutePredictor(nn.Module):
    """BERT encoder followed by an MLP head that regresses a fixed-length route.

    Args:
        num_points: number of ``(lat, lng)`` points predicted per route; the
            head emits ``num_points * 2`` values. The default of 100 keeps the
            original 200-wide output.
        pretrained_name: identifier passed to ``BertModel.from_pretrained``.
    """

    def __init__(self, num_points=100, pretrained_name='bert-base-uncased'):
        super().__init__()
        self.num_points = num_points
        self.bert = BertModel.from_pretrained(pretrained_name)
        hidden_size = self.bert.config.hidden_size

        # Two-layer head: encoder width -> 512 -> one (lat, lng) pair per point.
        self.route_head = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Linear(512, num_points * 2),
        )

    def forward(self, input_ids, attention_mask):
        """Predict a route for each prompt in the batch.

        Args:
            input_ids: token ids passed to the BERT encoder.
            attention_mask: attention mask passed to the BERT encoder.

        Returns:
            Tensor of shape ``(batch, num_points, 2)``.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 of the last hidden state is the [CLS] token representation.
        cls_output = outputs.last_hidden_state[:, 0]
        route_prediction = self.route_head(cls_output)
        return route_prediction.view(route_prediction.size(0), -1, 2)


In [8]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the routes for validation; the fixed random_state makes the split repeatable.
train_data, val_data = train_test_split(route_sequences, test_size=0.2, random_state=42)

def train_model(model, train_data, val_data, epochs=10):
    """Train ``model`` on ``train_data`` and print train/validation loss per epoch.

    The model is optimized in place with Adam (lr=2e-5) against an MSE loss
    between the predicted and target route coordinates. Batches are moved to
    the device the model's parameters live on, so the caller may place the
    model on CPU or GPU before calling.

    Args:
        model: module called as ``model(input_ids, attention_mask)`` and
            returning a ``(batch, points, 2)`` tensor.
        train_data: route dicts used for optimization.
        val_data: route dicts used only for the per-epoch validation loss.
        epochs: number of passes over ``train_data``.

    Returns:
        None. Progress is reported via ``print``; an empty loader is reported
        as ``nan`` instead of raising ``ZeroDivisionError``.
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    train_loader = get_dataloader(train_data, tokenizer)
    # The averaged validation loss does not depend on order, so skip shuffling.
    val_loader = get_dataloader(val_data, tokenizer, shuffle=False)

    # Follow the model's placement rather than assuming CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    loss_fn = nn.MSELoss()

    def _batch_loss(batch):
        # Shared by the train and validation loops so both treat a batch identically.
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        target_routes = batch['target_routes'].to(device)
        # -1 infers the number of points, so targets of any even width reshape to (batch, points, 2).
        target_routes = target_routes.view(target_routes.size(0), -1, 2)
        return loss_fn(model(input_ids, attention_mask), target_routes)

    def _mean(total, count):
        # Average over batches; nan marks "no batches" without crashing the run.
        return total / count if count else float('nan')

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0

        for batch in train_loader:
            optimizer.zero_grad()
            loss = _batch_loss(batch)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for batch in val_loader:
                val_loss += _batch_loss(batch).item()

        print(f"Epoch {epoch+1}/{epochs}")
        print(f"Train Loss: {_mean(train_loss, len(train_loader)):.4f}")
        print(f"Val Loss: {_mean(val_loss, len(val_loader)):.4f}")

In [9]:
# Instantiate the route regressor and train it with train_model's default number of epochs.
model = RoutePredictor()
train_model(model, train_data, val_data)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Epoch 1/10
Train Loss: 4380.9002
Val Loss: 4364.0638
Epoch 2/10
Train Loss: 4356.6268
Val Loss: 4343.1803
Epoch 3/10
Train Loss: 4335.4314
Val Loss: 4320.1980
Epoch 4/10
Train Loss: 4309.8824
Val Loss: 4290.7840
Epoch 5/10
Train Loss: 4277.1347
Val Loss: 4254.0384
Epoch 6/10
Train Loss: 4235.7796
Val Loss: 4206.5991
Epoch 7/10
Train Loss: 4184.8976
Val Loss: 4150.6369
Epoch 8/10
Train Loss: 4124.2879
Val Loss: 4083.9349
Epoch 9/10
Train Loss: 4053.9684
Val Loss: 4008.3684
Epoch 10/10
Train Loss: 3973.9962
Val Loss: 3921.9154


In [None]:
  def predict_route(model, source, destination, tokenizer=None):
      """Predict the route between two coordinates with a trained model.

      Args:
          model: ``torch.nn.Module`` called as
              ``model(input_ids=..., attention_mask=...)`` and returning a
              ``(batch, points, 2)`` tensor.
          source: ``(lat, lng)`` of the start point.
          destination: ``(lat, lng)`` of the end point.
          tokenizer: tokenizer used to encode the prompt. When omitted, the
              ``'bert-base-uncased'`` tokenizer is loaded, matching the one
              used in ``train_model``; pass one explicitly to avoid reloading
              it on every call.

      Returns:
          list of ``(lat, lng)`` float tuples, one per predicted point.
      """
      if tokenizer is None:
          tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

      # Same prompt format as the training features.
      feature = f"[SRC] {source[0]:.6f} {source[1]:.6f} [DST] {destination[0]:.6f} {destination[1]:.6f}"

      # Same padding/truncation settings as the training dataset.
      inputs = tokenizer(
          feature, padding="max_length", truncation=True, max_length=128,
          return_tensors="pt"
      )

      # Run on whatever device the model lives on.
      first_param = next(model.parameters(), None)
      device = first_param.device if first_param is not None else torch.device('cpu')

      # Pass only the two tensors the model's forward accepts; the tokenizer
      # output may carry extra entries such as token_type_ids.
      input_ids = inputs['input_ids'].to(device)
      attention_mask = inputs['attention_mask'].to(device)

      # Inference must run in eval mode (no dropout); restore the caller's mode afterwards.
      was_training = model.training
      model.eval()
      try:
          with torch.no_grad():
              route_coords = model(input_ids=input_ids, attention_mask=attention_mask)
      finally:
          model.train(was_training)

      # First (only) batch element -> plain Python floats.
      predicted_route = [(lat.item(), lng.item()) for lat, lng in route_coords[0]]

      return predicted_route

In [None]:
# Visualize a predicted route
def visualize_route(source, destination, predicted_route):
    """Plot a predicted route and its endpoints over the OSM road network.

    The road graph is downloaded with OSMnx for the bounding box of all
    points, padded by 0.01 degrees on every side.

    Args:
        source: ``(lat, lng)`` of the start point.
        destination: ``(lat, lng)`` of the end point.
        predicted_route: sequence of ``(lat, lng)`` points to draw.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    pred_lats = [coord[0] for coord in predicted_route]
    pred_lngs = [coord[1] for coord in predicted_route]

    # The bounding box covers the predicted points and both endpoints.
    lats = pred_lats + [source[0], destination[0]]
    lngs = pred_lngs + [source[1], destination[1]]

    north, south = max(lats) + 0.01, min(lats) - 0.01
    east, west = max(lngs) + 0.01, min(lngs) - 0.01

    # OSMnx 2.x takes one (left, bottom, right, top) tuple; older releases
    # take north, south, east, west as separate positional arguments.
    graph_options = dict(network_type='drive_service', simplify=True)
    if int(ox.__version__.split('.')[0]) >= 2:
        G = ox.graph_from_bbox((west, south, east, north), **graph_options)
    else:
        G = ox.graph_from_bbox(north, south, east, west, **graph_options)

    # Draw the road network first; show/close are off so more can be drawn on the axes.
    fig, ax = plt.subplots(figsize=(10, 10))
    ox.plot_graph(G, ax=ax, show=False, close=False, node_size=0, edge_linewidth=0.5)

    # Matplotlib's x axis is longitude and y axis is latitude.
    ax.plot(pred_lngs, pred_lats, 'r-', linewidth=2, label='Predicted')

    ax.plot(source[1], source[0], 'go', markersize=10, label='Source')
    ax.plot(destination[1], destination[0], 'ro', markersize=10, label='Destination')

    ax.legend()
    plt.tight_layout()
    plt.show()