In [1]:
!pip install transformers
!pip install torch
!pip install osmnx

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [2]:
import pandas as pd
import numpy as np
import osmnx as ox
import matplotlib.pyplot as plt
import math

In [7]:
ev_df = pd.read_csv("./eVED/eVED_180808_week.csv")
ev_df.head()

Unnamed: 0,DayNum,VehId,Trip,Timestamp(ms),Latitude[deg],Longitude[deg],Vehicle Speed[km/h],MAF[g/sec],Engine RPM[RPM],Absolute Load[%],...,Energy_Consumption,Matchted Latitude[deg],Matched Longitude[deg],Match Type,Class of Speed Limit,Speed Limit[km/h],Speed Limit with Direction[km/h],Intersection,Bus Stops,Focus Points
0,281.984564,10.0,2870.0,0.0,42.269976,-83.749867,55.469997,,,,...,0.003518,42.269986,-83.750159,0.0,0.0,48,48.0,,1.0,
1,281.984564,10.0,2870.0,600.0,42.269976,-83.749867,54.719997,,,,...,0.003518,42.269986,-83.750159,1.0,0.0,48,48.0,,1.0,
2,281.984564,10.0,2870.0,1700.0,42.269976,-83.749867,54.349998,,,,...,0.003518,42.269986,-83.750159,1.0,0.0,48,48.0,,1.0,
3,281.984564,10.0,2870.0,1900.0,42.269976,-83.749867,54.349998,,,,...,0.003622,42.269986,-83.750159,1.0,0.0,48,48.0,,1.0,
4,281.984564,10.0,2870.0,2700.0,42.269976,-83.749867,53.059998,,,,...,0.003622,42.269986,-83.750159,1.0,0.0,48,48.0,,1.0,


In [8]:
grouped = ev_df.groupby(['VehId', 'Trip'])

# Extract complete routes as sequences
route_sequences = []
routes_total = 0;

for (vehid, trip), group in grouped:
    # Sort by timestamp to ensure correct sequence
    group = group.sort_values('Timestamp(ms)')

    # Extract coordinates as sequence
    route = list(zip(group['Latitude[deg]'], group['Longitude[deg]']))

    # Store as (source, destination, full_route)
    source = route[0]
    destination = route[-1]
    route_sequences.append({
        'source': source,
        'destination': destination,
        'full_route': route,
        'vehicle': vehid,
        'trip': trip,
    })

    routes_total += 1


print(routes_total)

87


In [9]:
def create_features(route_data):
    features = []
    for data in route_data:
        src_lat, src_lng = data['source']
        dst_lat, dst_lng = data['destination']

        feature = f"[SRC] {src_lat:.6f} {src_lng:.6f} [DST] {dst_lat:.6f} {dst_lng:.6f}"

        features.append(feature)

    return features


In [26]:
LAT_MIN, LAT_MAX = 33.0, 38.0
LNG_MIN, LNG_MAX = -123.0, -117.0

def normalize(lat, lng):
    norm_lat = (lat - LAT_MIN) / (LAT_MAX - LAT_MIN)
    norm_lng = (lng - LNG_MIN) / (LNG_MAX - LNG_MIN)
    return norm_lat, norm_lng

In [27]:
from torch.utils.data import Dataset, DataLoader

class RouteDataset(Dataset):
  def __init__(self, route_data, tokenizer, max_points=100):
    self.route_data = route_data
    self.tokenizer = tokenizer
    self.max_points = max_points
    self.features = create_features(route_data)
    self.route_targets = self.tokenize_routes(route_data)

  def __len__(self):
    return len(self.route_data)

  def __getitem__(self, idx):
    item_ft = self.features[idx]
    item_tokenized = self.tokenizer(
        item_ft, padding="max_length", truncation=True, max_length=128,
        return_tensors="pt"
    )

    target = self.route_targets[idx]
    item = {
        'input_ids': item_tokenized['input_ids'].squeeze(),
        'attention_mask': item_tokenized['attention_mask'].squeeze(),
        'target_routes': torch.tensor(target, dtype=torch.float32)
    }

    return item

  def tokenize_routes(self, route_data):
    tokenized_routes = []
    for data in route_data:
        route = data['full_route']
        if len(route) > self.max_points:
            indices = np.linspace(0, len(route)-1, self.max_points).astype(int)
            route = [route[i] for i in indices]
        else:
            route = route + [(0, 0)] * (self.max_points - len(route))

        flat_route = []
        for lat, lng in route:
          norm_lat, norm_lng = normalize(lat, lng)
          flat_route.append(norm_lat)
          flat_route.append(norm_lng)

        tokenized_routes.append(flat_route)
    return tokenized_routes


In [28]:
def get_dataloader(route_data, tokenizer, batch_size=8, shuffle=True, max_points=100):
  dataset = RouteDataset(route_data, tokenizer, max_points=max_points)
  loader = DataLoader(
      dataset, batch_size=batch_size,
      shuffle=shuffle,
      num_workers=0,
      pin_memory=True
  )

  return loader

In [29]:
from transformers import BertModel, BertConfig, BertTokenizer
import torch.nn as nn
import torch

class RoutePredictor(nn.Module):
    def __init__(self):
        super(RoutePredictor, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        hidden_size = self.bert.config.hidden_size

        # Adjust the output dimension to match your target shape (100 points × 2 coordinates)
        self.route_head = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Linear(512, 200)  # 100 points with lat/lng for each point
        )

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0]  # Use [CLS] token representation
        route_prediction = self.route_head(cls_output)
        return route_prediction.view(route_prediction.size(0), -1, 2)  # Reshape to (batch, points, 2)


In [30]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [31]:
from sklearn.model_selection import train_test_split

train_data, val_data = train_test_split(route_sequences, test_size=0.2, random_state=42)

def train_model(model, train_data, val_data, tokenizer, epochs=10):
    #tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    train_loader = get_dataloader(train_data, tokenizer)
    val_loader = get_dataloader(val_data, tokenizer)

    optimizer = torch.optim.Adam(model.parameters(), lr=2e-5)
    loss_fn = nn.MSELoss()

    for epoch in range(epochs):
      model.train()
      train_loss = 0

      for batch in train_loader:
        optimizer.zero_grad()

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        target_routes = batch['target_routes'].to(device)
        target_routes = target_routes.view(target_routes.size(0), 100, 2)

        outputs = model(input_ids, attention_mask)
        loss = loss_fn(outputs, target_routes)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

      model.eval()
      val_loss = 0

      with torch.no_grad():
          for batch in val_loader:
              input_ids = batch['input_ids'].to(device)
              attention_mask = batch['attention_mask'].to(device)
              target_routes = batch['target_routes'].to(device)
              target_routes = target_routes.view(target_routes.size(0), 100, 2)

              outputs = model(input_ids, attention_mask)
              loss = loss_fn(outputs, target_routes)
              val_loss += loss.item()

      print(f"Epoch {epoch+1}/{epochs}")
      print(f"Train Loss: {train_loss/len(train_loader):.4f}")
      print(f"Val Loss: {val_loss/len(val_loader):.4f}")

In [33]:
model = RoutePredictor()
model.to(device)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
train_model(model, train_data, val_data, tokenizer, epochs=50)

Epoch 1/50
Train Loss: 22.1522
Val Loss: 21.2092
Epoch 2/50
Train Loss: 21.0029
Val Loss: 20.3368
Epoch 3/50
Train Loss: 20.2186
Val Loss: 19.6005
Epoch 4/50
Train Loss: 19.5049
Val Loss: 18.9022
Epoch 5/50
Train Loss: 18.7986
Val Loss: 18.1777
Epoch 6/50
Train Loss: 18.0471
Val Loss: 17.3700
Epoch 7/50
Train Loss: 17.2096
Val Loss: 16.5092
Epoch 8/50
Train Loss: 16.3158
Val Loss: 15.5533
Epoch 9/50
Train Loss: 15.3238
Val Loss: 14.5102
Epoch 10/50
Train Loss: 14.2532
Val Loss: 13.4010
Epoch 11/50
Train Loss: 13.1350
Val Loss: 12.2593
Epoch 12/50
Train Loss: 11.9812
Val Loss: 11.0973
Epoch 13/50
Train Loss: 10.8103
Val Loss: 9.9244
Epoch 14/50
Train Loss: 9.6468
Val Loss: 8.7804
Epoch 15/50
Train Loss: 8.5087
Val Loss: 7.6781
Epoch 16/50
Train Loss: 7.4167
Val Loss: 6.6300
Epoch 17/50
Train Loss: 6.3887
Val Loss: 5.6514
Epoch 18/50
Train Loss: 5.4299
Val Loss: 4.7567
Epoch 19/50
Train Loss: 4.5559
Val Loss: 3.9445
Epoch 20/50
Train Loss: 3.7645
Val Loss: 3.2163
Epoch 21/50
Train Loss: 

In [23]:
def predict_route(model, tokenizer, source, destination):
    model.eval()

    feature = f"[SRC] {source[0]:.6f} {source[1]:.6f} [DST] {destination[0]:.6f} {destination[1]:.6f}"
    inputs = tokenizer(feature, return_tensors="pt", padding="max_length", truncation=True, max_length=128)

    input_ids = inputs["input_ids"].to(device)
    attention_mask = inputs["attention_mask"].to(device)

    with torch.no_grad():
        predicted_coords = model(input_ids=input_ids, attention_mask=attention_mask)

    predicted_route = [(coord[0].item(), coord[1].item()) for coord in predicted_coords[0]]
    return predicted_route


In [40]:
def visualize_route(source, destination, predicted_route):
    lats = [coord[0] for coord in predicted_route] + [source[0], destination[0]]
    lngs = [coord[1] for coord in predicted_route] + [source[1], destination[1]]

    north, south = max(lats) + 0.01, min(lats) - 0.01
    east, west = max(lngs) + 0.01, min(lngs) - 0.01

    bbox = (west, south, east, north)

    G = ox.graph.graph_from_bbox(
        bbox=bbox,
        network_type='drive_service',
        simplify=True
    )

    fig, ax = plt.subplots(figsize=(10, 10))
    ox.plot_graph(G, ax=ax, show=False, close=False, node_size=0, edge_linewidth=0.5)

    pred_lats = [coord[0] for coord in predicted_route]
    pred_lngs = [coord[1] for coord in predicted_route]
    ax.plot(pred_lngs, pred_lats, 'r-', linewidth=2, label='Predicted Route')

    ax.plot(source[1], source[0], 'go', markersize=10, label='Source')
    ax.plot(destination[1], destination[0], 'ro', markersize=10, label='Destination')

    ax.legend()
    plt.tight_layout()
    plt.show()


In [47]:
def visualize_control_and_predicted_route(df, veh_id, trip_id, model, tokenizer):
    route_df = df[(df['VehId'] == veh_id) & (df['Trip'] == trip_id)]
    route_df = route_df.sort_values('Timestamp(ms)')

    lats = route_df['Latitude[deg]'].tolist()
    lngs = route_df['Longitude[deg]'].tolist()

    if len(lats) == 0:
        print(f"No data found for Vehicle {veh_id}, Trip {trip_id}")
        return

    source = (lats[0], lngs[0])
    destination = (lats[-1], lngs[-1])

    predicted_route = predict_route(model, tokenizer, source, destination)
    pred_lats = [coord[0] for coord in predicted_route]
    pred_lngs = [coord[1] for coord in predicted_route]

    all_lats = lats + pred_lats
    all_lngs = lngs + pred_lngs
    north, south = max(all_lats) + 0.01, min(all_lats) - 0.01
    east, west = max(all_lngs) + 0.01, min(all_lngs) - 0.01
    bbox = (west, south, east, north)

    G = ox.graph.graph_from_bbox(
        bbox=bbox,
        network_type='drive_service',
        simplify=True
    )

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))

    ox.plot_graph(G, ax=ax1, show=False, close=False, node_size=0, edge_linewidth=0.5)
    ax1.plot(lngs, lats, 'r-', linewidth=2, label='Actual Route')
    ax1.plot(lngs[0], lats[0], 'go', markersize=10, label='Start')
    ax1.plot(lngs[-1], lats[-1], 'bo', markersize=10, label='End')
    ax1.set_title(f'Actual Route for Vehicle {veh_id}, Trip {trip_id}')
    ax1.legend()

    ox.plot_graph(G, ax=ax2, show=False, close=False, node_size=0, edge_linewidth=0.5)
    ax2.plot(pred_lngs, pred_lats, 'r-', linewidth=2, label='Predicted Route')
    ax2.plot(lngs[0], lats[0], 'go', markersize=10, label='Start')
    ax2.plot(lngs[-1], lats[-1], 'bo', markersize=10, label='End')
    ax2.set_title(f'Model Prediction for Vehicle {veh_id}, Trip {trip_id}')
    ax2.legend()

    plt.tight_layout()
    plt.show()


In [None]:
visualize_control_and_predicted_route(ev_df, veh_id=10, trip_id=2870, model=model, tokenizer=tokenizer)

  multi_poly_proj = utils_geo._consolidate_subdivide_geometry(poly_proj)
