In [1]:
%load_ext autoreload
%autoreload 2

import os

import pandas as pd
from geopy.distance import geodesic

from sf_generator import SFGenerator
from generator import PDPTWGenerator
# Read the Google Maps key from the environment instead of committing it to the
# notebook; falls back to an empty string when the variable is not set.
# NOTE(review): the key that used to be hardcoded here has been exposed and
# should be revoked/rotated.
GOOGLE_MAP = os.environ.get("GOOGLE_MAPS_API_KEY", "")

In [17]:
# Load the traveler trip dataset into `data` and display its column names.
# NOTE: a later cell writes `data` back to this same CSV path, so the file
# read here may already contain columns added by a previous run.
import h3
data = pd.read_csv("traveler_trip_types.csv")
data.columns

Index(['traveler_id', 'age', 'occupation', 'income', 'flexibility',
       'trip_purpose', 'departure_location', 'arrival_location', 'origin_gps',
       'destination_gps', 'trip_distance_miles', 'trip_duration_minutes',
       'departure_time_window', 'arrival_time_window',
       'flexibility_pickup_earlier', 'flexibility_dropoff_later'],
      dtype='object')

In [44]:
resolution = 9


def _gps_to_cell(gps_text):
    """Return the H3 cell (at `resolution`) containing a "[lat, lon]" string."""
    lat_text, lon_text = gps_text.strip().strip("[]()").split(",")
    return h3.latlng_to_cell(float(lat_text), float(lon_text), resolution)


# One shared helper for both endpoints instead of two copy-pasted row-wise
# lambdas; only a single column is needed, so map over that column directly.
data["origin_h3"] = data["origin_gps"].map(_gps_to_cell)
data["destination_h3"] = data["destination_gps"].map(_gps_to_cell)

# Sort the distinct cells: a bare list(set(...)) has an arbitrary order that
# can change between kernel runs, and this order defines the row/column
# layout of the travel-time matrix built from it.
unique_h3 = sorted(set(data["origin_h3"]) | set(data["destination_h3"]))
len(unique_h3)

207

In [46]:
# Centre (lat, lon) of every distinct H3 cell, in the same order as unique_h3.
h3_loc = [h3.cell_to_latlng(cell_id) for cell_id in unique_h3]

In [50]:
# The API key must come from the environment, never from the notebook itself:
# `export VAR=...` is shell syntax (a SyntaxError in a Python cell), and a key
# pasted here ends up in version control. Export GOOGLE_MAPS_API_KEY in the
# shell before starting Jupyter; this cell only checks that it is present.
# NOTE(review): the key previously pasted in this cell should be revoked/rotated.
import os

if not os.environ.get("GOOGLE_MAPS_API_KEY"):
    print("GOOGLE_MAPS_API_KEY is not set; export it in your shell and restart the kernel.")

Note: you may need to restart the kernel to use updated packages.


In [21]:
# Useful columns: traveler_id, origin_gps, destination_gps, flexibility_pickup_earlier, flexibility_dropoff_later
# Code snippet for getting distance between two points: 
# data["origin_gps"][0] : [37.7516, -122.4482]
def to_tuple(str_loc):
    """
    Parse a GPS string such as "[37.7516, -122.4482]" into a (float, float) tuple.

    Surrounding whitespace and an optional pair of square brackets or
    parentheses are removed before parsing, so unbracketed or padded input is
    parsed correctly instead of silently losing its first/last character.

    Raises
    ------
    ValueError
        If the string does not contain exactly two comma-separated numbers.
    """
    parts = str_loc.strip().strip("[]()").split(",")
    if len(parts) != 2:
        raise ValueError(f"Expected a 'lat, lon' pair, got {str_loc!r}")
    first_text, second_text = parts
    # float() tolerates the whitespace left around each component.
    return (float(first_text), float(second_text))

# Geodesic distance (km) between the origin and destination of the row labelled 0.
first_origin = to_tuple(data["origin_gps"][0])
first_destination = to_tuple(data["destination_gps"][0])
distance_km = geodesic(first_origin, first_destination).km
print(distance_km)

3.992077953991561


In [72]:
import os
import math
import requests
from tqdm import tqdm

def build_travel_time_matrix(h3_loc, api_key=None, mode="driving", timeout=30):
    """
    Build an NxN matrix of travel times (seconds) between all h3_loc points
    using the Google Distance Matrix API.

    Origins and destinations are tiled into square blocks so that every
    request respects the 100-element (origins * destinations) limit.

    Parameters
    ----------
    h3_loc : iterable of (lat, lon) pairs
        Points to route between; entry i defines row i and column i.
    api_key : str, optional
        Google Maps API key. When omitted, the GOOGLE_MAPS_API_KEY environment
        variable is used, so the key never has to be written into the notebook.
    mode : str
        Value sent as the API's ``mode`` parameter.
    timeout : float
        Seconds to wait on each HTTP request before giving up, so a stalled
        connection cannot hang the whole run.

    Returns
    -------
    list of list
        ``time_matrix[i][j]`` is the ``duration.value`` the API reported for
        travelling from point i to point j, ``float("inf")`` when that element's
        status was not "OK", and 0 on the diagonal. Empty input gives ``[]``.

    Raises
    ------
    ValueError
        If no API key is passed and GOOGLE_MAPS_API_KEY is unset or empty.
    RuntimeError
        If the API reports a non-"OK" status or returns a block whose shape
        does not match the request.
    requests.HTTPError
        If an HTTP request fails (raised by ``raise_for_status``).
    """
    if api_key is None:
        api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
    if not api_key:
        raise ValueError("No API key provided. Set GOOGLE_MAPS_API_KEY or pass api_key.")

    addresses = [f"{lat},{lon}" for (lat, lon) in h3_loc]
    n = len(addresses)
    if n == 0:
        # Nothing to query.
        return []

    # Google Distance Matrix: max 100 elements per request (origins * destinations)
    max_elements = 100
    # Square tiles of side floor(sqrt(100)) = 10 can never exceed that limit,
    # including the smaller partial tiles at the edges of the matrix.
    block_size = int(math.sqrt(max_elements))

    time_matrix = [[None] * n for _ in range(n)]

    base_url = "https://maps.googleapis.com/maps/api/distancematrix/json"

    for row_start in tqdm(range(0, n, block_size)):
        origins_chunk = addresses[row_start:row_start + block_size]

        for col_start in range(0, n, block_size):
            dests_chunk = addresses[col_start:col_start + block_size]

            params = {
                "origins": "|".join(origins_chunk),
                "destinations": "|".join(dests_chunk),
                "mode": mode,
                "key": api_key,
            }

            resp = requests.get(base_url, params=params, timeout=timeout)
            resp.raise_for_status()
            payload = resp.json()

            if payload.get("status") != "OK":
                raise RuntimeError(f"Distance Matrix API error: {payload.get('status')}, {payload}")

            rows = payload.get("rows", [])
            if len(rows) != len(origins_chunk):
                raise RuntimeError("Unexpected number of rows in Distance Matrix response.")

            for i, row in enumerate(rows):
                elements = row.get("elements", [])
                expected_cols = len(dests_chunk)
                if len(elements) != expected_cols:
                    raise RuntimeError(
                        f"Unexpected number of elements in row: "
                        f"{len(elements)} vs expected {expected_cols}"
                    )

                for j, elem in enumerate(elements):
                    if elem.get("status") == "OK":
                        travel_time = elem["duration"]["value"]
                    else:
                        # No usable route for this pair: mark it as unreachable.
                        travel_time = float("inf")
                    time_matrix[row_start + i][col_start + j] = travel_time

    # Set diagonal to zero
    for i in range(n):
        time_matrix[i][i] = 0

    return time_matrix

In [75]:
# Query travel times between every pair of H3 cell centres. This is slow
# (the recorded run took about 10 minutes for 207 cells), so avoid
# re-running it needlessly.
matrix = build_travel_time_matrix(h3_loc)

100%|███████████████████████████████████████████| 21/21 [10:45<00:00, 30.73s/it]


In [83]:
import numpy as np

# Convert the nested list to a float array and persist it as a comma-separated file.
matrix_array = np.array(matrix, dtype=float)
np.savetxt(fname="travel_time_matrix.csv", X=matrix_array, delimiter=",")

In [85]:
# Write the frame (now including the H3 columns) back to the dataset file.
# index=False keeps the row index out of the file; otherwise every reload of
# this CSV gains an extra "Unnamed: 0" column.
data.to_csv("traveler_trip_types.csv", index=False)

In [11]:
# Generate a batch of two instances with a default-constructed PDPTWGenerator
# and print the resulting container to inspect its fields.
# NOTE(review): `_generate` is a private method of PDPTWGenerator — confirm
# there is no public entry point that should be used instead.
td = PDPTWGenerator()._generate(batch_size=2)

print(td)

TensorDict(
    fields={
        capacity: Tensor(shape=torch.Size([2]), device=cpu, dtype=torch.float32, is_shared=False),
        demand: Tensor(shape=torch.Size([2, 40]), device=cpu, dtype=torch.float32, is_shared=False),
        depot: Tensor(shape=torch.Size([2, 2]), device=cpu, dtype=torch.float32, is_shared=False),
        durations: Tensor(shape=torch.Size([2, 41]), device=cpu, dtype=torch.float32, is_shared=False),
        locs: Tensor(shape=torch.Size([2, 40, 2]), device=cpu, dtype=torch.float32, is_shared=False),
        time_windows: Tensor(shape=torch.Size([2, 41, 2]), device=cpu, dtype=torch.float32, is_shared=False)},
    batch_size=torch.Size([2]),
    device=None,
    is_shared=False)


In [15]:
# Instantiate SFGenerator with its default arguments and display its
# vehicle_capacity attribute.
generator = SFGenerator()
generator.vehicle_capacity

4