# Test GraphHopper PT Routing (Minimal)
This notebook generates a few random origin-destination (OD) pairs within a coarse bounding box of Braunschweig, calls the running GraphHopper PT backend (port 8989), parses the results with the **minimal parser** (`results_processing`), and stores a small result table.

**Workflow:**
1. Set parameters (port, departure time, number of random pairs)
2. Backend health check / notice if not reachable
3. Generate random coordinates
4. Batch routing (async)
5. Inspect raw result (paths / transfers)
6. Flatten -> DataFrame
7. Export (CSV / Parquet)

Note: We ALWAYS select the temporally fastest path per OD. Pure walking routes then have empty PT fields (`route_id_0 = None`).

In [1]:
# Imports
import random, time, json, os, datetime as dt
import pandas as pd
import asyncio
import requests
from pathlib import Path

# Heuristically find project root (go up until 'input' exists)
def find_repo_root(start: Path = Path.cwd()):
    cur = start.resolve()
    for _ in range(8):
        if (cur / 'input').exists():
            return cur
        if cur.parent == cur:
            break
        cur = cur.parent
    return start.resolve()

REPO_ROOT = find_repo_root()
print('Repo root:', REPO_ROOT)

Repo root: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis


In [2]:
# Parameters
PORT = 8989  # GraphHopper port
DEPARTURE_TIME = '2025-05-13T08:00:00Z'  # ISO UTC
N_RANDOM = 100  # Number of random OD pairs
BBOX = (10.45, 52.22, 10.60, 52.32)  # (min_lon, min_lat, max_lon, max_lat)
RANDOM_SEED = 42
EXPORT_DIR = REPO_ROOT / 'output' / 'notebook_tests'
EXPORT_DIR.mkdir(parents=True, exist_ok=True)
print('Export directory:', EXPORT_DIR)

Export directory: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\notebook_tests


In [3]:
# Health check

def backend_ready(port: int, timeout: float = 0.0) -> bool:
    base = f'http://localhost:{port}'
    urls = [f'{base}/health', f'{base}/actuator/health']
    end = time.time() + timeout
    while True:
        for u in urls:
            try:
                r = requests.get(u, timeout=1.5)
                if r.status_code == 200:
                    return True
            except Exception:
                pass
        if timeout <= 0 or time.time() >= end:
            return False
        time.sleep(0.5)

if backend_ready(PORT, timeout=2.0):
    print(f'Backend OK on port {PORT}.')
else:
    print('WARN: Backend not reachable. Start separately (run-grashopper-routing-backend.py).')

Backend OK on port 8989.


In [4]:
# Generate random coordinate pairs (note: lat, lon order for router)
random.seed(RANDOM_SEED)
coords_list = []
min_lon, min_lat, max_lon, max_lat = BBOX
for _ in range(N_RANDOM):
    origin_lat = random.uniform(min_lat, max_lat)
    origin_lon = random.uniform(min_lon, max_lon)
    dest_lat = random.uniform(min_lat, max_lat)
    dest_lon = random.uniform(min_lon, max_lon)
    coords_list.append((origin_lat, origin_lon, dest_lat, dest_lon))
coords_list

[(52.28394267984579, 10.4537516132834, 52.24750293183691, 10.483481610722324),
 (52.2936471214164, 10.551504923113436, 52.30921795677048, 10.463040824894412),
 (52.26219218196853, 10.45446958291571, 52.24186379748036, 10.525803293215503),
 (52.222653596968385,
  10.479825647602997,
  52.284988443777955,
  10.531741222090481),
 (52.242044062204066,
  10.538389852581385,
  52.300943045667786,
  10.450974813951708),
 (52.30058192518328, 10.554720909248234, 52.2540250516518, 10.473321924971767),
 (52.31572130722068,
  10.500489181766893,
  52.229274584338015,
  10.46450745652502),
 (52.304749436634744,
  10.540558904705033,
  52.30071282732744,
  10.559459768004071),
 (52.27362280914547,
  10.595967364596905,
  52.25785343772083,
  10.532806094690983),
 (52.3029404664253, 10.542777962854636, 52.30617069003108, 10.536602821788513),
 (52.29045718362149,
  10.456873657548348,
  52.242789827565154,
  10.493408194540315),
 (52.22797919769236,
  10.484918632954153,
  52.23010014294097,
  10.4916

In [5]:
# Import TransitRouter
import sys
sys.path.append(str(REPO_ROOT / 'src' / 'pt-traveltimeratio'))
from gtfs_routing.transit_router import TransitRouter
router = TransitRouter(port=PORT)
router

<gtfs_routing.transit_router.TransitRouter at 0x11fe572d400>

In [6]:
# Run routing (async). In IPython we can potentially use top-level 'await'.
async def run_batch(coords, departure_time):
    return await router.batch_pt_routes_safe(coords, departure_time=departure_time)

try:
    loop = asyncio.get_event_loop()
    if loop.is_running():
        # Notebook event loop active -> use 'await'
        print('Using top-level await...')
        results = await run_batch(coords_list, DEPARTURE_TIME)  # type: ignore # noqa
    else:
        results = loop.run_until_complete(run_batch(coords_list, DEPARTURE_TIME))
except RuntimeError:
    # Fallback if no loop exists
    results = asyncio.run(run_batch(coords_list, DEPARTURE_TIME))


Using top-level await...
Routing batch 1 (0 to 99)...


## All paths per OD (validation)
This section builds a second DataFrame containing every returned path (not only the fastest) for manual validation. One row per (OD, path_index). `is_best` marks the path chosen in the main summary.

In [7]:
import math
from results_processing import parse_graphhopper_response, select_best_path

all_rows = []
for od_index, raw in enumerate(results):
    if not raw:
        continue
    paths = raw.get('paths', []) or []
    best = select_best_path(paths)
    best_id = id(best) if best else None
    for path_idx, p in enumerate(paths):
        parsed = parse_graphhopper_response(p) or {}
        row = {
            'od_index': od_index,
            'path_index': path_idx,
            'is_best': id(p) == best_id,
            'gh_time': parsed.get('gh_time'),
            'gh_distance': parsed.get('gh_distance'),
            'pure_pt_travel_time': parsed.get('pure_pt_travel_time'),
            'total_walking_distance': parsed.get('total_walking_distance'),
            'walking_dist_to_first_pt_station': parsed.get('walking_dist_to_first_pt_station'),
            'transfers': parsed.get('transfers'),
            'has_pt': parsed.get('has_pt'),
            'n_pt_legs': parsed.get('n_pt_legs'),
            'route_id_0': parsed.get('route_id_0'),
            'route_id_1': parsed.get('route_id_1'),
            'route_id_2': parsed.get('route_id_2'),
            'trip_headsign_0': parsed.get('trip_headsign_0'),
            'trip_headsign_1': parsed.get('trip_headsign_1'),
            'trip_headsign_2': parsed.get('trip_headsign_2'),
            'first_leg_departure_time': parsed.get('first_leg_departure_time'),
            'final_arrival_time': parsed.get('final_arrival_time'),
        }
        all_rows.append(row)

df_all_paths = pd.DataFrame(all_rows)
df_all_paths

Unnamed: 0,od_index,path_index,is_best,gh_time,gh_distance,pure_pt_travel_time,total_walking_distance,walking_dist_to_first_pt_station,transfers,has_pt,n_pt_legs,route_id_0,route_id_1,route_id_2,trip_headsign_0,trip_headsign_1,trip_headsign_2,first_leg_departure_time,final_arrival_time
0,0,0,True,2288.512,755.878,1500.0,755.877750,327.387000,1,True,2,34_560,34_3,,406499731 Hinrichtung 406499747,405933540 Rückrichtung 405933540,,2025-05-13T08:01:04.283+00:00,2025-05-13T08:38:08.512+00:00
1,0,1,False,2708.512,755.878,1260.0,755.877750,327.387000,1,True,2,34_431,34_3,,406495743 Hinrichtung 406495743,405933540 Rückrichtung 405933540,,2025-05-13T08:10:04.283+00:00,2025-05-13T08:45:08.512+00:00
2,0,2,False,3608.512,755.878,1740.0,755.877750,327.387000,1,True,2,34_411,34_3,,406486807 Hinrichtung 406486807,405933540 Rückrichtung 405933540,,2025-05-13T08:21:04.283+00:00,2025-05-13T09:00:08.512+00:00
3,0,3,False,4189.134,5818.242,0.0,5818.241750,5818.241750,0,False,0,,,,,,,2025-05-13T08:00:00.000+00:00,2025-05-13T09:09:49.134+00:00
4,1,0,True,3352.645,2118.568,1260.0,2118.568105,1378.782000,1,True,2,34_426,34_416,,406493879 Rückrichtung 406493879,406489370 Rückrichtung 406489370,,2025-05-13T08:06:27.273+00:00,2025-05-13T08:55:52.645+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,99,3,False,2344.660,3256.478,0.0,3256.477770,3256.477770,0,False,0,,,,,,,2025-05-13T08:00:00.000+00:00,2025-05-13T08:39:04.660+00:00
632,99,4,False,2362.148,3133.197,60.0,3133.196770,852.433692,0,True,1,34_3,,,405933996 Hinrichtung 405933996,,,2025-05-13T08:00:46.245+00:00,2025-05-13T08:39:22.148+00:00
633,99,5,False,2516.314,3379.906,60.0,3379.905770,468.351692,0,True,1,34_5,,,405934778 Rückrichtung 405934786,,,2025-05-13T08:00:22.786+00:00,2025-05-13T08:41:56.314+00:00
634,99,6,False,2587.047,2881.922,300.0,2881.921770,372.132692,1,True,2,34_5,34_1,,405934778 Rückrichtung 405934786,405932446 Hinrichtung 405932446,,2025-05-13T08:02:32.063+00:00,2025-05-13T08:43:07.047+00:00


In [8]:
# Full flatten -> DataFrame (fastest path per OD)
from results_processing import results_to_dataframe_with_options  # uses minimal parser
df_flat = results_to_dataframe_with_options(results)
print('Fastest-only rows:', len(df_flat))
df_flat.head()

Fastest-only rows: 100


Unnamed: 0,od_index,transfers,gh_time,gh_distance,pure_pt_travel_time,total_walking_distance,walking_dist_to_first_pt_station,first_station_name,first_station_coord,trip_headsign_0,...,route_id_2,first_leg_departure_time,final_arrival_time,has_pt,n_pt_legs,options_count,departure_times_list,total_times_list,routes_sequences,best_route_sequence
0,0,1,2288.512,755.878,1500.0,755.87775,327.387,Bundesallee,"[10.4517, 52.28583]",406499731 Hinrichtung 406499747,...,,2025-05-13T08:01:04.283+00:00,2025-05-13T08:38:08.512+00:00,True,2,4,"[2025-05-13T08:01:04.283+00:00, 2025-05-13T08:...","[2288.512, 2708.512, 3608.512, 4189.134]","[[34_560, 34_3], [34_431, 34_3], [34_411, 34_3...","[34_560, 34_3]"
1,1,1,3352.645,2118.568,1260.0,2118.568105,1378.782,Ottenroder Straße,"[10.543369, 52.286393]",406493879 Rückrichtung 406493879,...,,2025-05-13T08:06:27.273+00:00,2025-05-13T08:55:52.645+00:00,True,2,12,"[2025-05-13T08:06:27.273+00:00, 2025-05-13T08:...","[3352.645, 4012.645, 5152.645, 5495.731, 5495....","[[34_426, 34_416], [34_433], [34_416], [34_414...","[34_426, 34_416]"
2,2,1,3247.723,675.886,1560.0,675.885715,264.934544,Raffturm,"[10.45152, 52.26022]",406497462 Hinrichtung 406497462,...,,2025-05-13T08:19:49.247+00:00,2025-05-13T08:54:07.723+00:00,True,2,8,"[2025-05-13T08:19:49.247+00:00, 2025-05-13T08:...","[3247.723, 3727.723, 5047.723, 5282.218, 5806....","[[34_450, 34_2], [34_418, 34_1], [34_450, 34_2...","[34_450, 34_2]"
3,3,1,3734.493,2130.026,1950.0,2130.026249,1693.230688,Raabestraße,"[10.500052, 52.216548]",406488502 Hinrichtung 406488502,...,,2025-05-13T08:01:40.873+00:00,2025-05-13T09:02:14.493+00:00,True,2,7,"[2025-05-13T08:01:40.873+00:00, 2025-05-13T08:...","[3734.493, 4386.678, 4804.561, 5132.82, 6539.0...","[[34_413, 34_426], [34_5, 34_10], [34_413, 34_...","[34_413, 34_426]"
4,4,1,3290.95,694.59,2280.0,694.590423,290.491827,Schefflerstraße,"[10.53849, 52.24018]",406486410 Rückrichtung 406486410,...,,2025-05-13T08:02:30.846+00:00,2025-05-13T08:54:50.950+00:00,True,2,7,"[2025-05-13T08:02:30.846+00:00, 2025-05-13T08:...","[3290.95, 3950.95, 4190.95, 6280.602, 6280.602...","[[34_411, 34_416], [34_431, 34_433], [34_411, ...","[34_411, 34_416]"


In [42]:
# Export (timestamp in filename)
ts = dt.datetime.utcnow().strftime('%Y%m%d_%H%M%S')
base_name = f'test_routing_{ts}'

csv_path = EXPORT_DIR / f'{base_name}.csv'
parquet_path = EXPORT_DIR / f'{base_name}.parquet'
all_paths_csv = EXPORT_DIR / f'{base_name}_all_paths.csv'
all_paths_parquet = EXPORT_DIR / f'{base_name}_all_paths.parquet'

# Fastest-only
df_flat.to_csv(csv_path, index=False)
# All paths
if 'df_all_paths' in globals():
    df_all_paths.to_csv(all_paths_csv, index=False)

try:
    df_flat.to_parquet(parquet_path, index=False)
    if 'df_all_paths' in globals():
        df_all_paths.to_parquet(all_paths_parquet, index=False)
    print('Export CSV (fastest):', csv_path)
    print('Export CSV (all paths):', all_paths_csv)
    print('Export Parquet (fastest):', parquet_path)
    print('Export Parquet (all paths):', all_paths_parquet)
except Exception as e:
    print('Parquet export failed:', e)

csv_path

Export CSV (fastest): C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\notebook_tests\test_routing_20250909_093839.csv
Export CSV (all paths): C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\notebook_tests\test_routing_20250909_093839_all_paths.csv
Export Parquet (fastest): C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\notebook_tests\test_routing_20250909_093839.parquet
Export Parquet (all paths): C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\notebook_tests\test_routing_20250909_093839_all_paths.parquet


  ts = dt.datetime.utcnow().strftime('%Y%m%d_%H%M%S')


WindowsPath('C:/Users/bienzeisler/Documents/GitHub/RVB-VISUM-analysis/output/notebook_tests/test_routing_20250909_093839.csv')

In [43]:
od_meta

Unnamed: 0,origins,destinations,usage_rebus,pt_transfers,pt_trips,origin_name,destination_name,n_connectors_o,n_connectors_d,connector_far_endpoints_o_wkt,connector_far_endpoints_d_wkt,origin_geometry_wkb,destination_geometry_wkb,expected_pairs_row
0,310101901,310101902,0.199772,0.002856,22.912864,BS Stadtkern 6,BS Stadtkern 9,8,4,MULTIPOINT ((603753.1947001892 5791660.6708993...,MULTIPOINT ((604042.8107924324 5791528.6047887...,0103000000010000002C000000158C4A8A8B6D22411904...,010300000001000000260000000D71ACEB2C6D2241D509...,32
1,310101901,310101903,0.176499,0.020194,21.427874,BS Stadtkern 6,BS Stadtkern 13,8,8,MULTIPOINT ((603753.1947001892 5791660.6708993...,MULTIPOINT ((604018.5991001911 5791116.5888993...,0103000000010000002C000000158C4A8A8B6D22411904...,010300000001000000400000009A081B3E6A6D2241AD69...,64
2,310101901,310101905,0.160468,0.002142,26.381425,BS Stadtkern 6,BS Stadtkern 15,8,1,MULTIPOINT ((603753.1947001892 5791660.6708993...,MULTIPOINT ((603697.1937001882 5791271.1737993...,0103000000010000002C000000158C4A8A8B6D22411904...,010300000001000000160000006C787AC5806E224119E2...,8
3,310101901,310101906,0.126761,0.001530,7.532319,BS Stadtkern 6,BS Stadtkern 19,8,5,MULTIPOINT ((603753.1947001892 5791660.6708993...,MULTIPOINT ((603589.7528001876 5791185.4710993...,0103000000010000002C000000158C4A8A8B6D22411904...,01030000000100000023000000D712F2A1926922413C4E...,40
4,310101901,310101907,0.122328,0.000000,12.952408,BS Stadtkern 6,BS Stadtkern 1,8,4,MULTIPOINT ((603753.1947001892 5791660.6708993...,MULTIPOINT ((603411.6038001857 5791542.2455993...,0103000000010000002C000000158C4A8A8B6D22411904...,010300000001000000290000000DE02D9092692241B37B...,32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4848,315803730,315803712,0.758223,0.000000,17.256473,WB Weisse Schanze 30,WB Heinrichstadt 12,6,1,MULTIPOINT ((604340.0656001947 5779700.7576993...,MULTIPOINT ((605266.4475002037 5780032.9816992...,0103000000010000001300000088F4DBB76C702241E2E9...,0103000000010000001F000000B4C876BEB97B2241D881...,6
4849,315803730,315803716,0.760112,0.000509,44.216752,WB Weisse Schanze 30,WB Krz Holz 16,6,1,MULTIPOINT ((604340.0656001947 5779700.7576993...,MULTIPOINT ((606022.3235002109 5782061.8936992...,0103000000010000001300000088F4DBB76C702241E2E9...,010300000001000000300000008FE4F21FE5772241D656...,6
4850,315803730,315803718,0.079595,0.000000,8.126527,WB Weisse Schanze 30,WB Krz Holz 18,6,5,MULTIPOINT ((604340.0656001947 5779700.7576993...,MULTIPOINT ((606916.9989002203 5781523.6545992...,0103000000010000001300000088F4DBB76C702241E2E9...,0103000000010000000900000058CA32644883224157EC...,30
4851,315803730,315803720,0.420622,0.486363,16.467809,WB Weisse Schanze 30,WB Linden 20,6,1,MULTIPOINT ((604340.0656001947 5779700.7576993...,MULTIPOINT ((607141.5890002229 5778471.9835992...,0103000000010000001300000088F4DBB76C702241E2E9...,0103000000010000002E0000006C787A85778E22411C7C...,6


## OD Meta Expansion (Connector Endpoint Pairs)

The following cells create from `od_meta` an expanded DataFrame where every combination of origin endpoint and destination endpoint is its own row. This is a preparation structure for later routing joins (one `od_index` / `pair_id` per pair).

In [44]:
# Function to expand OD meta to endpoint pairs
import pandas as pd
from shapely import wkt
from shapely.geometry import Point

def _unpack_multipoint(wkt_str):
    if not wkt_str or pd.isna(wkt_str):
        return []
    try:
        g = wkt.loads(wkt_str)
    except Exception:
        return []
    if isinstance(g, Point):
        return [g]
    geoms = getattr(g, 'geoms', None)
    if geoms is not None:
        return [p for p in geoms if isinstance(p, Point)]
    return []

def expand_od_meta(
    df: pd.DataFrame,
    origin_wkt_col: str = 'connector_far_endpoints_o_wkt',
    dest_wkt_col: str = 'connector_far_endpoints_d_wkt',
    keep_cols: list | None = None,
    relation_id_col: str = 'relation_id',
) -> pd.DataFrame:
    if keep_cols is None:
        keep_cols = [
            relation_id_col,
            'origins','destinations','origin_name','destination_name',
            'usage_rebus','pt_transfers','pt_trips'
        ]
    present_keep = [c for c in keep_cols if c in df.columns]
    rows = []
    for base_row_id, row in df.iterrows():
        o_pts = _unpack_multipoint(row.get(origin_wkt_col))
        d_pts = _unpack_multipoint(row.get(dest_wkt_col))
        if not o_pts or not d_pts:
            continue
        base_meta = {c: row.get(c) for c in present_keep}
        base_meta['row_id'] = base_row_id
        base_meta['n_connectors_o'] = len(o_pts)
        base_meta['n_connectors_d'] = len(d_pts)
        for oi, op in enumerate(o_pts):
            for di, dp in enumerate(d_pts):
                rows.append({
                    **base_meta,
                    'origin_endpoint_index': oi,
                    'destination_endpoint_index': di,
                    'origin_lon': op.x,
                    'origin_lat': op.y,
                    'dest_lon': dp.x,
                    'dest_lat': dp.y,
                })
    if not rows:
        return pd.DataFrame(columns=[
            'od_index','pair_id','row_id','origin_endpoint_index','destination_endpoint_index',
            'origin_lat','origin_lon','dest_lat','dest_lon'
        ] + present_keep + ['n_connectors_o','n_connectors_d'])
    expanded = pd.DataFrame(rows).reset_index(drop=True)
    expanded['od_index'] = range(len(expanded))
    def _mk_id(r):
        rel = r.get(relation_id_col)
        if pd.notna(rel):
            return f"REL_{rel}__O{int(r.origin_endpoint_index)+1}_D{int(r.destination_endpoint_index)+1}"
        return f"ROW_{int(r.row_id)}_O{int(r.origin_endpoint_index)+1}_D{int(r.destination_endpoint_index)+1}"
    expanded['pair_id'] = expanded.apply(_mk_id, axis=1)
    first_cols = [
        'od_index','pair_id',
        relation_id_col if relation_id_col in expanded.columns else None,
        'row_id','origin_endpoint_index','destination_endpoint_index',
        'origin_lat','origin_lon','dest_lat','dest_lon','n_connectors_o','n_connectors_d'
    ]
    first_cols = [c for c in first_cols if c in expanded.columns]
    other = [c for c in expanded.columns if c not in first_cols]
    return expanded[first_cols + other]

print('expand_od_meta() defined.')

expand_od_meta() defined.


In [45]:
# Apply expansion (assumes od_meta already loaded)
try:
    expanded_od_meta = expand_od_meta(od_meta)
    print('Expanded pair rows:', len(expanded_od_meta))
    display(expanded_od_meta.head(10))
    out_path = REPO_ROOT / 'output' / 'scenario_V10_2025' / 'od_meta_expanded.parquet'
    expanded_od_meta.to_parquet(out_path, index=False)
    print('Saved to:', out_path)
except NameError:
    print('Variable od_meta not defined – load it above first.')

Expanded pair rows: 66532


Unnamed: 0,od_index,pair_id,row_id,origin_endpoint_index,destination_endpoint_index,origin_lat,origin_lon,dest_lat,dest_lon,n_connectors_o,n_connectors_d,origins,destinations,origin_name,destination_name,usage_rebus,pt_transfers,pt_trips
0,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
1,1,ROW_0_O1_D2,0,0,1,5791661.0,603753.1947,5791511.0,604170.7251,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
2,2,ROW_0_O1_D3,0,0,2,5791661.0,603753.1947,5791384.0,603935.1407,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
3,3,ROW_0_O1_D4,0,0,3,5791661.0,603753.1947,5791416.0,603781.5326,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
4,4,ROW_0_O2_D1,0,1,0,5791549.0,604048.4156,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
5,5,ROW_0_O2_D2,0,1,1,5791549.0,604048.4156,5791511.0,604170.7251,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
6,6,ROW_0_O2_D3,0,1,2,5791549.0,604048.4156,5791384.0,603935.1407,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
7,7,ROW_0_O2_D4,0,1,3,5791549.0,604048.4156,5791416.0,603781.5326,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
8,8,ROW_0_O3_D1,0,2,0,5791719.0,603975.2087,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864
9,9,ROW_0_O3_D2,0,2,1,5791719.0,603975.2087,5791511.0,604170.7251,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864


Saved to: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\scenario_V10_2025\od_meta_expanded.parquet


In [46]:
# Analysis: expected number of routing requests
# Assumption: expanded_od_meta has each (origin_endpoint,destination_endpoint) pair exactly once.
import math

if 'od_meta' in globals():
    o_wkt_col = 'connector_far_endpoints_o_wkt'
    d_wkt_col = 'connector_far_endpoints_d_wkt'

    def _count_points(wkt_str):
        if not wkt_str or pd.isna(wkt_str):
            return 0
        try:
            g = wkt.loads(wkt_str)
        except Exception:
            return 0
        geoms = getattr(g, 'geoms', None)
        if geoms is None:
            return 1 if isinstance(g, Point) else 0
        return sum(1 for gg in geoms if isinstance(gg, Point))

    if 'n_connectors_o' not in od_meta.columns or 'n_connectors_d' not in od_meta.columns:
        od_meta = od_meta.copy()
        od_meta['n_connectors_o'] = od_meta[o_wkt_col].map(_count_points)
        od_meta['n_connectors_d'] = od_meta[d_wkt_col].map(_count_points)

    od_meta['expected_pairs_row'] = od_meta['n_connectors_o'] * od_meta['n_connectors_d']
    total_expected_pairs = int(od_meta['expected_pairs_row'].sum())
    distinct_pairs_expanded = int(len(expanded_od_meta)) if 'expanded_od_meta' in globals() else None

    print('--- Expected routing pair count ---')
    print('Sum n_connectors_o * n_connectors_d  :', total_expected_pairs)
    print('Rows in expanded_od_meta             :', distinct_pairs_expanded)
    if distinct_pairs_expanded is not None:
        diff = total_expected_pairs - distinct_pairs_expanded
        print('Difference (expected - expanded)     :', diff)
        if diff == 0:
            print('OK: All expected pairs are present.')
        elif diff > 0:
            print('MISSING: expanded_od_meta has fewer pairs than expected.')
        else:
            print('EXTRA: expanded_od_meta has more pairs than expected (unexpected).')

    if not od_meta.empty:
        first = od_meta.iloc[0]
        print('\nExample first row:')
        print('n_connectors_o =', first['n_connectors_o'], '| n_connectors_d =', first['n_connectors_d'], '| product =', first['n_connectors_o'] * first['n_connectors_d'])
else:
    print('od_meta not in namespace.')

--- Expected routing pair count ---
Sum n_connectors_o * n_connectors_d  : 66532
Rows in expanded_od_meta             : 66532
Difference (expected - expanded)     : 0
OK: All expected pairs are present.

Example first row:
n_connectors_o = 8 | n_connectors_d = 4 | product = 32


### Time slices 06:00–22:00 every 15 minutes for each endpoint pair
We create a table that for every pair and every start time (UTC) has a row with `departure_time`.

In [47]:
from datetime import datetime, timedelta, timezone
import pandas as pd

# Parameters
DATE = '2025-05-13'  # Base date (UTC)
START_H = 6
END_H = 22  # inclusive 22:00 slice
INTERVAL_MIN = 15

# Build time slices
start_dt = datetime.fromisoformat(f"{DATE}T{START_H:02d}:00:00+00:00")
end_dt = datetime.fromisoformat(f"{DATE}T{END_H:02d}:00:00+00:00")
cur = start_dt
slice_times = []
while cur <= end_dt:
    slice_times.append(cur.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'))
    cur += timedelta(minutes=INTERVAL_MIN)

print('Time slices:', len(slice_times), 'Example:', slice_times[:4], '... last:', slice_times[-2:])

if 'expanded_od_meta' not in globals():
    raise RuntimeError('expanded_od_meta missing (execute earlier cells).')

# Cartesian product: each pair x each time
schedule_df = (
    expanded_od_meta.assign(key=1)
    .merge(pd.DataFrame({'departure_time': slice_times, 'key': 1}), on='key')
    .drop(columns='key')
)

print('Total rows (pairs * slices):', len(schedule_df))
print('Pairs:', len(expanded_od_meta), 'Slices:', len(slice_times))
expected = len(expanded_od_meta) * len(slice_times)
print('Expected:', expected, 'OK?' , expected == len(schedule_df))

schedule_df.head()

Time slices: 65 Example: ['2025-05-13T06:00:00Z', '2025-05-13T06:15:00Z', '2025-05-13T06:30:00Z', '2025-05-13T06:45:00Z'] ... last: ['2025-05-13T21:45:00Z', '2025-05-13T22:00:00Z']
Total rows (pairs * slices): 4324580
Pairs: 66532 Slices: 65
Expected: 4324580 OK? True
Total rows (pairs * slices): 4324580
Pairs: 66532 Slices: 65
Expected: 4324580 OK? True


Unnamed: 0,od_index,pair_id,row_id,origin_endpoint_index,destination_endpoint_index,origin_lat,origin_lon,dest_lat,dest_lon,n_connectors_o,n_connectors_d,origins,destinations,origin_name,destination_name,usage_rebus,pt_transfers,pt_trips,departure_time
0,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,2025-05-13T06:00:00Z
1,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,2025-05-13T06:15:00Z
2,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,2025-05-13T06:30:00Z
3,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,2025-05-13T06:45:00Z
4,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,4,310101901,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,2025-05-13T07:00:00Z


### Transform coordinates (ETRS89 / UTM Zone 32N -> WGS84)
If `origin_lon/origin_lat` are still UTM (easting/northing) we add additional WGS84 columns (`*_lon_wgs84`, `*_lat_wgs84`).

In [48]:
# CRS transformation
try:
    from pyproj import Transformer
except ImportError:
    raise ImportError('pyproj not installed (install in venv).')

# Transformer: input EPSG:25832 (UTM32N ETRS89) -> EPSG:4326 (lon/lat WGS84)
transformer_to_wgs = Transformer.from_crs('EPSG:25832', 'EPSG:4326', always_xy=True)

def _transform_df(df: pd.DataFrame, prefix_origin='origin', prefix_dest='dest'):
    if df.empty:
        return df
    if f'{prefix_origin}_lon' not in df.columns or f'{prefix_origin}_lat' not in df.columns:
        return df
    ox = df[f'{prefix_origin}_lon'].values  # Easting
    oy = df[f'{prefix_origin}_lat'].values  # Northing
    dx = df[f'{prefix_dest}_lon'].values
    dy = df[f'{prefix_dest}_lat'].values
    o_lon_wgs, o_lat_wgs = transformer_to_wgs.transform(ox, oy)
    d_lon_wgs, d_lat_wgs = transformer_to_wgs.transform(dx, dy)
    df[f'{prefix_origin}_lon_wgs84'] = o_lon_wgs
    df[f'{prefix_origin}_lat_wgs84'] = o_lat_wgs
    df[f'{prefix_dest}_lon_wgs84'] = d_lon_wgs
    df[f'{prefix_dest}_lat_wgs84'] = d_lat_wgs
    return df

expanded_od_meta_wgs = _transform_df(expanded_od_meta.copy())
print('expanded_od_meta_wgs added columns:', [c for c in expanded_od_meta_wgs.columns if c.endswith('_wgs84')])

if 'schedule_df' in globals():
    schedule_df_wgs = _transform_df(schedule_df.copy())
    print('schedule_df_wgs added columns:', [c for c in schedule_df_wgs.columns if c.endswith('_wgs84')])
else:
    schedule_df_wgs = None
    print('schedule_df not defined – run slice expansion first.')

# Optional persistence
out_wgs_pairs = REPO_ROOT / 'output' / 'scenario_V10_2025' / 'od_meta_expanded_wgs84.parquet'
expanded_od_meta_wgs.to_parquet(out_wgs_pairs, index=False)
print('Saved:', out_wgs_pairs)
if schedule_df_wgs is not None:
    out_wgs_sched = REPO_ROOT / 'output' / 'scenario_V10_2025' / 'schedule_pairs_times_wgs84.parquet'
    schedule_df_wgs.to_parquet(out_wgs_sched, index=False)
    print('Saved:', out_wgs_sched)

expanded_od_meta_wgs.head()

expanded_od_meta_wgs added columns: ['origin_lon_wgs84', 'origin_lat_wgs84', 'dest_lon_wgs84', 'dest_lat_wgs84']
schedule_df_wgs added columns: ['origin_lon_wgs84', 'origin_lat_wgs84', 'dest_lon_wgs84', 'dest_lat_wgs84']
Saved: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\scenario_V10_2025\od_meta_expanded_wgs84.parquet
schedule_df_wgs added columns: ['origin_lon_wgs84', 'origin_lat_wgs84', 'dest_lon_wgs84', 'dest_lat_wgs84']
Saved: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\scenario_V10_2025\od_meta_expanded_wgs84.parquet
Saved: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\scenario_V10_2025\schedule_pairs_times_wgs84.parquet
Saved: C:\Users\bienzeisler\Documents\GitHub\RVB-VISUM-analysis\output\scenario_V10_2025\schedule_pairs_times_wgs84.parquet


Unnamed: 0,od_index,pair_id,row_id,origin_endpoint_index,destination_endpoint_index,origin_lat,origin_lon,dest_lat,dest_lon,n_connectors_o,...,destinations,origin_name,destination_name,usage_rebus,pt_transfers,pt_trips,origin_lon_wgs84,origin_lat_wgs84,dest_lon_wgs84,dest_lat_wgs84
0,0,ROW_0_O1_D1,0,0,0,5791661.0,603753.1947,5791529.0,604042.810792,8,...,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,10.52038,52.26553,10.524582,52.264288
1,1,ROW_0_O1_D2,0,0,1,5791661.0,603753.1947,5791511.0,604170.7251,8,...,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,10.52038,52.26553,10.52645,52.26411
2,2,ROW_0_O1_D3,0,0,2,5791661.0,603753.1947,5791384.0,603935.1407,8,...,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,10.52038,52.26553,10.52296,52.26301
3,3,ROW_0_O1_D4,0,0,3,5791661.0,603753.1947,5791416.0,603781.5326,8,...,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,10.52038,52.26553,10.52072,52.26333
4,4,ROW_0_O2_D1,0,1,0,5791549.0,604048.4156,5791529.0,604042.810792,8,...,310101902,BS Stadtkern 6,BS Stadtkern 9,0.199772,0.002856,22.912864,10.52467,52.26447,10.524582,52.264288


### Single-pair routing timing (estimate total runtime)
We route one OD endpoint pair over all time slices to estimate average request duration and extrapolate total runtime.

In [50]:
import time
from statistics import mean

# Choose one pair (first row) from expanded_od_meta_wgs / fallback expanded_od_meta
if 'expanded_od_meta_wgs' in globals() and not expanded_od_meta_wgs.empty:
    sample_pair = expanded_od_meta_wgs.iloc[0]
elif 'expanded_od_meta' in globals() and not expanded_od_meta.empty:
    sample_pair = expanded_od_meta.iloc[0]
else:
    raise RuntimeError('No expanded OD meta available.')

print('Sample pair_id:', sample_pair.get('pair_id'), 'origin_idx:', sample_pair.get('origin_endpoint_index'), 'dest_idx:', sample_pair.get('destination_endpoint_index'))

# Build list of departure times (reuse slice_times if already built)
if 'slice_times' not in globals():
    # Reconstruct if missing
    DATE = '2025-05-13'
    START_H, END_H, INTERVAL_MIN = 6, 22, 15
    from datetime import datetime, timedelta, timezone
    start_dt = datetime.fromisoformat(f"{DATE}T{START_H:02d}:00:00+00:00")
    end_dt = datetime.fromisoformat(f"{DATE}T{END_H:02d}:00:00+00:00")
    cur = start_dt
    slice_times = []
    while cur <= end_dt:
        slice_times.append(cur.astimezone(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'))
        cur += timedelta(minutes=INTERVAL_MIN)

print('Number of slices:', len(slice_times))

# Extract coordinates (assuming we transformed to WGS already if using _wgs84 columns)
if 'origin_lon_wgs84' in sample_pair and not pd.isna(sample_pair['origin_lon_wgs84']):
    o_lon = sample_pair['origin_lon_wgs84']
    o_lat = sample_pair['origin_lat_wgs84']
    d_lon = sample_pair['dest_lon_wgs84']
    d_lat = sample_pair['dest_lat_wgs84']
else:
    # Assume already lon/lat in WGS order (if earlier generation produced WGS directly)
    o_lon = sample_pair['origin_lon']
    o_lat = sample_pair['origin_lat']
    d_lon = sample_pair['dest_lon']
    d_lat = sample_pair['dest_lat']

coord_tuple = (o_lat, o_lon, d_lat, d_lon)  # router expects (olat, olon, dlat, dlon)

async def route_single_pair_all_times(times, coord):
    durations = []
    responses = []
    for t in times:
        t0 = time.time()
        res_list = await router.batch_pt_routes_safe([coord], departure_time=t)
        dt_s = time.time() - t0
        durations.append(dt_s)
        # store the single result object for potential inspection
        responses.append({'departure_time': t, 'result': res_list[0] if res_list else None, 'duration_s': dt_s})
    return durations, responses

# Run async (respect existing loop)
try:
    loop = asyncio.get_event_loop()
    if loop.is_running():
        durations, single_responses = await route_single_pair_all_times(slice_times, coord_tuple)  # type: ignore # noqa
    else:
        durations, single_responses = loop.run_until_complete(route_single_pair_all_times(slice_times, coord_tuple))
except RuntimeError:
    durations, single_responses = asyncio.run(route_single_pair_all_times(slice_times, coord_tuple))

avg_s = mean(durations)
print(f"Avg per request: {avg_s:.3f}s | min {min(durations):.3f}s | max {max(durations):.3f}s")

# Estimate total runtime for all pairs * all slices
if 'expanded_od_meta' in globals():
    total_pairs = len(expanded_od_meta)
    est_total_seconds = total_pairs * len(slice_times) * avg_s
    est_hours = est_total_seconds / 3600.0
    print(f"Estimated total runtime: {est_total_seconds:,.0f}s (~{est_hours:,.2f} h) for {total_pairs} pairs * {len(slice_times)} slices.")

# Convert response summary to DataFrame (optional)
sample_time_results = pd.DataFrame(single_responses)
sample_time_results.head()

Sample pair_id: ROW_0_O1_D1 origin_idx: 0 dest_idx: 0
Number of slices: 65
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
Routing batch 1 (0 to 0)...
R

Unnamed: 0,departure_time,result,duration_s
0,2025-05-13T06:00:00Z,"{'hints': {'visited_nodes.sum': 2018, 'visited...",0.260494
1,2025-05-13T06:15:00Z,"{'hints': {'visited_nodes.sum': 2069, 'visited...",0.258932
2,2025-05-13T06:30:00Z,"{'hints': {'visited_nodes.sum': 2049, 'visited...",0.260495
3,2025-05-13T06:45:00Z,"{'hints': {'visited_nodes.sum': 2067, 'visited...",0.26374
4,2025-05-13T07:00:00Z,"{'hints': {'visited_nodes.sum': 1995, 'visited...",0.264506
