# Google Routes API: Single-Route Test

Tests a single bicycle route fetch to validate the API key, request format, and response, then batch-fetches routes for the most-used station pairs (up to ~1000 requests).
Uses `routes_fetch.py`, which supports cache-before-fetch and force-fetch.

## Setup

In [1]:
# =============================================================================
# SETUP: Paths, load .env, get API key
# =============================================================================
from pathlib import Path
import json
import os
import sys

from dotenv import load_dotenv

# Load variables from .env (via python-dotenv) before the API key is read below.
load_dotenv()

# Project root: the working directory when it contains package.json,
# otherwise assumed to be two directory levels above it.
cwd = Path.cwd()
if (cwd / "package.json").exists():
    project_root = cwd
else:
    project_root = cwd.parent.parent
prepared_dir = project_root / "prepared-data"
cache_dir = project_root / "routes-cache"

# Put <project_root>/data-pipeline at the front of sys.path so the
# routes_fetch import on the next line can be resolved from there.
sys.path.insert(0, str(project_root / "data-pipeline"))
from routes_fetch import fetch_route, fetch_routes_batch

# Stop right away when no key is configured (unset or empty).
api_key = os.environ.get("GOOGLE_ROUTES_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set GOOGLE_ROUTES_API_KEY in environment or .env file. "
        "Copy .env.example to .env and add your key."
    )

print("Project root:", project_root)
print("Cache dir:", cache_dir)

Project root: c:\Users\Nicol\Desktop\INF252-Course-Project
Cache dir: c:\Users\Nicol\Desktop\INF252-Course-Project\routes-cache


## Load Stations

In [2]:
# =============================================================================
# Read the station list: stations.json preferred, isochrones.json as fallback
# =============================================================================
stations_path = prepared_dir / "stations.json"
if not stations_path.exists():
    # Primary file is absent; try the fallback and give up if it is missing too.
    stations_path = prepared_dir / "isochrones.json"
    if not stations_path.exists():
        raise FileNotFoundError(
            f"No stations file found. Run stations_prepare.ipynb first, "
            f"or ensure isochrones.json exists in {prepared_dir}"
        )

data = json.loads(stations_path.read_text(encoding="utf-8"))

# The station list is looked up under data["data"]["stations"] first and under
# data["stations"] second; when neither key exists the result is an empty list.
payload = data.get("data", data)
stations = payload.get("stations", data.get("stations", []))
print(f"Loaded {len(stations)} stations from {stations_path.name}")

Loaded 292 stations from stations.json


## Fetch Single Route

In [3]:
# =============================================================================
# Single route check: Tøyenparken (377) -> Grønlands torg (381)
# During development, set FORCE_ROUTES_FETCH=1 in the environment to bypass
# the cache.
# =============================================================================
origin_id, dest_id = "377", "381"

result = fetch_route(origin_id, dest_id, stations, api_key, cache_dir)

# Missing keys fall back to "not cached" / an empty response / no routes.
cached = result.get("cached", False)
resp = result.get("response", {})
routes = resp.get("routes", [])

if not routes:
    print(f"No route returned. Response: {resp}")
else:
    # Report only the first route in the response.
    first_route = routes[0]
    duration = first_route.get("duration", "N/A")
    distance_m = first_route.get("distanceMeters", "N/A")
    print(
        f"Success (cached={cached})",
        f"  Duration: {duration}",
        f"  Distance: {distance_m} m",
        sep="\n",
    )

Success (cached=True)
  Duration: 184s
  Distance: 1282 m


## Fetch Batch: Top N Stations + Top K Connections (~1000 requests)

1. Select the top N stations by `total_trips` (incoming + outgoing).
2. For each of those stations, take its top K connections from the trip data (pairs where the station is the origin or the destination).
3. Fetch the resulting unique pairs in both directions, most-traveled first, capped at ~1000 routes; only uncached routes trigger API calls.

In [4]:
# =============================================================================
# Step 1: Top N stations by total_trips (incoming + outgoing)
# =============================================================================
TOP_N = 50


def _trip_count(station):
    """Return the station's total_trips as an int.

    A missing or null value counts as 0. The same helper feeds both the sort
    key and the printed figure, so a value stored as a numeric string ranks
    and prints consistently instead of breaking the ',' number format.
    """
    return int(station.get("total_trips") or 0)


# Busiest stations first; the slice simply returns fewer entries when the
# station list is shorter than TOP_N.
stations_sorted = sorted(stations, key=_trip_count, reverse=True)
top_stations = stations_sorted[:TOP_N]

# Ids are normalised to str so lookups are independent of the id's JSON type.
top_ids = {str(s["id"]) for s in top_stations}
station_by_id = {str(s["id"]): s for s in stations}

# Report the number of stations actually selected, which can be below TOP_N.
print(f"Top {len(top_stations)} stations by total_trips:")
for s in top_stations:
    print(f"  {s['id']}: {s['name']} ({_trip_count(s):,} trips)")

Top 50 stations by total_trips:
  421: Alexander Kiellands Plass (263,345 trips)
  551: Olaf Ryes plass (259,527 trips)
  489: Torggata (257,818 trips)
  398: Ringnes Park (249,088 trips)
  480: Helga Helgesens plass (245,850 trips)
  443: Sjøsiden øst (231,478 trips)
  479: Tjuvholmen (222,466 trips)
  464: Sukkerbiten (220,405 trips)
  396: Kirkeristen (215,740 trips)
  408: Tøyen skole (196,753 trips)
  446: Bislett Stadion (176,568 trips)
  478: Jernbanetorget (174,143 trips)
  384: Vår Frelsers gravlund sør (167,986 trips)
  494: Rådhusbrygge 4 (167,848 trips)
  465: Bjørvika (166,401 trips)
  460: Botanisk Hage sør (162,591 trips)
  424: Birkelunden (160,605 trips)
  423: Schous plass (158,326 trips)
  507: Jens Bjelkes gate (157,514 trips)
  412: Jakob kirke (153,994 trips)
  437: Sentrum Scene (153,270 trips)
  493: Sofienbergparken nordvest (148,467 trips)
  444: AHO (148,180 trips)
  413: Majorstuen (146,477 trips)
  598: Sofienbergparken nord (137,559 trips)
  521: Jess Carl

In [5]:
# =============================================================================
# Step 2: For each top-N station, find top K connections from trip data
# =============================================================================
raw_dir = project_root / "raw-data"
CONNECTIONS_PER_STATION = 20

# Fail before doing any work when the raw trip files are not available.
if not raw_dir.exists():
    raise FileNotFoundError("raw-data/ not found. Run npm run download first.")

# Count (origin, dest) pairs over every raw-data/<subdir>/*.json file.
pair_counts = {}
for year_dir in sorted(raw_dir.iterdir()):
    if not year_dir.is_dir():
        continue
    for json_path in sorted(year_dir.glob("*.json")):
        with open(json_path, encoding="utf-8") as f:
            # Separate name so the `data` variable set by the Load Stations
            # cell is not overwritten.
            trips_payload = json.load(f)
        # A file is either a bare list of trips or a dict holding the list
        # under "data" (checked first) or "trips".
        if isinstance(trips_payload, list):
            trips = trips_payload
        else:
            trips = trips_payload.get("data", trips_payload.get("trips", []))
        for t in trips:
            origin = t.get("start_station_id")
            dest = t.get("end_station_id")
            # Skip trips with a missing endpoint. A JSON null must be caught
            # here: str(None) is the truthy string "None" and would otherwise
            # be counted as a real station id.
            if origin in (None, "") or dest in (None, ""):
                continue
            oid, did = str(origin), str(dest)
            if oid == did:
                continue  # round trips carry no route between two stations
            pair_counts[(oid, did)] = pair_counts.get((oid, did), 0) + 1

print(f"Total observed (origin, dest) pairs in trip data: {len(pair_counts):,}")

# Index the pairs by endpoint in a single pass instead of rescanning all of
# pair_counts once per top station. Only pairs whose two stations both exist
# in the stations data are kept, and only top-N endpoints get a bucket.
# Buckets are filled in pair_counts order, so the stable sort below breaks
# ties exactly as a per-station scan would.
pairs_by_station = {sid: [] for sid in top_ids}
for pair, count in pair_counts.items():
    if pair[0] not in station_by_id or pair[1] not in station_by_id:
        continue
    # Origin and destination always differ here, so no bucket sees a pair twice.
    for sid in pair:
        if sid in pairs_by_station:
            pairs_by_station[sid].append((pair, count))

# Keep each top station's K most frequent pairs (as origin or destination).
unique_pairs = set()
for candidates in pairs_by_station.values():
    candidates.sort(key=lambda x: -x[1])
    unique_pairs.update(pair for pair, _ in candidates[:CONNECTIONS_PER_STATION])

print(f"Unique pairs from top {CONNECTIONS_PER_STATION} connections per station: {len(unique_pairs)}")

Total observed (origin, dest) pairs in trip data: 80,805
Unique pairs from top 20 connections per station: 745


In [6]:
# =============================================================================
# Step 3: Fetch both directions per pair, cap at ~1000 (only uncached hit API)
# =============================================================================
TARGET_FETCHES = 1000


def route_freq(p):
    """Return the trip count for a station pair, summed over both directions."""
    oid, did = p
    return pair_counts.get((oid, did), 0) + pair_counts.get((did, oid), 0)


# Expand every pair into both directions. unique_pairs is a set of string
# tuples, and its iteration order can change between kernel sessions (string
# hashes are randomized per process). Iterating in sorted order makes the
# tie-breaking of the stable sort below reproducible, so the routes selected
# at the TARGET_FETCHES cut-off are the same on every run.
directed = []
for oid, did in sorted(unique_pairs):
    directed.append((oid, did))
    directed.append((did, oid))

# Drop repeats (both (A, B) and (B, A) may already be in unique_pairs) while
# keeping first-seen order.
deduped = list(dict.fromkeys(directed))

# Most-traveled routes first, then cap at TARGET_FETCHES.
deduped.sort(key=route_freq, reverse=True)
fetch_list = deduped[:TARGET_FETCHES]

# Estimate how many entries are already cached (no API call for these).
# NOTE(review): assumes routes_fetch stores each route as
# <cache_dir>/single/<origin>_<dest>.json -- confirm against routes_fetch.py.
# A missing single/ directory simply yields a count of 0.
single_dir = cache_dir / "single"
cached_in_list = sum(
    1 for (oid, did) in fetch_list if (single_dir / f"{oid}_{did}.json").exists()
)
print(f"Total to process: {len(fetch_list)} (capped at {TARGET_FETCHES})")
print(f"Already cached: {cached_in_list} → ~{len(fetch_list) - cached_in_list} new API calls")

results = fetch_routes_batch(
    fetch_list,
    stations,
    api_key,
    cache_dir,
)
# Each result is expected to carry a truthy "cached" flag when no API call was made.
cached_count = sum(1 for r in results if r.get("cached"))
fetched_count = len(results) - cached_count
print(f"Done: fetched {fetched_count}, used cache {cached_count}")

Total to process: 1000 (capped at 1000)
Already cached: 65 → ~935 new API calls
Done: fetched 935, used cache 65
