# Google Routes API: Single-Route Test

Fetches a single bicycle route to validate the API key, request format, and response, then runs a small batch fetch (at most 100 requests) for the busiest stations.
Uses `routes_fetch.py` for cache-before-fetch and force-fetch support.

## Setup

In [2]:
# =============================================================================
# SETUP: Paths, load .env, get API key
# =============================================================================
from pathlib import Path
import json
import os
import sys

from dotenv import load_dotenv

# Load variables from a .env file (if one is found) before reading the key.
load_dotenv()

cwd = Path.cwd()
# The project root is the nearest directory (cwd or one of its ancestors) that
# contains package.json. Searching upward instead of assuming a fixed depth
# keeps the notebook working wherever it is launched from inside the repo.
# If no marker is found, fall back to the previous "two levels up" guess.
project_root = next(
    (d for d in (cwd, *cwd.parents) if (d / "package.json").exists()),
    cwd.parent.parent,
)
prepared_dir = project_root / "prepared-data"
cache_dir = project_root / "routes-cache"

# Make routes_fetch importable. The membership check keeps this cell
# idempotent: re-running it no longer stacks duplicate sys.path entries.
pipeline_dir = str(project_root / "data-pipeline")
if pipeline_dir not in sys.path:
    sys.path.insert(0, pipeline_dir)
from routes_fetch import fetch_route, fetch_routes_batch

# Stop here with an actionable message rather than failing later inside a
# fetch call because the key is missing or empty.
api_key = os.environ.get("GOOGLE_ROUTES_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set GOOGLE_ROUTES_API_KEY in environment or .env file. "
        "Copy .env.example to .env and add your key."
    )

print("Project root:", project_root)
print("Cache dir:", cache_dir)

Project root: c:\Users\Nicol\Desktop\INF252-Course-Project
Cache dir: c:\Users\Nicol\Desktop\INF252-Course-Project\routes-cache


## Load Stations

In [3]:
# =============================================================================
# Load stations from stations.json or isochrones.json (fallback)
# =============================================================================
# Candidate files in order of preference; the first one present on disk wins.
candidate_paths = [prepared_dir / name for name in ("stations.json", "isochrones.json")]
stations_path = next((path for path in candidate_paths if path.exists()), None)

if stations_path is None:
    raise FileNotFoundError(
        "No stations file found. Run stations_prepare.ipynb first, "
        f"or ensure isochrones.json exists in {prepared_dir}"
    )

data = json.loads(stations_path.read_text(encoding="utf-8"))

# The station list is read from under a top-level "data" wrapper when that key
# exists, otherwise from the document root; the root-level "stations" list (or
# an empty list) is the default when the chosen container has no such key.
payload = data.get("data", data)
root_level_stations = data.get("stations", [])
stations = payload.get("stations", root_level_stations)
print(f"Loaded {len(stations)} stations from {stations_path.name}")

Loaded 292 stations from stations.json


## Fetch Single Route

In [4]:
# =============================================================================
# Fetch route: Tøyenparken (377) -> Grønlands torg (381)
# Set FORCE_ROUTES_FETCH=1 in env to bypass cache during development.
# =============================================================================
origin_id, dest_id = "377", "381"

result = fetch_route(origin_id, dest_id, stations, api_key, cache_dir)

# Unpack the result defensively: every key falls back to an empty default.
resp = result.get("response", {})
routes = resp.get("routes", [])
cached = result.get("cached", False)

if not routes:
    # Nothing usable came back; show the response body for debugging.
    print(f"No route returned. Response: {resp}")
else:
    # Report only the first route in the list.
    first_route = routes[0]
    duration = first_route.get("duration", "N/A")
    distance_m = first_route.get("distanceMeters", "N/A")
    print(
        f"Success (cached={cached})",
        f"  Duration: {duration}",
        f"  Distance: {distance_m} m",
        sep="\n",
    )

Success (cached=True)
  Duration: 184s
  Distance: 1282 m


## Fetch Batch: Top 10 Stations + Top 5 Connections Each (max 100 requests)

1. Take the top 10 stations by `total_trips` (incoming + outgoing).
2. For each of those stations, take its top 5 connections from the trip data (origin–destination pairs in which the station is either the origin or the destination).
3. Fetch every unique pair in both directions (at most 100 API calls).

In [5]:
# =============================================================================
# Step 1: Top 10 stations by total_trips (incoming + outgoing)
# =============================================================================
TOP_N = 10


def _trip_total(station):
    """Return the station's total_trips as an int, treating a missing key as 0."""
    return int(station.get("total_trips", 0))


# Busiest stations first; the first TOP_N entries are the ones we keep.
stations_sorted = sorted(stations, key=_trip_total, reverse=True)
top_stations = stations_sorted[:TOP_N]

# IDs are normalised to strings so they can be compared with trip-data IDs.
top_ids = set(str(station["id"]) for station in top_stations)
station_by_id = dict((str(station["id"]), station) for station in stations)

print(f"Top {TOP_N} stations by total_trips:")
for station in top_stations:
    print(f"  {station['id']}: {station['name']} ({station.get('total_trips', 0):,} trips)")

Top 10 stations by total_trips:
  421: Alexander Kiellands Plass (263,345 trips)
  551: Olaf Ryes plass (259,527 trips)
  489: Torggata (257,818 trips)
  398: Ringnes Park (249,088 trips)
  480: Helga Helgesens plass (245,850 trips)
  443: Sjøsiden øst (231,478 trips)
  479: Tjuvholmen (222,466 trips)
  464: Sukkerbiten (220,405 trips)
  396: Kirkeristen (215,740 trips)
  408: Tøyen skole (196,753 trips)

Observed pairs involving top 10: 5682


KeyboardInterrupt: 

In [None]:
# =============================================================================
# Step 2: For each top-10 station, find top 5 connections from trip data
# =============================================================================
raw_dir = project_root / "raw-data"
CONNECTIONS_PER_STATION = 5

# Without the raw trip exports there is nothing to count, so stop up front.
if not raw_dir.exists():
    raise FileNotFoundError("raw-data/ not found. Run npm run download first.")

# Count directed (origin, dest) pairs from every raw-data/<subdir>/*.json file.
# Directories and files are visited in sorted order so the insertion order of
# pair_counts (which decides ties further down) is reproducible.
pair_counts = {}
for year_dir in sorted(raw_dir.iterdir()):
    if not year_dir.is_dir():
        continue
    for json_path in sorted(year_dir.glob("*.json")):
        # Use a dedicated name for the parsed file so the notebook-level
        # `data` (the stations payload from the load cell) is not overwritten.
        with open(json_path, encoding="utf-8") as f:
            trip_payload = json.load(f)
        if isinstance(trip_payload, list):
            trips = trip_payload
        else:
            trips = trip_payload.get("data", trip_payload.get("trips", []))
        for t in trips:
            start_id = t.get("start_station_id")
            end_id = t.get("end_station_id")
            # A JSON null would otherwise be stringified to "None" and counted
            # as if it were a real station id.
            if start_id is None or end_id is None:
                continue
            oid, did = str(start_id), str(end_id)
            # Skip empty ids and round trips (same start and end station).
            if not oid or not did or oid == did:
                continue
            pair_counts[(oid, did)] = pair_counts.get((oid, did), 0) + 1

# For each top-10 station, get top 5 pairs (where station is origin or dest)
# Only keep pairs where both stations exist in our stations data.
# One pass over pair_counts fills a bucket per top station instead of
# rescanning every pair once per station. Counts are directed, so (A, B) and
# (B, A) are ranked as separate candidates.
candidates_by_station = {sid: [] for sid in top_ids}
for (oid, did), count in pair_counts.items():
    if oid not in station_by_id or did not in station_by_id:
        continue
    # oid != did is guaranteed above, so a pair lands in a bucket at most once.
    for sid in (oid, did):
        if sid in candidates_by_station:
            candidates_by_station[sid].append(((oid, did), count))

unique_pairs = set()
for candidates in candidates_by_station.values():
    # list.sort is stable (also with reverse=True), so pairs with equal counts
    # keep their first-seen order, exactly as sorting on the negated count did.
    candidates.sort(key=lambda item: item[1], reverse=True)
    unique_pairs.update(pair for pair, _ in candidates[:CONNECTIONS_PER_STATION])

print(f"Unique pairs from top {CONNECTIONS_PER_STATION} connections per station: {len(unique_pairs)}")

In [None]:
# =============================================================================
# Step 3: Fetch both directions per pair, then run batch (≤100 requests)
# =============================================================================
# Upper bound on the number of requests this cell may hand to the batch call.
MAX_REQUESTS = 100

# Expand every pair into both directions. Iterating the set in sorted order
# makes the request order reproducible across kernel restarts (plain set
# iteration over string tuples depends on hash randomisation), and
# dict.fromkeys drops duplicates -- e.g. when (A, B) and (B, A) both came from
# the top connections -- while keeping first-seen order.
fetch_list = list(dict.fromkeys(
    directed
    for oid, did in sorted(unique_pairs)
    for directed in ((oid, did), (did, oid))
))

print(f"Total requests (both directions): {len(fetch_list)}")
# Guard placed before the batch call so an oversized list never reaches it.
assert len(fetch_list) <= MAX_REQUESTS, f"Too many requests: {len(fetch_list)} > {MAX_REQUESTS}"

results = fetch_routes_batch(
    fetch_list,
    stations,
    api_key,
    cache_dir,
)

# Results whose "cached" entry is truthy are counted as cache hits; every
# other result is counted as freshly fetched.
cached_count = sum(1 for r in results if r.get("cached"))
fetched_count = len(results) - cached_count
print(f"Fetched: {fetched_count}, cached: {cached_count}")