In [1]:
%pip install osmnx

Collecting osmnx
  Obtaining dependency information for osmnx from https://files.pythonhosted.org/packages/0e/a7/8606797abfd9a47cd11f26ca7d70b818d9e2f5346811d797efb3429b7603/osmnx-2.0.7-py3-none-any.whl.metadata
  Downloading osmnx-2.0.7-py3-none-any.whl.metadata (4.9 kB)
Collecting geopandas>=1.0.1 (from osmnx)
  Obtaining dependency information for geopandas>=1.0.1 from https://files.pythonhosted.org/packages/0b/70/d5cd0696eff08e62fdbdebe5b46527facb4e7220eabe0ac6225efab50168/geopandas-1.1.1-py3-none-any.whl.metadata
  Downloading geopandas-1.1.1-py3-none-any.whl.metadata (2.3 kB)
Collecting shapely>=2.0 (from osmnx)
  Obtaining dependency information for shapely>=2.0 from https://files.pythonhosted.org/packages/13/02/58b0b8d9c17c93ab6340edd8b7308c0c5a5b81f94ce65705819b7416dba5/shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata
  Downloading shapely-2.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (6.8 kB)
Collecting pyogrio>=0.7

In [4]:
# Extract Berlin road network
import osmnx as ox

G = ox.graph_from_place("Berlin, Germany", network_type="drive")
edges = ox.graph_to_gdfs(G, nodes=False)

# Create unique road ID from MultiIndex
edges["road_id"] = edges.index.map(lambda idx: f"{idx[0]}_{idx[1]}_{idx[2]}")

# Save
edges.to_file("data/berlin_roads.geojson", driver="GeoJSON")


In [6]:
# Create dummy forecast data

import json
import random
import geopandas as gpd

# Load the roads we just created
roads = gpd.read_file("data/berlin_roads.geojson")

# Get all road IDs
road_ids = roads["road_id"].tolist()

# Generate dummy forecast data (24 hours of congestion values between 0 and 1)
forecast_data = {
    road_id: [round(random.random(), 2) for _ in range(25)]
    for road_id in road_ids
}

# Create forecast JSON
forecast = {
    "generated_at": "2025-01-01T12:00:00Z",
    "data": forecast_data
}

# Save to file
with open("data/forecast.json", "w") as f:
    json.dump(forecast, f, indent=2)

print(f"Generated forecast for {len(road_ids)} roads")

  return pyogrio.read_dataframe(path_or_bytes, bbox=bbox, **kwargs)


Generated forecast for 73687 roads


In [1]:
# Precompute and save the road network graph for routing
import osmnx as ox

# Download Berlin drive graph (heavy operation)
G = ox.graph_from_place("Berlin, Germany", network_type="drive")

# Save graph to file
ox.save_graphml(G, "data/berlin_drive.graphml")


In [2]:
import pandas as pd
import geopandas as gpd
import json

# ---- Inputs ----
PREPROCESSED_DATA_PATH = "/Data/CongestionAI/prepared_data/preprocessed_full_data.csv"
WFS_JSON_PATH = "/Data/CongestionAI/json_api_traffic_test.json"
OUT_PATH = "/Data/CongestionAI/backend/data/detector_to_wfs_segment.parquet"

# ---- Extract unique detectors from preprocessed data ----
print("Loading preprocessed data (only needed columns)...")
det = pd.read_csv(
    PREPROCESSED_DATA_PATH, 
    usecols=["detector_id", "lon", "lat"]
).drop_duplicates(subset=["detector_id"])
print(f"Found {len(det)} unique detectors")

# Convert to GeoDataFrame
g_det = gpd.GeoDataFrame(
    det,
    geometry=gpd.points_from_xy(det["lon"], det["lat"]),
    crs="EPSG:4326",
)

# ---- Load WFS segments from JSON (it's actually valid GeoJSON) ----
print("Loading WFS segments...")
g_seg = gpd.read_file(WFS_JSON_PATH)
print(f"Loaded {len(g_seg)} road segments")

# Ensure the id column exists
if "unique_id" not in g_seg.columns:
    raise ValueError("WFS segments missing 'unique_id' in properties")

# ---- Work in metric CRS for correct distances ----
# Berlin: EPSG:25833 is a good metric CRS
g_det_m = g_det.to_crs("EPSG:25833")
g_seg_m = g_seg.to_crs("EPSG:25833")[["unique_id", "geometry"]]

# ---- Nearest segment per detector ----
print("Finding nearest segment for each detector...")
joined = gpd.sjoin_nearest(
    g_det_m[["detector_id", "lon", "lat", "geometry"]],
    g_seg_m,
    how="left",
    distance_col="dist_m",
)

# Optional: drop matches too far away (tune threshold)
MAX_DIST_M = 300
n_far = (joined["dist_m"] > MAX_DIST_M).sum()
joined.loc[joined["dist_m"] > MAX_DIST_M, "unique_id"] = pd.NA
print(f"Dropped {n_far} detectors > {MAX_DIST_M}m from any segment")

mapping = joined[["detector_id", "unique_id", "dist_m", "lon", "lat"]].copy()

# Save
mapping.to_parquet(OUT_PATH, index=False)
print(f"\nSaved mapping: {OUT_PATH}")
print(f"Matched: {mapping['unique_id'].notna().sum()} / {len(mapping)} detectors")
print(mapping.head(10))

Loading preprocessed data (only needed columns)...
Found 380 unique detectors
Loading WFS segments...
Loaded 18135 road segments
Finding nearest segment for each detector...
Dropped 0 detectors > 300m from any segment

Saved mapping: /Data/CongestionAI/backend/data/detector_to_wfs_segment.parquet
Matched: 580 / 580 detectors
       detector_id                      unique_id     dist_m        lon  \
0  100101010000369  32450044_32460012.01_32450044   1.532453  13.192747   
1  100101010000874  36450013_36460004.01_36450013   4.307244  13.261301   
2  100101010000975  36450013_36460004.01_36450013   4.307244  13.261301   
3  100101010001076  36450014_36450015.01_36450014   2.004016  13.263105   
3  100101010001076  36450014_36450015.01_36450015   2.004016  13.263105   
4  100101010001177  36450014_36450015.01_36450014   2.004016  13.263105   
4  100101010001177  36450014_36450015.01_36450015   2.004016  13.263105   
5  100101010001379  36450013_36450040.01_36450013   2.220923  13.259881  