Installations

In [None]:
from google.colab import drive
drive.mount('/content/drive')

import os, re
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from IPython.display import display

Paths & Parameters

In [None]:
# ---------------------------------------------------------------
# Input files on the mounted Google Drive (edit to match your Drive)
# ---------------------------------------------------------------
CRASHES_PATH       = "/content/drive/My Drive/Crashes_in_DC.csv"
CAMERAS_PATH       = "/content/drive/My Drive/Automated_Traffic_Enforcement.csv"
SPEEDHUMPS_PATH    = "/content/drive/My Drive/Speed_Humps.csv"
SPEED_GEOJSON_PATH = "/content/drive/My Drive/Roadway_SubBlock.geojson"

# Output folder; created up front (no error if it already exists).
OUT_DIR            = "/content/drive/My Drive/outputs"
os.makedirs(OUT_DIR, exist_ok=True)

# ---------------------------------------------------------------
# Analysis parameters
# ---------------------------------------------------------------
DATE_START = "2020-01-01"   # lower bound applied to FROMDATE
DATE_END   = "2025-04-30"   # upper bound applied to FROMDATE
MAR_MIN    = 100            # minimum MAR_SCORE a crash must have to be kept


Helper functions: these remove junk data and give everything a standardized name.

In [None]:
def clean_intersection(s):
    """Normalise a raw intersection label.

    Returns "" when the value is None, a float NaN, a placeholder word
    (nan/none/null/missing, case-insensitive), or contains the marker text
    "Intersecting RouteID" or an asterisk. Otherwise every "/", "@" or "&"
    separator is rewritten as " & " and runs of whitespace are collapsed.
    """
    is_missing = s is None or (isinstance(s, float) and np.isnan(s))
    if is_missing:
        return ""

    text = str(s).strip()
    is_placeholder = re.fullmatch(r"(nan|none|null|missing)", text, flags=re.I) is not None
    is_junk = "Intersecting RouteID" in text or "*" in text
    if is_placeholder or is_junk:
        return ""

    # Unify the separator first, then squeeze whitespace.
    unified = re.sub(r"\s*[/@&]\s*", " & ", text)
    squeezed = re.sub(r"\s+", " ", unified)
    return squeezed.strip()

def extract_street_name(row):
    """Return the upper-cased primary street for one crash record.

    Parameters
    ----------
    row : mapping-like (dict or pandas Series)
        Must support ``in`` and ``.get``. The keys consulted are
        "MAR_Address", "ADDRESS" and "NEARESTINTSTREETNAME".

    Returns
    -------
    str
        The text before the first separator ("&", "@", "/", "," or " - ")
        of the first usable source, or "" when nothing usable is found.
        For the two address columns the quadrant tokens NE/NW/SE/SW are
        removed as well; the intersection fallback keeps them.

    Notes
    -----
    Address columns are tried in order and one that is not a string, or
    that yields an empty street, no longer blocks the next source. A
    missing intersection value (None / NaN) yields "" rather than the
    literal text "NONE" / "NAN".
    """
    separators = r"\s*&\s*|\s*@\s*|\s*/\s*|,| - "

    for column in ("MAR_Address", "ADDRESS"):
        if column not in row:
            continue
        value = row[column]
        if not isinstance(value, str):
            continue
        street = re.split(separators, value.upper().strip())[0]
        street = re.sub(r"\b(NE|NW|SE|SW)\b", "", street)
        street = re.sub(r"\s+", " ", street).strip()
        if street:
            return street

    raw = row.get("NEARESTINTSTREETNAME", "")
    # str(None) / str(nan) would otherwise come back as real-looking names.
    if raw is None or (isinstance(raw, float) and np.isnan(raw)):
        return ""
    inter = str(raw).upper().strip()
    if inter:
        return re.split(separators, inter)[0].strip()
    return ""

def standardize_name(df, out_col="NAME", candidates=None, fallback_series=None):
    """Write a single display-name column into ``df`` and return ``df``.

    For each row the first non-blank value wins, looking at the
    ``candidates`` columns in the order given (columns missing from ``df``
    are skipped) and finally at ``fallback_series``. Values are
    NaN-filled, stringified and stripped before the comparison. Rows left
    blank get the placeholder "(UNNAMED)". ``fallback_series`` is matched
    to ``df`` by position, not by index label. ``df`` is modified in place.
    """
    sources = [df[col] for col in (candidates or []) if col in df.columns]
    if fallback_series is not None:
        sources.append(fallback_series)

    names = np.full(len(df), "", dtype=object)
    for source in sources:
        cleaned = source.fillna("").astype(str).str.strip()
        # Keep what is already filled; otherwise take this source's value.
        names = np.where(names != "", names, cleaned)

    df[out_col] = pd.Series(names, index=df.index).replace("", "(UNNAMED)")
    return df


Load and clean crashes. Filter for MAR score and for date.
Also define the calculations for the severity sum.

In [None]:
# -------------------
# 4) Load & clean crashes (WITH severity score; no distances)
# -------------------
# STREETSEGID is forced to text; every other column keeps pandas' inferred dtype.
df = pd.read_csv(CRASHES_PATH, dtype={"STREETSEGID": str}, low_memory=False)

# Keep speeding-related only. Non-numeric flags coerce to NaN, which fails the > 0 test.
df["SPEEDING_INVOLVED"] = pd.to_numeric(df["SPEEDING_INVOLVED"], errors="coerce")
df = df[df["SPEEDING_INVOLVED"] > 0].copy()

# Valid lat/lon: coerce to numbers, then drop rows where either coordinate is
# missing (a value that was NaN before coercion is still NaN after it, so a
# single dropna is enough).
for _coord in ("LATITUDE", "LONGITUDE"):
    df[_coord] = pd.to_numeric(df[_coord], errors="coerce")
df = df.dropna(subset=["LATITUDE", "LONGITUDE"]).copy()

# Date window, inclusive of the whole DATE_END day.
# Comparing with `<= DATE_END` would stop at midnight at the start of that
# day and drop every crash with a later timestamp, so the upper bound is
# "strictly before the following midnight" instead.
df["FROMDATE"] = pd.to_datetime(df["FROMDATE"], errors="coerce")
# If parsing produced a timezone-aware column, the bounds must carry the same
# timezone to be comparable; tz_localize(None) leaves a naive bound naive.
_from_tz = (
    df["FROMDATE"].dt.tz
    if pd.api.types.is_datetime64_any_dtype(df["FROMDATE"])
    else None
)
_window_start = pd.Timestamp(DATE_START).tz_localize(_from_tz)
_window_end = (pd.Timestamp(DATE_END) + pd.Timedelta(days=1)).tz_localize(_from_tz)
df = df[(df["FROMDATE"] >= _window_start) & (df["FROMDATE"] < _window_end)].copy()

# MAR quality (if present): keep rows whose score is at least MAR_MIN.
if "MAR_SCORE" in df.columns:
    df["MAR_SCORE"] = pd.to_numeric(df["MAR_SCORE"], errors="coerce")
    df = df[df["MAR_SCORE"] >= MAR_MIN].copy()

# ----- Injury columns & SEVERITY_SCORE -----
# Injury columns grouped by road-user type. The UNKNOWNINJURIES_* entries are
# listed here but match none of the FATAL/MAJOR/MINOR filters below, so they
# do not take part in the injury filter or the score.
injury_categories = {
    "BICYCLIST":  ["MAJORINJURIES_BICYCLIST","MINORINJURIES_BICYCLIST","UNKNOWNINJURIES_BICYCLIST","FATAL_BICYCLIST"],
    "DRIVER":     ["MAJORINJURIES_DRIVER","MINORINJURIES_DRIVER","UNKNOWNINJURIES_DRIVER","FATAL_DRIVER"],
    "PEDESTRIAN": ["MAJORINJURIES_PEDESTRIAN","MINORINJURIES_PEDESTRIAN","UNKNOWNINJURIES_PEDESTRIAN","FATAL_PEDESTRIAN"],
    "PASSENGER":  ["MAJORINJURIESPASSENGER","MINORINJURIESPASSENGER","FATALPASSENGER"],
    "OTHER":      ["MAJORINJURIESOTHER","MINORINJURIESOTHER","FATALOTHER"],
}

_listed_injury_cols = [c for cols in injury_categories.values() for c in cols]
fatal_cols = [c for c in _listed_injury_cols if "FATAL" in c]
major_cols = [c for c in _listed_injury_cols if "MAJOR" in c]
minor_cols = [c for c in _listed_injury_cols if "MINOR" in c]
injury_cols_all = fatal_cols + major_cols + minor_cols

# Ensure all referenced injury columns exist & are numeric.
# Iterating the ordered list (rather than a set) means any column that has to
# be created is appended in the same order on every run.
for col in injury_cols_all:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)
    else:
        df[col] = 0

# Keep only crashes with at least one injury (exclude property-only)
df = df[df[injury_cols_all].sum(axis=1) > 0].copy()

# Weighted severity score: 7 per fatality, 4 per major injury, 1 per minor injury.
df["SEVERITY_SCORE"] = (
    7 * df[fatal_cols].sum(axis=1) +
    4 * df[major_cols].sum(axis=1) +
    1 * df[minor_cols].sum(axis=1)
)

# ----- Street / display names for each crash (attribute work only) -----
# Guarantee the intersection column exists so the helpers can rely on it.
if "NEARESTINTSTREETNAME" not in df.columns:
    df["NEARESTINTSTREETNAME"] = ""

_cleaned_intersections = df["NEARESTINTSTREETNAME"].apply(clean_intersection)
df["NEARESTINTSTREETNAME"] = _cleaned_intersections

# One street per row, taken from the address columns or the cleaned intersection.
_primary_streets = df.apply(extract_street_name, axis=1)
df["PRIMARY_STREET"] = _primary_streets.str.upper()

# NAME = first non-blank of PRIMARY_STREET, then NEARESTINTSTREETNAME.
df = standardize_name(
    df,
    out_col="NAME",
    candidates=["PRIMARY_STREET", "NEARESTINTSTREETNAME"],
    fallback_series=None,
)

# Geometry and projections are handled in later cells.
print(f"[Crashes after filters] {len(df):,} rows, with SEVERITY_SCORE computed.")

Load cameras and humps

In [None]:
# Speed cameras: rename the camera-specific coordinate columns to the shared
# LATITUDE / LONGITUDE names, then keep only rows with usable numeric coordinates.
cams = (
    pd.read_csv(CAMERAS_PATH)
    .rename(columns={"CAMERA_LATITUDE": "LATITUDE", "CAMERA_LONGITUDE": "LONGITUDE"})
    .dropna(subset=["LATITUDE", "LONGITUDE"])
    .copy()
)
cams = (
    cams.assign(
        LATITUDE=pd.to_numeric(cams["LATITUDE"], errors="coerce"),
        LONGITUDE=pd.to_numeric(cams["LONGITUDE"], errors="coerce"),
    )
    .dropna(subset=["LATITUDE", "LONGITUDE"])
    .copy()
)

# Speed humps: same coordinate clean-up (the file already uses LATITUDE / LONGITUDE).
humps = (
    pd.read_csv(SPEEDHUMPS_PATH)
    .dropna(subset=["LATITUDE", "LONGITUDE"])
    .copy()
)
humps = (
    humps.assign(
        LATITUDE=pd.to_numeric(humps["LATITUDE"], errors="coerce"),
        LONGITUDE=pd.to_numeric(humps["LONGITUDE"], errors="coerce"),
    )
    .dropna(subset=["LATITUDE", "LONGITUDE"])
    .copy()
)

Summaries of the total number of accidents, speed cameras, and speed humps, to help figure out what type of clustering should be done

In [None]:
# Row counts after cleaning, before any distance work.
_n_crashes = len(df)
_n_cams = len(cams)
_n_humps = len(humps)

print("=== Current Totals (pre-distance) ===")
print(f"Crashes being counted: {_n_crashes:,}")
print(f"Speed cameras: {_n_cams:,}")
print(f"Speed humps:   {_n_humps:,}")
print(f"Total devices: {_n_cams + _n_humps:,}")

There are few crashes and devices, so we can simply brute-force the distance calculations, using the haversine metric to find the distance between each crash and its nearest device. This is feasible because only 1,210 × 589 distance calculations are run in total. I also used Euclidean distances, as I did in the night-accidents analysis.

Before we do that though, we must assign all speed bumps, cameras, and accidents onto their actual street so that when we do our distance calculations they are all on the proper road

Load the DDOT lines

In [None]:
# --- 3.a) DDOT lines → standardized street name ("street_base") ---
import re, numpy as np, pandas as pd, geopandas as gpd
from shapely.geometry import Point

CRS_METERS = 32618      # target CRS passed to to_crs(); distances below are taken in its units
MAX_LATERAL_M = 20      # lateral tolerance to snap a point to a street

# Read the roadway sub-block lines; if the file declares no CRS, treat it as 4326.
gdf_limits_4326 = gpd.read_file(SPEED_GEOJSON_PATH)
if gdf_limits_4326.crs is None:
    gdf_limits_4326 = gdf_limits_4326.set_crs(4326)

# Projected copy used for all length / distance work.
gdf_limits_m = gdf_limits_4326.to_crs(CRS_METERS).copy()

# Street-name column: the first of these (in priority order) that the file has.
_DDOT_NAME_PRIORITY = ["STREETNAME", "ROUTENAME", "NAME", "FULLNAME", "SEGMENTNAME"]
name_cands_ddot = [col for col in _DDOT_NAME_PRIORITY if col in gdf_limits_m.columns]
ddot_name_col = next(iter(name_cands_ddot), None)

def norm_street(s: str) -> str:
    """Return an upper-cased, simplified street name for matching.

    None and float NaN give "". Otherwise the value is stringified,
    upper-cased and stripped; the quadrant tokens NE/NW/SE/SW are removed;
    any character other than A-Z, 0-9, space, "&", "'" or "-" becomes a
    space; and runs of whitespace are collapsed to one space.
    """
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return ""

    name = str(s).upper().strip()
    # Applied in order: quadrants, stray punctuation, whitespace runs.
    substitutions = (
        (r"\b(NE|NW|SE|SW)\b", ""),
        (r"[^A-Z0-9 &'-]", " "),
        (r"\s+", " "),
    )
    for pattern, replacement in substitutions:
        name = re.sub(pattern, replacement, name)
    return name.strip()

# Normalised street name per DDOT line; blank for every line when the file
# has none of the expected name columns.
gdf_limits_m["street_base"] = (
    gdf_limits_m[ddot_name_col].apply(norm_street) if ddot_name_col else ""
)

# Slim lookup table for the nearest-line joins: geometry + street_base only.
# Its index doubles as the line id in later cells.
ddot_lines = gdf_limits_m[["geometry", "street_base"]].copy()


Assign the devices to the nearest DDOT line

In [None]:
# 2) ASSIGN DEVICES: attach each camera / hump to its nearest DDOT line
# Inputs: cams and humps, each with LONGITUDE / LATITUDE columns.

# Point layers in lon/lat (CRS 4326), tagged with the kind of device.
gdf_cams_4326 = gpd.GeoDataFrame(
    cams.copy(),
    geometry=gpd.points_from_xy(cams["LONGITUDE"], cams["LATITUDE"]),
    crs=4326,
)
gdf_humps_4326 = gpd.GeoDataFrame(
    humps.copy(),
    geometry=gpd.points_from_xy(humps["LONGITUDE"], humps["LATITUDE"]),
    crs=4326,
)
gdf_cams_4326["device_kind"]  = "camera"
gdf_humps_4326["device_kind"] = "hump"

# Reproject and stack both device types into one table with a fresh 0..N-1 index.
cams_m  = gdf_cams_4326.to_crs(CRS_METERS)
humps_m = gdf_humps_4326.to_crs(CRS_METERS)
_device_cols = ["geometry", "device_kind"]
devices_m = pd.concat([cams_m[_device_cols], humps_m[_device_cols]], ignore_index=True)

# Nearest DDOT line per device; the join brings street_base along.
dev2line = gpd.sjoin_nearest(
    devices_m, ddot_lines, how="left", distance_col="dev_dist_to_line_m"
)
dev2line = dev2line.rename(columns={"index_right": "ddot_idx"})
# The devices_m row label becomes an explicit device_index column.
dev2line = dev2line.reset_index().rename(columns={"index": "device_index"})

# A device counts as on the street only within the lateral tolerance.
dev2line["dev_valid_line"] = dev2line["dev_dist_to_line_m"] <= MAX_LATERAL_M
dev2line["street_base"]    = dev2line["street_base"].fillna("").astype(str)

print("Devices valid to street:", int(dev2line["dev_valid_line"].sum()), "/", len(dev2line))


Assign crashes to their nearest DDOT line

In [None]:
# 3) ASSIGN CRASHES: attach each crash to its nearest DDOT line
# Input: df (the filtered crashes) with LONGITUDE / LATITUDE columns.

_crash_points = gpd.points_from_xy(df["LONGITUDE"], df["LATITUDE"])
gdf_crashes_4326 = gpd.GeoDataFrame(df.copy(), geometry=_crash_points, crs=4326)
crashes_m = gdf_crashes_4326.to_crs(CRS_METERS)

# Nearest DDOT line per crash; the join brings street_base along.
cr2line = gpd.sjoin_nearest(
    crashes_m, ddot_lines, how="left", distance_col="cr_dist_to_line_m"
)
cr2line = cr2line.rename(columns={"index_right": "ddot_idx"})
# The crash row label becomes an explicit crash_index column.
cr2line = cr2line.reset_index().rename(columns={"index": "crash_index"})

# A crash counts as on the street only within the lateral tolerance.
cr2line["cr_valid_line"] = cr2line["cr_dist_to_line_m"] <= MAX_LATERAL_M
cr2line["street_base"]   = cr2line["street_base"].fillna("").astype(str)

print("Crashes valid to street:", int(cr2line["cr_valid_line"].sum()), "/", len(cr2line))


Drop any devices or crashes that were not assigned to a DDOT line

In [None]:
# 3.5) DROP INVALID: a record stays only if it snapped within tolerance
# AND the line it snapped to has a non-blank street_base.
before_dev = len(dev2line)
before_cr = len(cr2line)

_dev_keep = dev2line["dev_valid_line"] & dev2line["street_base"].ne("")
_cr_keep = cr2line["cr_valid_line"] & cr2line["street_base"].ne("")

dev_on_street = dev2line[_dev_keep].copy()
cr_on_street  = cr2line[_cr_keep].copy()

print("Devices kept:", len(dev_on_street), "/", before_dev,
      "| Crashes kept:", len(cr_on_street), "/", before_cr)


Now calculate the Euclidean distances

In [None]:
# 4) DISTANCES — nearest device on the SAME STREET (keep INF if none)
#
# For every crash that was snapped to a street, find the closest device whose
# snapped street has the same street_base. Crashes on a street with no device
# keep an infinite distance. Result: `same_street`, the crash rows plus
#   dist_to_device_same_street_m, nearest_device_index, nearest_device_kind.

import numpy as np
import geopandas as gpd
import pandas as pd

# Start from devices_m (has geometry + device_kind) and add street_base from
# dev_on_street. Devices that were dropped there get NaN and match no street.
devices_m_idxed = devices_m.reset_index().rename(columns={"index": "device_index"})  # geometry, device_kind
devices_m_idxed = devices_m_idxed.merge(
    dev_on_street[["device_index", "street_base"]],
    on="device_index",
    how="left"
)

rows = []
for street, cr_grp in cr_on_street.groupby("street_base"):
    dev_grp = devices_m_idxed[devices_m_idxed["street_base"] == street]

    if dev_grp.empty:
        # No device on this street: keep the crashes, mark the distance as infinite.
        tmp = cr_grp.copy()
        tmp["dist_to_device_same_street_m"] = np.inf
        tmp["nearest_device_index"] = pd.NA
        tmp["nearest_device_kind"]  = pd.NA
        rows.append(tmp)
        continue

    dev_gdf = gpd.GeoDataFrame(
        dev_grp[["geometry", "device_index", "device_kind"]].copy(),
        geometry="geometry",
        crs=CRS_METERS
    )

    # Distances are measured in the units of CRS_METERS.
    tmp = gpd.sjoin_nearest(
        cr_grp.set_geometry("geometry"),
        dev_gdf,
        how="left",
        distance_col="dist_to_device_same_street_m"
    ).rename(columns={"index_right": "_dev_join_row"}).copy()

    tmp["nearest_device_index"] = tmp["device_index"]
    tmp["nearest_device_kind"]  = tmp["device_kind"]
    rows.append(tmp)

if rows:
    same_street = pd.concat(rows, ignore_index=True)
else:
    # No crash survived the street assignment. Still give the empty table the
    # columns the following statements and cells read, so they report zeros
    # rather than failing on a missing column.
    same_street = cr_on_street.copy()
    same_street["dist_to_device_same_street_m"] = pd.Series(
        np.inf, index=same_street.index, dtype="float64"
    )
    same_street["nearest_device_index"] = pd.NA
    same_street["nearest_device_kind"]  = pd.NA

finite = np.isfinite(same_street["dist_to_device_same_street_m"])
print("Same-street nearest-device distances computed.")
print(f"Finite distances: {int(finite.sum())} of {len(same_street)}")
print(
    same_street[["crash_index", "street_base", "dist_to_device_same_street_m"]]
    .head(8)
    .to_string(index=False)
)


Compute the average block size from the DDOT segments

In [None]:
# 5) THRESHOLD: typical block size, taken from DDOT segment lengths

import numpy as np
import geopandas as gpd

# Reuse the projected DDOT lines when an earlier cell built them;
# otherwise load and project them here.
try:
    _ = gdf_limits_m
except NameError:
    gdf_limits_4326 = gpd.read_file(SPEED_GEOJSON_PATH)
    if gdf_limits_4326.crs is None:
        gdf_limits_4326 = gdf_limits_4326.set_crs(4326)
    gdf_limits_m = gdf_limits_4326.to_crs(CRS_METERS).copy()

# Length of every segment, in the units of the projected CRS.
gdf_limits_m["seg_len_m"] = gdf_limits_m.geometry.length
_segment_lengths = gdf_limits_m["seg_len_m"]

# The mean is reported for reference; the median is what gets used.
mean_len   = float(_segment_lengths.mean())
median_len = float(_segment_lengths.median())

THRESHOLD_M = median_len

print(f"Block length — mean: {mean_len:,.1f} m | median: {median_len:,.1f} m")
print(f"Using THRESHOLD_M = {THRESHOLD_M:,.1f} m (median block size)")


Set aside the crashes that fall within the threshold, so that the remaining crashes are ready for clustering

In [None]:
# 6) FILTER — label within/outside by THRESHOLD_M, but KEEP outside/inf for clustering
#
# "Within" = a finite same-street device distance no larger than THRESHOLD_M.
# Everything else (farther than the threshold, or no device on the street at
# all) goes on to clustering.

import numpy as np
import pandas as pd

# Expect same_street from Cell 4
is_finite   = np.isfinite(same_street["dist_to_device_same_street_m"])
within_mask = is_finite & (same_street["dist_to_device_same_street_m"] <= THRESHOLD_M)

kept_within_threshold     = same_street[within_mask].copy()
remaining_for_clustering  = same_street[~within_mask].copy()   # includes finite>THRESHOLD_M and inf

# Percentages are reported as 0 when there are no rows at all, instead of
# dividing by zero.
_n_total = len(same_street)
if _n_total:
    within_pct  = (len(kept_within_threshold) / _n_total) * 100
    outside_pct = 100 - within_pct
else:
    within_pct = 0.0
    outside_pct = 0.0

print(f"Within threshold: {len(kept_within_threshold):,} / {len(same_street):,} "
      f"({within_pct:.2f}%)")
print(f"Outside threshold: {len(remaining_for_clustering):,} / {len(same_street):,} "
      f"({outside_pct:.2f}%)")

finite_outside = remaining_for_clustering[np.isfinite(remaining_for_clustering["dist_to_device_same_street_m"])]
infinite_rows  = remaining_for_clustering[~np.isfinite(remaining_for_clustering["dist_to_device_same_street_m"])]

print(f"Remaining for clustering: {len(remaining_for_clustering):,} "
      f"(finite>{THRESHOLD_M:.1f} m: {len(finite_outside):,}; inf: {len(infinite_rows):,})")

# Minimal inputs for clustering (needs the DDOT segment id)
cluster_input = remaining_for_clustering[["crash_index","ddot_idx","geometry"]].copy()
cluster_input = cluster_input.rename(columns={"ddot_idx":"cr_ddot_idx"})


Cluster using complete linkage with the 59.4-meter threshold (the median size of a block in DC)

In [None]:
# 7) CLUSTER: complete linkage within each DDOT segment, cut at THRESHOLD_M

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster

# Same median-based distance as the within/outside split.
CLUSTER_CUTOFF_M = THRESHOLD_M

# Positional 0..N-1 index so group row labels can index straight into `labels`.
cluster_input = cluster_input.reset_index(drop=True)

# -1 marks "not labelled"; every row that belongs to a segment group is overwritten.
labels = np.full(len(cluster_input), -1, dtype=int)

for seg_id, grp in cluster_input.groupby("cr_ddot_idx"):
    positions = grp.index.to_numpy()
    xy = np.column_stack([grp.geometry.x.values, grp.geometry.y.values])
    count = len(xy)

    if count == 1:
        # A lone crash is its own cluster.
        labels[positions] = 0
    elif count == 2:
        # Two crashes: one cluster if they lie within the cutoff, otherwise two.
        gap = np.linalg.norm(xy[0] - xy[1])
        if gap <= CLUSTER_CUTOFF_M:
            labels[positions] = 0
        else:
            labels[positions[0]] = 0
            labels[positions[1]] = 1
    else:
        # Three or more: complete-linkage tree cut at the cutoff distance.
        # fcluster labels start at 1, hence the shift to zero-based ids.
        tree = linkage(xy, method="complete", metric="euclidean")
        labels[positions] = fcluster(tree, t=CLUSTER_CUTOFF_M, criterion="distance") - 1

# Cluster ids restart at 0 in every segment, so a cluster is identified by
# the pair (cr_ddot_idx, cluster_id_complete).
cluster_input["cluster_id_complete"] = labels

print(f"Complete-linkage clustering done. Cutoff = {CLUSTER_CUTOFF_M:.1f} m (median block size)")
print(cluster_input[["cr_ddot_idx","cluster_id_complete"]].head(10).to_string(index=False))



Table

In [None]:
# =========================
#  Cluster Severity Table (RANK, N_CRASHES, SEVERITY_SUM, AVG_LON, AVG_LAT)
# =========================
# One row per (segment, cluster): crash count, summed per-crash severity and
# the mean crash position converted back to lon/lat.

import numpy as np
import pandas as pd
import geopandas as gpd
from IPython.display import display

# 1) One row per crash among the clustered points (first occurrence kept).
clust_unique = cluster_input.drop_duplicates("crash_index").copy()

# 2) Injury columns = any same_street column whose name mentions
#    FATAL, MAJOR or MINOR; make them numeric with NaN -> 0.
_SEVERITY_TAGS = ("FATAL", "MAJOR", "MINOR")
injury_cols = [
    c for c in same_street.columns
    if isinstance(c, str) and any(tag in c.upper() for tag in _SEVERITY_TAGS)
]
for c in injury_cols:
    same_street[c] = pd.to_numeric(same_street[c], errors="coerce").fillna(0)

fatal_cols = [c for c in injury_cols if "FATAL" in c.upper()]
major_cols = [c for c in injury_cols if "MAJOR" in c.upper()]
minor_cols = [c for c in injury_cols if "MINOR" in c.upper()]

# 3) Collapse to one row per crash (column-wise max across repeated rows),
#    then score each crash by its worst outcome only:
#    any fatality -> 7, else any major injury -> 4, else any minor injury -> 1.
_per_crash_cols = ["crash_index"] + fatal_cols + major_cols + minor_cols
ss1 = same_street[_per_crash_cols].groupby("crash_index", as_index=False).max()

has_fatal, has_major, has_minor = [
    (ss1[cols].sum(axis=1) > 0) if cols else False
    for cols in (fatal_cols, major_cols, minor_cols)
]

# np.select takes the first matching condition, which gives the priority order.
sev_priority = np.select(
    [has_fatal, has_major, has_minor],
    [7, 4, 1],
    default=0
).astype(float)

sev_per_crash = ss1[["crash_index"]].copy()
sev_per_crash["SEVERITY_SCORE"] = sev_priority

# 4) Attach the per-crash score to the clustered points; no match scores 0.
ci = clust_unique.merge(sev_per_crash, on="crash_index", how="left")
ci["SEVERITY_SCORE"] = pd.to_numeric(ci["SEVERITY_SCORE"], errors="coerce").fillna(0)

# 5) Per-cluster totals and mean position in projected coordinates.
ci["x"] = ci.geometry.x
ci["y"] = ci.geometry.y

_cluster_key = ["cr_ddot_idx", "cluster_id_complete"]
cluster_stats = ci.groupby(_cluster_key, as_index=False).agg(
    n_crashes=("crash_index", "size"),
    severity_sum=("SEVERITY_SCORE", "sum"),
    avg_x=("x", "mean"),
    avg_y=("y", "mean"),
)

# 6) Mean positions back to lon/lat for mapping.
_center_points = gpd.points_from_xy(cluster_stats["avg_x"], cluster_stats["avg_y"])
centers_g = gpd.GeoDataFrame(cluster_stats, geometry=_center_points, crs=CRS_METERS).to_crs(4326)

_centers_lonlat = centers_g.assign(
    avg_lon=lambda d: d.geometry.x,
    avg_lat=lambda d: d.geometry.y
)
cluster_df = _centers_lonlat.drop(
    columns=["geometry", "cr_ddot_idx", "cluster_id_complete", "avg_x", "avg_y"]
)

# 7) Requested columns only, upper-cased.
cluster_df = cluster_df[["n_crashes", "severity_sum", "avg_lon", "avg_lat"]]
cluster_df = cluster_df.rename(columns=str.upper)

# Highest severity first; crash count breaks ties.
cluster_df = cluster_df.sort_values(
    ["SEVERITY_SUM", "N_CRASHES"], ascending=[False, False]
).reset_index(drop=True)

# RANK goes first and follows the sort order (1 = most severe).
cluster_df.insert(0, "RANK", np.arange(1, len(cluster_df) + 1))

print("Cluster Table (RANK, N_CRASHES, SEVERITY_SUM, AVG_LON, AVG_LAT):")
display(cluster_df.head(10))



map

In [None]:
# === MAP: All speeding crashes (uniform style) + all devices + TOP 10 clusters (size+color by SEVERITY_SUM) ===
#
# Layers: camera points, hump points (both hidden by default), every crash in
# same_street as a small uniform dot, and the ten highest-ranked clusters
# drawn with radius and colour scaled by SEVERITY_SUM.
import numpy as np
import pandas as pd
import geopandas as gpd
import folium
from branca.colormap import linear

# 0) Ensure devices exist in WGS84 (rebuild them if the assignment cell was not run)
try:
    _ = gdf_cams_4326
    _ = gdf_humps_4326
except NameError:
    gdf_cams_4326  = gpd.GeoDataFrame(
        cams.copy(),
        geometry=gpd.points_from_xy(cams["LONGITUDE"], cams["LATITUDE"]),
        crs=4326
    )
    gdf_humps_4326 = gpd.GeoDataFrame(
        humps.copy(),
        geometry=gpd.points_from_xy(humps["LONGITUDE"], humps["LATITUDE"]),
        crs=4326
    )

# 1) Convert crashes to WGS84
crashes_4326 = gpd.GeoDataFrame(same_street.copy(), geometry="geometry", crs=CRS_METERS).to_crs(4326)

# 2) Top 10 clusters, ordered exactly like the cluster table
#    (SEVERITY_SUM, then N_CRASHES). Sorting on SEVERITY_SUM alone could
#    reorder tied clusters and show a rank that disagrees with the table.
if "cluster_df" in globals() and isinstance(cluster_df, pd.DataFrame) and not cluster_df.empty:
    top10_clusters = (
        cluster_df
        .sort_values(["SEVERITY_SUM", "N_CRASHES"], ascending=[False, False])
        .head(10)
        .copy()
    )
else:
    top10_clusters = pd.DataFrame()

# 3) Cluster styling (variable color + radius by SEVERITY_SUM)
if not top10_clusters.empty:
    vmin = float(top10_clusters["SEVERITY_SUM"].min())
    vmax = float(top10_clusters["SEVERITY_SUM"].max())
    if vmin == vmax:
        # All shown clusters share one value: anchor the scale at 0 so the
        # colour ramp still has a range.
        vmin = 0.0
    cmap = linear.Reds_09.scale(vmin, vmax)

    def radius_by_sev(sev):
        """Marker radius from 6 (at vmin) to 26 (at vmax); 6 if the range is empty."""
        sev = float(sev)
        return 6 if vmax == vmin else 6 + 20 * (sev - vmin) / (vmax - vmin)
else:
    cmap = None
    def radius_by_sev(sev):  # unused
        """Placeholder used only when there are no clusters to draw."""
        return 10

# 4) Build the map (no default tiles; two selectable base layers are added)
m = folium.Map(location=[38.9072, -77.0369], zoom_start=12, tiles=None, max_zoom=19)
folium.TileLayer('cartodbpositron', name='OSM (Carto Positron)', control=True).add_to(m)
folium.TileLayer(
    tiles="https://services.arcgisonline.com/ArcGIS/rest/services/World_Street_Map/MapServer/tile/{z}/{y}/{x}",
    attr="Esri, Maxar, Earthstar Geographics, and the GIS User Community",
    name="ESRI World Street (z≤19)", max_zoom=19, control=True
).add_to(m)

# 5) Layer: All devices (cameras & humps)
fg_cams  = folium.FeatureGroup(name="Devices – Speed Cameras", show=False)
for _, r in gdf_cams_4326.iterrows():
    folium.CircleMarker(
        location=[float(r.geometry.y), float(r.geometry.x)],
        radius=3, color="#1976d2",
        fill=True, fill_opacity=0.9,
        popup=folium.Popup("Speed Camera", max_width=200)
    ).add_to(fg_cams)
fg_cams.add_to(m)

fg_humps = folium.FeatureGroup(name="Devices – Speed Humps", show=False)
for _, r in gdf_humps_4326.iterrows():
    folium.CircleMarker(
        location=[float(r.geometry.y), float(r.geometry.x)],
        radius=3, color="#2e7d32",
        fill=True, fill_opacity=0.9,
        popup=folium.Popup("Speed Hump", max_width=200)
    ).add_to(fg_humps)
fg_humps.add_to(m)

# 6) Layer: All speeding crashes (UNIFORM style: same size + same color)
CRASH_COLOR = "#0d6efd"
fg_crashes = folium.FeatureGroup(name="All Speeding Crashes (uniform)", show=True)
for _, r in crashes_4326.iterrows():
    dt = r["FROMDATE"].date() if "FROMDATE" in r and pd.notna(r["FROMDATE"]) else ""
    # Blank when the score column is absent or the value is missing
    # (int() cannot convert NaN).
    raw_sev = r.get("SEVERITY_SCORE", None)
    sev = int(raw_sev) if pd.notna(raw_sev) else ""

    pop = folium.Popup(
        f"Date: {dt}<br>Severity score: {sev}",
        max_width=280
    )

    folium.CircleMarker(
        location=[float(r.geometry.y), float(r.geometry.x)],
        radius=2.5,
        color=CRASH_COLOR,
        fill=True,
        fill_color=CRASH_COLOR,   # explicit fill colour (was an empty string)
        fill_opacity=0.65,
        weight=1,
        popup=pop
    ).add_to(fg_crashes)
fg_crashes.add_to(m)

# 7) Layer: TOP 10 cluster centroids (variable style by SEVERITY_SUM)
if not top10_clusters.empty:
    fg_clusters = folium.FeatureGroup(name="Top 10 Clusters (by SEVERITY_SUM)", show=True)

    for position, (_, row) in enumerate(top10_clusters.iterrows(), start=1):
        # Show the table's own RANK when it is available, so map and table agree;
        # fall back to the position in the sorted top-10 otherwise.
        rank = int(row["RANK"]) if "RANK" in row and pd.notna(row["RANK"]) else position
        lat = float(row["AVG_LAT"])
        lon = float(row["AVG_LON"])
        sev = float(row["SEVERITY_SUM"])
        n   = int(row["N_CRASHES"])

        popup = folium.Popup(
            f"<b>Rank:</b> {rank}<br>"
            f"<b>Crashes:</b> {n}<br>"
            f"<b>Severity sum:</b> {int(sev)}<br>"
            f"<b>Center:</b> {lat:.6f}, {lon:.6f}",
            max_width=360
        )

        folium.CircleMarker(
            location=[lat, lon],
            radius=radius_by_sev(sev),
            color=cmap(sev),
            fill=True,
            fill_color=cmap(sev),
            fill_opacity=0.9,
            weight=2,
            popup=popup,
            tooltip=f"Cluster Rank {rank} | Sev {int(sev)} | Cr {n}"
        ).add_to(fg_clusters)

    fg_clusters.add_to(m)

    # Colour legend for the cluster layer.
    cmap.caption = "Cluster severity (SEVERITY_SUM) — Top 10 only"
    cmap.add_to(m)
else:
    print("[Note] cluster_df not found or empty — skipping top-10 cluster layer.")

folium.LayerControl(collapsed=False).add_to(m)
display(m)

