# Q2 — Stops, Campus Anchors, Edge Spillover, Exemption Intensity
**Source:** `violations_routes_filtered.csv`
**Scope:** M101, M60+, M15+; `is_exempt == True`

In [None]:
import pandas as pd, numpy as np, re
from pathlib import Path

# Input extract and the subset of columns this notebook reads from it.
DATA = "/mnt/data/violations_routes_filtered.csv"
usecols = [
    "Bus Route ID",
    "Datetime",
    "is_exempt",
    "Stop Name",
    "Violation Status",
    "Violation Latitude",
    "Violation Longitude",
    "Vehicle ID",
]
df = pd.read_csv(DATA, usecols=usecols, low_memory=True)
# Trim stray whitespace from the header names.
df.columns = df.columns.str.strip()

def route_tag(r):
    """Normalize a raw bus route ID to a short route tag.

    Missing values map to ``None``. Otherwise the ID is upper-cased and
    stripped, then:

    * anything starting with ``M101`` becomes ``"M101"``;
    * anything starting with ``M60`` becomes ``"M60+"`` when it contains
      ``SBS``, ``+`` or ``-``, else ``"M60"``;
    * anything starting with ``M15`` becomes ``"M15+"`` when it contains
      ``SBS`` or ``+``, else ``"M15"``;
    * every other ID is returned in its cleaned form.
    """
    if pd.isna(r):
        return None

    code = str(r).upper().strip()

    if code.startswith("M101"):
        return "M101"

    if code.startswith("M60"):
        # A bare "-" counts as a select-bus marker for M60 only (not for M15).
        is_plus = any(marker in code for marker in ("SBS", "+", "-"))
        return "M60+" if is_plus else "M60"

    if code.startswith("M15"):
        is_plus = any(marker in code for marker in ("SBS", "+"))
        return "M15+" if is_plus else "M15"

    return code

# Tag each row with its normalized route and keep only the three study routes.
df["route_tag"] = df["Bus Route ID"].apply(route_tag)
in_scope = df["route_tag"].isin(["M101","M60+","M15+"])
df = df[in_scope]
# Coerce is_exempt to a real boolean: only these lower-cased spellings count as True.
exempt_text = df["is_exempt"].astype(str).str.lower()
df["is_exempt"] = exempt_text.isin(["true","1","t","yes","y"])
# The column is boolean now, so it can be used directly as the row mask.
ex = df[df["is_exempt"]].copy()

def top_stops(d, topn=20):
    """Return the ``topn`` stops with the most rows in ``d``.

    The result has one row per stop with columns ``"Stop Name"``,
    ``"exempt_count"`` (rows at that stop) and ``"share_pct"`` (that count as
    a percentage of all rows in ``d``, rounded to 2 decimals), ordered by
    count, largest first. ``share_pct`` is 0.0 when ``d`` has no rows.
    """
    per_stop = d.groupby("Stop Name").size().rename("exempt_count").reset_index()
    ranked = per_stop.sort_values("exempt_count", ascending=False).head(topn)

    n_rows = d.shape[0]
    if n_rows:
        ranked["share_pct"] = (ranked["exempt_count"] / n_rows * 100).round(2)
    else:
        ranked["share_pct"] = 0.0
    return ranked

# Show the 20 busiest exempt stops for each study route.
for r in ("M101", "M60+", "M15+"):
    route_ex = ex[ex["route_tag"] == r]
    print(f"\n=== Top stops — {r} ===")
    display(top_stops(route_ex, 20))

# Regex patterns, keyed by route tag, that count_patterns ORs together and
# matches against upper-cased stop names to count "campus-adjacent" exemptions.
# NOTE(review): presumably each pattern marks a stop near a CUNY campus, as the
# name suggests — confirm the stop list against actual campus locations.
# Some patterns within a route overlap (e.g. the 68 ST and 23 ST variants);
# because matches are OR-ed, an overlapping row is still counted only once.
CUNY_PATS = {
    "M101": [r"LEXINGTON AV/E 68 ST", r"\b68 ST\b", r"\bE 68\b", r"LEXINGTON AV/E 79 ST", r"3 AV/E 60 ST"],
    "M60+": [r"\b125 ST\b", r"AMSTERDAM", r"PARK AV"],
    "M15+": [r"\b23 ST\b", r"2 AV/E 23 ST", r"FULTON ST", r"CHAMBERS ST"],
}
def count_patterns(d, pats):
    """Count rows of ``d`` whose stop name matches any regex in ``pats``.

    Matching runs on the upper-cased ``"Stop Name"`` column with missing names
    treated as empty strings, so patterns should be written in upper case.

    Args:
        d: DataFrame with a ``"Stop Name"`` column.
        pats: Sequence of regular-expression strings; may be empty.

    Returns:
        ``(matched_rows, total_rows)`` as plain ints. A row matching several
        patterns is counted once. With no patterns, ``matched_rows`` is 0.
    """
    s = d["Stop Name"].fillna("").str.upper()
    # Start from an all-False mask so an empty pattern list yields zero matches
    # instead of raising IndexError on pats[0].
    mask = pd.Series(False, index=s.index)
    for p in pats:
        mask = mask | s.str.contains(p)
    return int(mask.sum()), int(d.shape[0])

# Per route: how many exempt rows fall at a campus-pattern stop, and what share
# of that route's exempt rows this represents.
rows = []
for r in ("M101", "M60+", "M15+"):
    d = ex.loc[ex["route_tag"] == r]
    cnt, tot = count_patterns(d, CUNY_PATS[r])
    # Guard against a route with no exempt rows.
    share = 100 * cnt / tot if tot else 0.0
    rows.append([r, cnt, tot, round(share, 2)])
campus = pd.DataFrame(
    rows,
    columns=["route", "cuny_adj_exempt", "total_exempt", "share_pct"],
)
print("\n=== Campus-adjacent shares ===")
display(campus)

# Exempt rows as a share of all in-scope violations, per route.
ex_counts = ex.groupby("route_tag").size().rename("exempt").reset_index()
tot_counts = df.groupby("route_tag").size().rename("total").reset_index()
# Left join keeps routes with no exempt rows; their missing count becomes 0.
intensity = pd.merge(tot_counts, ex_counts, on="route_tag", how="left")
intensity = intensity.fillna({"exempt": 0})
intensity["exempt_share_pct"] = (
    intensity["exempt"].div(intensity["total"]).mul(100).round(2)
)
print("\n=== Exemption intensity (share of all violations) ===")
display(intensity)

# Regex patterns, keyed by route tag, that are OR-ed together and matched
# against upper-cased stop names to count "edge/approach" exemptions.
# NOTE(review): presumably these mark bridge, tunnel and route-end approaches —
# confirm against the route maps.
# NOTE(review): "FULTON ST" (M15+) also appears in CUNY_PATS["M15+"], and the
# M101 entries r"\b125 ST\b" / "AMSTERDAM" also appear in CUNY_PATS["M60+"], so
# the campus and edge counts are not mutually exclusive.
EDGE_PATS = {
    "M60+": [r"HOYT AV", r"TRIBORO", r"RFK", r"\bBRIDGE\b", r"ASTORIA BLVD"],
    "M15+": [r"QUEENSBORO", r"\b59 ST\b", r"TUNNEL", r"\bFDR\b", r"BATTERY PARK", r"WATER ST", r"PEARL ST", r"FULTON ST"],
    "M101": [r"\b125 ST\b", r"AMSTERDAM", r"HARLEM RIVER", r"RFK"],
}
# Per route: how many exempt rows fall at an edge-pattern stop, and what share
# of that route's exempt rows this represents.
edge_rows = []
for r in ("M101", "M60+", "M15+"):
    pats = EDGE_PATS.get(r, [])
    if not pats:
        # Routes without edge patterns are left out of the table.
        continue
    d = ex[ex["route_tag"] == r]
    # Same upper-case, OR-ed regex match used for the campus counts.
    cnt, tot = count_patterns(d, pats)
    share = 100 * cnt / tot if tot else 0.0
    edge_rows.append([r, cnt, tot, round(share, 2)])
edges = pd.DataFrame(
    edge_rows,
    columns=["route", "edge_exemptions", "total_exempt", "edge_share_pct"],
)
print("\n=== Edge/approach shares ===")
display(edges)

# Persist the three summary tables as CSVs, creating the folder if needed.
outdir = Path("/mnt/data/q2_notebooks/outputs")
outdir.mkdir(parents=True, exist_ok=True)
for filename, frame in (
    ("campus_adjacent_shares.csv", campus),
    ("exemption_intensity.csv", intensity),
    ("edge_shares.csv", edges),
):
    frame.to_csv(outdir / filename, index=False)
print("Saved outputs to", outdir)