In [1]:
# Imports / display settings

from pathlib import Path
from typing import Dict, List, Optional

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import openmatrix as omx

pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 200)


## Reading files

In [2]:
# Config (paths, scenario names)

# Folder whose immediate sub-directories are treated as scenarios (see ALL_SCENARIOS below).
# NOTE(review): absolute, machine-specific path — the notebook only runs where this folder exists.
ROOT_DIR = Path(r"C:\projects\sandag\av_tnc_routing\av_run_dir\outputs\AV_intraHH_sensitivity_tests")

# Logical table name -> CSV file name expected inside each scenario directory.
SCENARIO_FILES = {
    "households": "final_households.csv",
    "trips": "final_trips.csv",
    "av_trips": "final_av_vehicle_trips.csv",
}

# Land-use CSV; read once and shared by every scenario.
LAND_USE_PATH = Path(
    r"C:\projects\sandag\av_tnc_routing\av_run_dir\outputs\sensitivity_test_run_dir\final_land_use.csv"
)

# OMX skim file from which the distance matrix is read.
SKIM_PATH = Path(
    r"C:\projects\sandag\av_tnc_routing\av_run_dir\input_data_full\skims\traffic_skims_MD.omx"
)

# Name of the matrix inside SKIM_PATH that is loaded as the distance skim.
SKIM_DIST_CORE = "SOV_TR_H_DIST__MD"

# trip_mode values that the metric functions count as driving trips.
DRIVE_MODES = ["DRIVEALONE", "SHARED2", "SHARED3"]
# Repositioning choices listed first (in this order) in tables/plots; other choices follow.
REPO_CORE_ORDER = ["stay_with_person", "go_to_parking", "go_home"]

# NOTE(review): not referenced in the visible part of this notebook; clock_minutes_to_hhmm
# hard-codes a 3-hour offset instead — confirm whether this constant is still needed.
DAY_START_HHMM = "03:00"

# Every sub-directory of ROOT_DIR is a scenario, sorted by name.
all_dirs = sorted([p.name for p in ROOT_DIR.iterdir() if p.is_dir()])

# "base" is always listed first, even if no directory named "base" exists under ROOT_DIR.
ALL_SCENARIOS = ["base"] + [s for s in all_dirs if s != "base"]
print(ALL_SCENARIOS)


['base', 'deadheading_charge', 'no_cost_remote_parking', 'no_parking_cbd']


In [3]:
#  helpers (no I/O)

def stop_period_to_clock_minutes(stop_period: pd.Series) -> pd.Series:
    """Convert 1-based half-hour period numbers to minutes elapsed since period 1.

    Values that cannot be parsed as numbers become NaN. Period 1 maps to 0.0,
    period 2 to 30.0, and so on.
    """
    periods = pd.to_numeric(stop_period, errors="coerce")
    zero_based = periods - 1
    return zero_based * 30.0

def clock_minutes_to_hhmm(minutes_since_3am: float) -> str:
    """Format an offset in minutes from 03:00 as a zero-padded "HH:MM" clock string.

    The offset is rounded to the nearest whole minute and wraps around midnight.
    Non-finite input (NaN or +/-inf) yields an empty string.
    """
    if np.isfinite(minutes_since_3am):
        offset = int(round(minutes_since_3am))
        # 180 minutes = the 03:00 origin; 1440 minutes = one full day.
        minute_of_day = (3 * 60 + offset) % (24 * 60)
        hours, mins = divmod(minute_of_day, 60)
        return f"{hours:02d}:{mins:02d}"
    return ""

def format_time_ticks(fig: go.Figure, minutes: List[int]) -> None:
    """Put clock-time tick labels on the x axes of ``fig`` (modified in place).

    Args:
        fig: Figure whose x axes are updated.
        minutes: Tick positions, in the same minute offsets that
            ``clock_minutes_to_hhmm`` accepts; each one is labelled "HH:MM".
    """
    fig.update_xaxes(
        tickmode="array",
        tickvals=minutes,
        ticktext=[clock_minutes_to_hhmm(v) for v in minutes],
        # The original title was f"Time of day)" — stray ")" and a placeholder-free f-string.
        title="Time of day",
    )

# Plot helpers

def fig_grouped_choice_pct(df: pd.DataFrame, title: str) -> go.Figure:
    """Grouped bar chart of choice shares, one bar trace per scenario.

    Reads the ``scenario``, ``choice`` and ``pct`` columns of ``df``; ``pct``
    is multiplied by 100 for display.
    """
    scenario_col = df["scenario"]
    fig = go.Figure()
    for name in scenario_col.unique():
        rows = df[scenario_col == name]
        bars = go.Bar(x=rows["choice"], y=rows["pct"] * 100.0, name=name)
        fig.add_trace(bars)
    fig.update_layout(
        title=title,
        barmode="group",
        xaxis_title="Choice",
        yaxis_title="Percent",
    )
    return fig

def fig_overlay_time_series(df: pd.DataFrame, title: str, y_col: str) -> go.Figure:
    """Overlay one line per scenario, plotting ``y_col`` against ``clock_min``.

    Each scenario's rows are sorted by ``clock_min`` before plotting; the y-axis
    title is the name of ``y_col``.
    """
    scenario_col = df["scenario"]
    fig = go.Figure()
    for name in scenario_col.unique():
        ordered = df[scenario_col == name].sort_values("clock_min")
        line = go.Scatter(x=ordered["clock_min"], y=ordered[y_col], mode="lines", name=name)
        fig.add_trace(line)
    fig.update_layout(title=title, yaxis_title=y_col)
    return fig


In [4]:
# Fixed input: distance skim (ONE TIME READ).
# NOTE(review): despite the "land use" print below, land use is NOT read in this cell —
# LAND_USE is loaded by pd.read_csv in the following cell.

with omx.open_file(SKIM_PATH, "r") as f:
    # Materialise the distance matrix as an in-memory array before the file is closed.
    SKIM_DIST = np.asarray(f[SKIM_DIST_CORE])

    # Use the first mapping stored in the file to translate zone ids to array positions.
    map_name = list(f.list_mappings())[0]
    zones = list(f.mapping(map_name))
    # zone id -> position in the mapping's iteration order.
    # Assumes that order matches the matrix row/column order — TODO confirm.
    TAZ_TO_IDX = {int(z): int(i) for i, z in enumerate(zones)}

# NOTE(review): this message only echoes the path; the file has not been read yet at this point.
print("Loaded land use:", LAND_USE_PATH)
print("Loaded skim:", SKIM_PATH)
print("Skim shape:", SKIM_DIST.shape)
print("TAZ mapping size:", len(TAZ_TO_IDX))


Loaded land use: C:\projects\sandag\av_tnc_routing\av_run_dir\outputs\sensitivity_test_run_dir\final_land_use.csv
Loaded skim: C:\projects\sandag\av_tnc_routing\av_run_dir\input_data_full\skims\traffic_skims_MD.omx
Skim shape: (4947, 4947)
TAZ mapping size: 4947


In [5]:
# Build MGRA -> TAZ mapping
# Land-use table, read once here; its "mgra" and "taz" columns feed build_mgra_to_taz below.
LAND_USE = pd.read_csv(LAND_USE_PATH, low_memory=False)

def build_mgra_to_taz(land_use: pd.DataFrame) -> Dict[int, int]:
    """Return an ``{mgra: taz}`` lookup built from the land-use table.

    Rows where either ``mgra`` or ``taz`` is missing are ignored. Both values
    are cast to plain ``int``. If an MGRA appears more than once, the last row wins.
    """
    mgra_values = land_use["mgra"]
    taz_values = land_use["taz"]
    both_present = mgra_values.notna() & taz_values.notna()

    keys = mgra_values[both_present].astype(int).tolist()
    vals = taz_values[both_present].astype(int).tolist()
    return dict(zip(keys, vals))

# Module-level MGRA -> TAZ lookup; the distance helpers in later cells read it as a global.
MGRA_TO_TAZ = build_mgra_to_taz(LAND_USE)
print("MGRA - TAZ mapping size:", len(MGRA_TO_TAZ))


MGRA - TAZ mapping size: 24333


In [6]:
# Scenario loader + bulk scenario read
# This cell reads ALL scenario CSVs into memory exactly once.
# Everything below will use SCENARIO_DATA_ALL and MUST NOT call read_csv again.

def load_av_scenario_csv(scenario_name: str) -> Optional[dict]:
    """Read the three scenario CSVs found under ``ROOT_DIR / scenario_name``.

    Returns a dict with the scenario name (``"scenario"``), its directory
    (``"dir"``) and one DataFrame each for ``"households"``, ``"trips"`` and
    ``"av_trips"``. File names come from ``SCENARIO_FILES``. A missing file
    propagates the error raised by ``pd.read_csv``; this function never
    returns ``None`` itself.
    """
    scenario_dir = ROOT_DIR / scenario_name

    # CSV-only, no timers; tables are read in this fixed order.
    tables = {
        table: pd.read_csv(scenario_dir / SCENARIO_FILES[table], low_memory=False)
        for table in ("households", "trips", "av_trips")
    }

    return {"scenario": scenario_name, "dir": scenario_dir, **tables}

# scenario name -> dict returned by load_av_scenario_csv (the raw tables for that scenario).
# Re-running this cell rebuilds the dict from disk and so discards any columns that later
# cells added to the stored tables (e.g. trip_dist).
SCENARIO_DATA_ALL: Dict[str, dict] = {}
for scen in ALL_SCENARIOS:
    SCENARIO_DATA_ALL[scen] = load_av_scenario_csv(scen)

print("Loaded scenarios:", list(SCENARIO_DATA_ALL.keys()))



Loaded scenarios: ['base', 'deadheading_charge', 'no_cost_remote_parking', 'no_parking_cbd']


In [7]:
# Update selected scenarios
# Set to [] or None if no updates are needed.
# Scenarios listed here are re-read from disk and replace their entry in SCENARIO_DATA_ALL.
# The trip-distance cell below also reads this list to decide which scenarios to (re)process.

SCENARIOS_TO_UPDATE = []
# Examples:
# SCENARIOS_TO_UPDATE = ["deadheading_charge"]
# SCENARIOS_TO_UPDATE = ["base", "deadheading_charge"]

if SCENARIOS_TO_UPDATE:
    for scen in SCENARIOS_TO_UPDATE:
        sd = load_av_scenario_csv(scen)
        # load_av_scenario_csv as written never returns None; this guard only matters
        # if the loader is later changed to signal "missing scenario" that way.
        if sd is not None:
            SCENARIO_DATA_ALL[scen] = sd

print("Scenarios updated:", SCENARIOS_TO_UPDATE)


Scenarios updated: []


In [8]:
# Trip-dist (run once after initial load; then only for updated scenarios)
# Behavior:
# - If run with SCENARIOS_TO_UPDATE = [...] : only runs those scenarios.
# - If SCENARIOS_TO_UPDATE is empty: it runs ALL scenarios.

def add_otaz_dtaz_idx_from_mgra(df: pd.DataFrame, o_mgra_col: str, d_mgra_col: str) -> pd.DataFrame:
    """Return a copy of ``df`` with TAZ ids and skim indices for both trip ends.

    Adds, in this order, ``o_taz``, ``d_taz`` (MGRA translated through
    ``MGRA_TO_TAZ``) and ``otaz_idx``, ``dtaz_idx`` (TAZ translated through
    ``TAZ_TO_IDX``). Values with no match in a lookup become NaN.
    The input frame is not modified.
    """
    out = df.copy()

    # Translate both MGRA columns first, then write the new columns in a fixed order.
    taz_by_end = {
        end: pd.to_numeric(out[mgra_col].map(MGRA_TO_TAZ), errors="coerce")
        for end, mgra_col in (("o", o_mgra_col), ("d", d_mgra_col))
    }
    out["o_taz"] = taz_by_end["o"]
    out["d_taz"] = taz_by_end["d"]

    idx_by_end = {
        end: pd.to_numeric(out[taz_col].map(TAZ_TO_IDX), errors="coerce")
        for end, taz_col in (("o", "o_taz"), ("d", "d_taz"))
    }
    out["otaz_idx"] = idx_by_end["o"]
    out["dtaz_idx"] = idx_by_end["d"]

    return out


def get_trip_dist(df: pd.DataFrame, skim: np.ndarray) -> pd.DataFrame:
    """Add a ``trip_dist`` column looked up from ``skim`` and return ``df``.

    ``df`` is modified in place (and also returned). Rows whose ``otaz_idx`` or
    ``dtaz_idx`` is missing, or falls outside the bounds of ``skim``, get NaN.
    """
    has_both = df["otaz_idx"].notna() & df["dtaz_idx"].notna()
    row_idx = df.loc[has_both, "otaz_idx"].astype(int).to_numpy()
    col_idx = df.loc[has_both, "dtaz_idx"].astype(int).to_numpy()

    n_rows, n_cols = skim.shape[0], skim.shape[1]
    in_bounds = (row_idx >= 0) & (row_idx < n_rows) & (col_idx >= 0) & (col_idx < n_cols)

    # Start from all-NaN and fill only the positions that can be indexed safely.
    looked_up = np.full(has_both.sum(), np.nan, dtype=float)
    looked_up[in_bounds] = skim[row_idx[in_bounds], col_idx[in_bounds]]

    df["trip_dist"] = np.nan
    df.loc[has_both, "trip_dist"] = looked_up
    return df


def ensure_trip_dist_for_scenario(sd: dict) -> dict:
    """Attach TAZ indices and ``trip_dist`` to a scenario's trip tables.

    Both ``sd["trips"]`` and ``sd["av_trips"]`` are replaced by enriched copies
    built from their ``origin`` / ``destination`` columns, and
    ``sd["_prepped_trip_dist"]`` is set to True. The same dict is mutated and returned.
    """
    # Build both enriched tables before touching sd, so a failure leaves sd as it was.
    prepared = {
        table: get_trip_dist(
            add_otaz_dtaz_idx_from_mgra(sd[table], "origin", "destination"),
            SKIM_DIST,
        )
        for table in ("trips", "av_trips")
    }

    sd.update(prepared)
    sd["_prepped_trip_dist"] = True
    return sd


# Decide which scenarios to prep:
# - If SCENARIOS_TO_UPDATE exists and is non-empty: prep only those.
# - Otherwise: prep ALL scenarios.
# NOTE(review): the globals() check makes this cell's behaviour depend on whether the
# "Update selected scenarios" cell has been run in the current kernel session.

if "SCENARIOS_TO_UPDATE" in globals() and SCENARIOS_TO_UPDATE:
    # Names that were never loaded into SCENARIO_DATA_ALL are silently skipped.
    _targets = [s for s in SCENARIOS_TO_UPDATE if s in SCENARIO_DATA_ALL]
else:
    _targets = list(SCENARIO_DATA_ALL.keys())

# Each target's tables are rebuilt from whatever is currently stored, so re-running is safe.
for s in _targets:
    SCENARIO_DATA_ALL[s] = ensure_trip_dist_for_scenario(SCENARIO_DATA_ALL[s])

print("Trip-dist prep done for:", _targets)


Trip-dist prep done for: ['base', 'deadheading_charge', 'no_cost_remote_parking', 'no_parking_cbd']


## Analysis

In [9]:

def metric1_share_driving_served(scenario_data: Dict[str, dict]) -> pd.DataFrame:
    """Per scenario: number of driving trips and the share matched by AV trip ids.

    The numerator is the count of distinct non-null ``trip_id`` values in the
    AV trip table; it is not restricted to trips whose mode is in ``DRIVE_MODES``.
    The share is NaN when a scenario has no driving trips.
    """

    def summarize(scen: str, sd: dict) -> dict:
        trips = sd["trips"]
        av = sd["av_trips"]

        n_drive = int(trips["trip_mode"].isin(DRIVE_MODES).sum())
        n_served = av["trip_id"].dropna().nunique()
        share = (n_served / n_drive) if n_drive else np.nan

        return {
            "scenario": scen,
            "total_driving_trips": n_drive,
            "av_served_driving_trips_unique": int(n_served),
            "share_served": float(share),
        }

    return pd.DataFrame([summarize(scen, sd) for scen, sd in scenario_data.items()])

def metric1_share_driving_served_with_why(scenario_data: Dict[str, dict]) -> pd.DataFrame:
    """Split each scenario's driving trips into served / unavailable / available-not-used.

    For every scenario the driving trips (``trip_mode`` in ``DRIVE_MODES``) are
    classified as:

    * served - the trip's ``trip_id`` appears in the AV trip table;
    * unavailable - not served, the household has at least one linked vehicle,
      and every linked vehicle has an AV trip with the same ``depart`` value;
    * available, not used - every other non-served driving trip (including
      trips of households with no linked vehicle).

    Vehicles are linked to households through AV trips whose ``trip_id`` matches
    a row of the trips table.

    Returns:
        One row per scenario with the three counts, the total number of driving
        trips, and each count as a fraction of that total (NaN when the total is 0).
    """
    rows = []
    for scen, sd in scenario_data.items():
        trips = sd["trips"]
        av = sd["av_trips"]

        drive_trips = trips[trips["trip_mode"].isin(DRIVE_MODES)].copy()
        total_drive = int(len(drive_trips))

        # Served = distinct driving trip ids that also appear in the AV trip table.
        av_trip_ids = set(av["trip_id"].dropna().unique())
        served_ids = set(drive_trips.loc[drive_trips["trip_id"].isin(av_trip_ids), "trip_id"].dropna().unique())
        served = int(len(served_ids))

        non_served = drive_trips.loc[~drive_trips["trip_id"].isin(av_trip_ids)].copy()

        # Link vehicles to households via the person trips they carried (join on trip_id).
        veh_hh_links = (
            av.loc[av["trip_id"].notna(), ["vehicle_id", "trip_id"]]
              .merge(trips[["trip_id", "household_id"]], on="trip_id", how="left")
              .dropna(subset=["vehicle_id", "household_id"])
        )
        # vehicle_id -> household_id; a vehicle linked to several households keeps one row per pair.
        veh_to_hh = (
            veh_hh_links.drop_duplicates(subset=["vehicle_id", "household_id"])
                        .set_index("vehicle_id")["household_id"]
        )

        # vehicle_id -> set of depart values at which that vehicle has any AV trip row.
        veh_timepool = (
            av.dropna(subset=["vehicle_id", "depart"])
              .groupby("vehicle_id")["depart"]
              .apply(lambda s: set(s.unique()))
        )

        # household_id -> list of vehicle ids linked to it.
        hh_to_vehicles = (
            veh_to_hh.reset_index()
                     .groupby("household_id")["vehicle_id"]
                     .apply(list)
                     .to_dict()
        )

        if len(non_served):
            # Households with no linked vehicle map to NaN here, not to a list.
            vids_series = non_served["household_id"].map(hh_to_vehicles)
            depart_series = non_served["depart"]

            unavailable = []
            for vids, depart_bin in zip(vids_series, depart_series):
                if not isinstance(vids, list) or len(vids) == 0:
                    # No known vehicle for the household: counted as "available, not used".
                    unavailable.append(False)
                else:
                    # Unavailable only if every household vehicle has a trip in this depart bin.
                    unavailable.append(all(depart_bin in veh_timepool.get(v, set()) for v in vids))

            non_served["unavailable"] = unavailable
            unavail_ct = int(non_served["unavailable"].sum())
        else:
            unavail_ct = 0

        # Remainder of the non-served trips.
        avail_not_used_ct = int(len(non_served) - unavail_ct)

        served_pct = (served / total_drive) if total_drive else float("nan")
        unavail_pct = (unavail_ct / total_drive) if total_drive else float("nan")
        avail_not_used_pct = (avail_not_used_ct / total_drive) if total_drive else float("nan")

        rows.append({
            "scenario": scen,
            "total_driving_trips": total_drive,
            "served_ct": served,
            "unavailable_ct": unavail_ct,
            "available_not_used_ct": avail_not_used_ct,
            "served_pct": served_pct,
            "unavailable_pct": unavail_pct,
            "available_not_used_pct": avail_not_used_pct,
        })

    return pd.DataFrame(rows)

def metric2_reposition_choice(scenario_data: Dict[str, dict]) -> pd.DataFrame:
    """Count and share of each ``av_repositioning_choice`` value, per scenario.

    Missing choices are ignored. Within a scenario, choices named in
    ``REPO_CORE_ORDER`` come first (in that order), followed by any others in
    value-count order. ``pct`` is a fraction of the scenario's non-missing choices.
    """
    records = []
    for scen, sd in scenario_data.items():
        counts = sd["av_trips"]["av_repositioning_choice"].dropna().value_counts()
        denom = float(counts.sum()) if len(counts) else 0.0

        seen = list(counts.index)
        core_first = [choice for choice in REPO_CORE_ORDER if choice in seen]
        extras = [choice for choice in seen if choice not in REPO_CORE_ORDER]

        for choice in core_first + extras:
            n_choice = int(counts.get(choice, 0))
            records.append({
                "scenario": scen,
                "choice": choice,
                "count": n_choice,
                "pct": (n_choice / denom) if denom else np.nan,
            })

    return pd.DataFrame(records)

def metric3_deadhead_shares(scenario_data: Dict[str, dict]) -> pd.DataFrame:
    """Deadhead AV trips as a share of AV trips and of all driving, per scenario.

    All figures are trip counts (not distance). "All driving" is every AV trip
    row plus the driving trips (``trip_mode`` in ``DRIVE_MODES``) whose
    ``trip_id`` does not appear in the AV trip table. Shares are NaN when their
    denominator is zero.
    """
    records = []
    for scen, sd in scenario_data.items():
        trips = sd["trips"]
        av = sd["av_trips"]

        is_drive = trips["trip_mode"].isin(DRIVE_MODES)
        drive_trips = trips[is_drive]

        n_av = int(len(av))
        n_dead = int(len(av.loc[av["is_deadhead"] == True]))

        served_ids = set(av["trip_id"].dropna().unique())
        n_non_av = int((~drive_trips["trip_id"].isin(served_ids)).sum())

        n_all_driving = n_av + n_non_av
        share_of_av = (n_dead / n_av) if n_av else np.nan
        share_of_all = (n_dead / n_all_driving) if n_all_driving else np.nan

        records.append({
            "scenario": scen,
            "deadhead_av_trips": n_dead,
            "all_av_trips": n_av,
            "all_non_av_driving_trips": n_non_av,
            "deadhead_pct_of_av_trips": float(share_of_av),
            "deadhead_pct_of_all_driving": float(share_of_all),
        })

    return pd.DataFrame(records)

def add_prev_dest_to_cur_orig_dist(
    df: pd.DataFrame,
    prev_dest_mgra_col: str,
    cur_orig_mgra_col: str,
    out_col: str,
) -> pd.DataFrame:
    """Return a copy of ``df`` with the skim distance between two MGRA columns.

    Each row's ``prev_dest_mgra_col`` and ``cur_orig_mgra_col`` values are
    translated MGRA -> TAZ -> skim index (via ``MGRA_TO_TAZ`` and
    ``TAZ_TO_IDX``) and looked up in ``SKIM_DIST``. Rows where either end
    cannot be translated, or falls outside the matrix, get NaN in ``out_col``.
    """
    out = df.copy()

    def to_skim_index(mgra_values: pd.Series) -> np.ndarray:
        # Untranslatable values become -1 so they can be masked out below.
        idx = mgra_values.map(MGRA_TO_TAZ).map(TAZ_TO_IDX)
        return pd.to_numeric(idx, errors="coerce").fillna(-1).astype(np.int64).to_numpy()

    from_idx = to_skim_index(out[prev_dest_mgra_col])
    to_idx = to_skim_index(out[cur_orig_mgra_col])

    n_rows, n_cols = SKIM_DIST.shape[0], SKIM_DIST.shape[1]
    usable = (from_idx >= 0) & (to_idx >= 0) & (from_idx < n_rows) & (to_idx < n_cols)

    distances = np.full(len(out), np.nan, dtype=float)
    distances[usable] = SKIM_DIST[from_idx[usable], to_idx[usable]]

    out[out_col] = distances
    return out

def metric4_prev_dest_to_next_origin(scenario_data: Dict[str, dict]):
    """Distance from a vehicle's previous destination to its next trip origin.

    For each scenario, non-deadhead AV trips (when an ``is_deadhead`` column
    exists) are ordered by vehicle and ``depart``, and the skim distance from
    the previous trip's destination to the current trip's origin is computed.

    Returns:
        A pair ``(overall, profile)`` of DataFrames: ``overall`` has one row per
        scenario with the mean distance (NaN when a required column is missing
        or no distance could be computed); ``profile`` has the mean distance
        per scenario and ``clock_min`` derived from the depart period.
    """
    required_cols = ["vehicle_id", "origin", "destination", "depart"]
    overall_rows = []
    profile_rows = []

    for scen, sd in scenario_data.items():
        av = sd["av_trips"]

        if not all(col in av.columns for col in required_cols):
            overall_rows.append({"scenario": scen, "avg_prev_dest_to_next_origin_dist": np.nan})
            continue

        legs = av.dropna(subset=required_cols).copy()
        if "is_deadhead" in legs.columns:
            legs = legs.loc[legs["is_deadhead"] == False].copy()

        legs["veh"] = pd.to_numeric(legs["vehicle_id"], errors="coerce").astype("Int64")
        legs["depart_bin"] = pd.to_numeric(legs["depart"], errors="coerce")

        # Order each vehicle's trips in time, then pull the preceding destination onto each row.
        legs = legs.sort_values(["veh", "depart_bin"])
        legs["prev_dest_mgra"] = legs.groupby("veh")["destination"].shift(1)

        legs = add_prev_dest_to_cur_orig_dist(
            legs,
            prev_dest_mgra_col="prev_dest_mgra",
            cur_orig_mgra_col="origin",
            out_col="prevdest_to_curorig_dist",
        )

        gaps = pd.to_numeric(legs["prevdest_to_curorig_dist"], errors="coerce").to_numpy()
        scenario_mean = float(np.nanmean(gaps)) if np.isfinite(gaps).any() else np.nan
        overall_rows.append({"scenario": scen, "avg_prev_dest_to_next_origin_dist": scenario_mean})

        mean_by_clock = (
            legs.assign(clock_min=stop_period_to_clock_minutes(legs["depart_bin"]))
                .groupby("clock_min", dropna=True)["prevdest_to_curorig_dist"]
                .mean()
        )
        profile_rows.extend(
            {"scenario": scen, "clock_min": float(clock), "mean_dist": float(dist)}
            for clock, dist in mean_by_clock.items()
        )

    return pd.DataFrame(overall_rows), pd.DataFrame(profile_rows)

def metric5_not_served_within_av_households(scenario_data: Dict[str, dict]) -> pd.DataFrame:
    """Share of driving trips from AV-owning households that no AV trip matches.

    A household counts as AV-owning when both ``av_ownership`` and
    ``auto_ownership`` are non-zero (missing or unparseable values count as 0).
    A driving trip (``trip_mode`` in ``DRIVE_MODES``) is "not served" when its
    ``trip_id`` is absent from the AV trip table. The share is NaN when those
    households make no driving trips.
    """
    records = []
    for scen, sd in scenario_data.items():
        hh = sd["households"]
        trips = sd["trips"]
        av = sd["av_trips"]

        owns_av = pd.to_numeric(hh["av_ownership"], errors="coerce").fillna(0).ne(0)
        owns_auto = pd.to_numeric(hh["auto_ownership"], errors="coerce").fillna(0).ne(0)
        av_household_ids = set(
            pd.to_numeric(hh.loc[owns_av & owns_auto, "household_id"], errors="coerce")
              .dropna()
              .astype(int)
              .unique()
        )

        from_av_household = trips["household_id"].isin(av_household_ids)
        is_drive = trips["trip_mode"].isin(DRIVE_MODES)
        hh_drive = trips[from_av_household & is_drive]

        served_ids = set(av["trip_id"].dropna().unique())
        n_total = int(hh_drive.shape[0])
        n_not_served = int((~hh_drive["trip_id"].isin(served_ids)).sum())
        share = (n_not_served / n_total) if n_total else np.nan

        records.append({
            "scenario": scen,
            "hh_with_av_count": int(len(av_household_ids)),
            "driving_trips_from_av_households": n_total,
            "driving_trips_not_served_by_av": n_not_served,
            "share_not_served_within_av_households": float(share),
        })

    return pd.DataFrame(records)


In [21]:
# def run_all_cases(all_data: Dict[str, dict], scenario_order: List[str]) -> None:
#     scenario_data = {s: all_data[s] for s in scenario_order if s in all_data}
#     if not scenario_data:
#         print("No scenarios loaded.")
#         return

#     print("Using loaded scenarios:", list(scenario_data.keys()))

#     # ---------------- Metric 1 ----------------
#     m1 = metric1_share_driving_served_with_why(scenario_data)

#     m1_print = m1.copy()
#     m1_print["served_pct"] = (m1_print["served_pct"] * 100.0).round(2)
#     m1_print["unavailable_pct"] = (m1_print["unavailable_pct"] * 100.0).round(2)
#     m1_print["available_not_used_pct"] = (m1_print["available_not_used_pct"] * 100.0).round(2)

#     print("\n[Metric 1] Share of driving trips serviced by AV")
#     print(m1_print[[
#         "scenario",
#         "total_driving_trips",
#         "served_ct",
#         "unavailable_ct",
#         "available_not_used_ct",
#         "served_pct",
#         "unavailable_pct",
#         "available_not_used_pct",
#     ]].to_string(index=False))

#     m1_plot = m1.set_index("scenario").loc[scenario_order].reset_index()

#     y_served = m1_plot["served_pct"] * 100.0
#     y_unav   = m1_plot["unavailable_pct"] * 100.0
#     y_avail  = m1_plot["available_not_used_pct"] * 100.0

#     fig1 = go.Figure()
#     fig1.add_trace(go.Bar(
#         x=m1_plot["scenario"],
#         y=y_served,
#         name="Served by AV",
#         text=[f"{v:.1f}" if pd.notna(v) else "" for v in y_served],
#         texttemplate="%{text}",
#         textposition="inside",
#         insidetextanchor="middle",
#     ))
#     fig1.add_trace(go.Bar(
#         x=m1_plot["scenario"],
#         y=y_unav,
#         name="Not served: AV unavailable",
#         text=[f"{v:.1f}" if pd.notna(v) else "" for v in y_unav],
#         texttemplate="%{text}",
#         textposition="inside",
#         insidetextanchor="middle",
#     ))
#     fig1.add_trace(go.Bar(
#         x=m1_plot["scenario"],
#         y=y_avail,
#         name="Not served: AV available, not used",
#         text=[f"{v:.1f}" if pd.notna(v) else "" for v in y_avail],
#         texttemplate="%{text}",
#         textposition="inside",
#         insidetextanchor="middle",
#     ))
#     fig1.update_layout(
#         barmode="stack",
#         title="driving trips breakdown",
#         xaxis_title="Scenario",
#         yaxis_title="Percent",
#         uniformtext_minsize=9,
#         uniformtext_mode="hide",
#     )
#     fig1.update_traces(
#         texttemplate="%{y:.1f}",
#         textposition="inside",
#         insidetextanchor="middle",
#         textfont=dict(size=14),
#         cliponaxis=False,   # prevent text from being clipped by the plot area
#     )

#     fig1.update_layout(
#         uniformtext_minsize=14,
#         uniformtext_mode="show",  # never hide labels
#         yaxis=dict(range=[0, 105]),  # a little headroom; keeps everything comfortably visible
#         margin=dict(t=80),
#     )
#     fig1.show()

#     # ---------------- Metric 2 ----------------
#     m2 = metric2_reposition_choice(scenario_data)
#     m2_print = m2.copy()
#     m2_print["pct"] = (m2_print["pct"] * 100.0).round(2)
#     print("\n[Metric 2] AV repositioning choices (percent)")
#     print(m2_print[["scenario", "choice", "count", "pct"]].to_string(index=False))

#     # grouped bars: x=choice, one trace per scenario (with labels)
#     m2_plot = m2.copy()
#     m2_plot["pct100"] = m2_plot["pct"] * 100.0

#     # stable ordering (your existing REPO_CORE_ORDER first, then any extras)
#     present = list(m2_plot["choice"].dropna().unique())
#     choice_order = [c for c in REPO_CORE_ORDER if c in present] + [c for c in present if c not in REPO_CORE_ORDER]

#     fig2 = go.Figure()
#     for scen in scenario_order:
#         if scen not in scenario_data:
#             continue
#         sub = m2_plot.loc[m2_plot["scenario"] == scen].copy()
#         sub = sub.set_index("choice").reindex(choice_order).reset_index()

#         y = sub["pct100"]
#         fig2.add_trace(go.Bar(
#             x=sub["choice"],
#             y=y,
#             name=scen,
#             text=[f"{v:.1f}" if pd.notna(v) else "" for v in y],
#             texttemplate="%{text}",
#             textposition="outside",
#         ))

#     fig2.update_layout(
#         barmode="group",
#         title="AV repositioning choice breakdown",
#         xaxis_title="Choice",
#         yaxis_title="Percent",
#         uniformtext_minsize=9,
#         uniformtext_mode="hide",
#     )
#     fig2.show()

#     # ---------------- Metric 3 ----------------
#     m3 = metric3_deadhead_shares(scenario_data)
#     m3_print = m3.copy()
#     m3_print["deadhead_pct_of_av_trips"] = (m3_print["deadhead_pct_of_av_trips"] * 100.0).round(2)
#     m3_print["deadhead_pct_of_all_driving"] = (m3_print["deadhead_pct_of_all_driving"] * 100.0).round(2)
#     print("\n[Metric 3] Deadheading shares (counts-based)")
#     print(m3_print[[
#         "scenario",
#         "deadhead_av_trips",
#         "all_av_trips",
#         "all_non_av_driving_trips",
#         "deadhead_pct_of_av_trips",
#         "deadhead_pct_of_all_driving",
#     ]].to_string(index=False))

#     y = m3["deadhead_pct_of_av_trips"] * 100.0
#     fig3a = go.Figure()
#     fig3a.add_trace(go.Bar(
#         x=m3["scenario"],
#         y=y,
#         name="% of trips",
#         text=[f"{v:.1f}" if pd.notna(v) else "" for v in y],
#         texttemplate="%{text}",
#         textposition="outside",
#     ))
#     fig3a.update_layout(title="Deadheading (% of AV trips)",
#                         xaxis_title="Scenario", yaxis_title="Percent")
#     fig3a.show()

#     y = m3["deadhead_pct_of_all_driving"] * 100.0
#     fig3b = go.Figure()
#     fig3b.add_trace(go.Bar(
#         x=m3["scenario"],
#         y=y,
#         name="% of VMT",
#         text=[f"{v:.1f}" if pd.notna(v) else "" for v in y],
#         texttemplate="%{text}",
#         textposition="outside",
#     ))
#     fig3b.update_layout(title="Deadheading (% of VMT)",
#                         xaxis_title="Scenario", yaxis_title="Percent")
#     fig3b.show()

#     # ---------------- Metric 4 ----------------
#     m4_overall, m4_profile = metric4_prev_dest_to_next_origin(scenario_data)
#     print("\n[Metric 4] Avg prev-dest → next-origin distance (overall, served trips)")
#     print(m4_overall.to_string(index=False))

#     fig4 = fig_overlay_time_series(
#         m4_profile,
#         "Mean prev-dest → next-origin distance by time (served trips)",
#         y_col="mean_dist",
#     )
#     ticks = list(range(0, 24 * 60, 120))
#     format_time_ticks(fig4, ticks)
#     fig4.update_yaxes(title="Miles")
#     fig4.show()
def _pct_labels(values) -> List[str]:
    """Render percent values as one-decimal bar labels ("" for missing values)."""
    return [f"{v:.1f}" if pd.notna(v) else "" for v in values]


def _axis_style(title_size: int, tick_size: int, title: Optional[str] = None, **extra) -> dict:
    """Build an axis layout dict that sizes the title through ``title.font``.

    The older ``titlefont`` axis attribute is deprecated in plotly, so the
    nested ``title=dict(text=..., font=...)`` form is used instead. When
    ``title`` is None only the font is set, leaving any existing title text alone.
    """
    title_spec: dict = {"font": {"size": title_size}}
    if title is not None:
        title_spec["text"] = title
    return {"title": title_spec, "tickfont": {"size": tick_size}, **extra}


def _top_legend(font_size: int) -> dict:
    """Horizontal, untitled legend anchored just above the plot area."""
    return dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="left",
        x=0.0,
        font=dict(size=font_size),
        title_text="",
    )


def run_all_cases(all_data: Dict[str, dict], scenario_order: List[str], text_scale: float = 1.30) -> None:
    """Print the Metric 1-4 tables and show their figures for the chosen scenarios.

    Args:
        all_data: Scenario name -> scenario dict (as stored in ``SCENARIO_DATA_ALL``).
        scenario_order: Scenario names in display order; names missing from
            ``all_data`` are skipped everywhere, including in the plots.
        text_scale: Multiplier applied to every base font size (default 1.30).

    Returns:
        None. Output is printed tables and displayed plotly figures.
    """
    scenario_data = {s: all_data[s] for s in scenario_order if s in all_data}
    if not scenario_data:
        print("No scenarios loaded.")
        return

    # Only the scenarios that actually loaded, in the requested order. Indexing with the
    # raw scenario_order would raise KeyError for a requested-but-missing scenario.
    loaded_order = list(scenario_data)
    print("Using loaded scenarios:", loaded_order)

    def px(base_size: float) -> int:
        # Scaled font size, truncated to an int as plotly expects.
        return int(base_size * text_scale)

    # ---------------- Metric 1 ----------------
    m1 = metric1_share_driving_served_with_why(scenario_data)
    m1_pct_cols = ["served_pct", "unavailable_pct", "available_not_used_pct"]

    m1_print = m1.copy()
    for col in m1_pct_cols:
        m1_print[col] = (m1_print[col] * 100.0).round(2)

    print("\n[Metric 1] Share of driving trips serviced by AV")
    print(m1_print[[
        "scenario",
        "total_driving_trips",
        "served_ct",
        "unavailable_ct",
        "available_not_used_ct",
        *m1_pct_cols,
    ]].to_string(index=False))

    m1_plot = m1.set_index("scenario").loc[loaded_order].reset_index()

    fig1 = go.Figure()
    for pct_col, trace_name in (
        ("served_pct", "Served"),
        ("unavailable_pct", "Not served: unavailable"),
        ("available_not_used_pct", "Not served: avail/not used"),
    ):
        y = m1_plot[pct_col] * 100.0
        fig1.add_trace(go.Bar(
            x=m1_plot["scenario"],
            y=y,
            name=trace_name,
            text=_pct_labels(y),
            texttemplate="%{text}",
            textposition="inside",
            insidetextanchor="middle",
        ))

    fig1.update_layout(
        barmode="stack",
        width=1600,
        height=780,
        title=None,
        xaxis=_axis_style(px(22), px(18), "Scenario"),
        yaxis=_axis_style(px(22), px(18), "Percent", range=[0, 112]),
        legend=_top_legend(px(18)),
        margin=dict(l=95, r=50, t=40, b=95),
        uniformtext_minsize=px(18),
        uniformtext_mode="show",
        font=dict(size=px(16)),
    )
    fig1.update_traces(textfont=dict(size=px(18)), cliponaxis=False)
    fig1.show()

    # ---------------- Metric 2 ----------------
    m2 = metric2_reposition_choice(scenario_data)
    m2_print = m2.copy()
    m2_print["pct"] = (m2_print["pct"] * 100.0).round(2)
    print("\n[Metric 2] AV repositioning choices (percent)")
    print(m2_print[["scenario", "choice", "count", "pct"]].to_string(index=False))

    m2_plot = m2.copy()
    m2_plot["pct100"] = m2_plot["pct"] * 100.0

    # Core choices first (fixed order), then any other choice seen in the data.
    present = list(m2_plot["choice"].dropna().unique())
    choice_order = [c for c in REPO_CORE_ORDER if c in present] + [c for c in present if c not in REPO_CORE_ORDER]

    fig2 = go.Figure()
    for scen in loaded_order:
        sub = m2_plot.loc[m2_plot["scenario"] == scen]
        # Reindex so every scenario has a slot for every choice (missing -> NaN, blank label).
        sub = sub.set_index("choice").reindex(choice_order).reset_index()

        y = sub["pct100"]
        fig2.add_trace(go.Bar(
            x=sub["choice"],
            y=y,
            name=scen,
            text=_pct_labels(y),
            texttemplate="%{text}",
            textposition="outside",
        ))

    fig2.update_layout(
        barmode="group",
        width=1900,
        height=800,
        title=None,
        xaxis=_axis_style(px(22), px(16), "Choice"),
        yaxis=_axis_style(px(22), px(18), "Percent", range=[0, 110]),
        legend=_top_legend(px(18)),
        margin=dict(l=95, r=50, t=40, b=130),
        uniformtext_minsize=px(14),
        uniformtext_mode="show",
        font=dict(size=px(16)),
    )
    fig2.update_traces(textfont=dict(size=px(16)), cliponaxis=False)
    fig2.show()

    # ---------------- Metric 3 ----------------
    m3 = metric3_deadhead_shares(scenario_data)
    m3_pct_cols = ["deadhead_pct_of_av_trips", "deadhead_pct_of_all_driving"]

    m3_print = m3.copy()
    for col in m3_pct_cols:
        m3_print[col] = (m3_print[col] * 100.0).round(2)
    print("\n[Metric 3] Deadheading shares (counts-based)")
    print(m3_print[[
        "scenario",
        "deadhead_av_trips",
        "all_av_trips",
        "all_non_av_driving_trips",
        *m3_pct_cols,
    ]].to_string(index=False))

    # One single-series bar chart per share, both with the same layout.
    for pct_col, trace_name in (
        ("deadhead_pct_of_av_trips", "Deadhead (% AV trips)"),
        ("deadhead_pct_of_all_driving", "Deadhead (% all driving)"),
    ):
        y = m3[pct_col] * 100.0
        fig3 = go.Figure()
        fig3.add_trace(go.Bar(
            x=m3["scenario"],
            y=y,
            name=trace_name,
            text=_pct_labels(y),
            texttemplate="%{text}",
            textposition="outside",
        ))
        fig3.update_layout(
            width=1400,
            height=720,
            title=None,
            xaxis=_axis_style(px(20), px(16), "Scenario"),
            yaxis=_axis_style(px(20), px(16), "Percent"),
            margin=dict(l=90, r=50, t=40, b=95),
            uniformtext_minsize=px(14),
            uniformtext_mode="show",
            font=dict(size=px(16)),
        )
        fig3.update_traces(textfont=dict(size=px(16)), cliponaxis=False)
        fig3.show()

    # ---------------- Metric 4 ----------------
    m4_overall, m4_profile = metric4_prev_dest_to_next_origin(scenario_data)
    print("\n[Metric 4] Avg prev-dest → next-origin distance (overall, served trips)")
    print(m4_overall.to_string(index=False))

    if m4_profile.empty:
        # An empty profile has no "scenario" column, which would break the overlay plot.
        print("No time-of-day profile available; skipping Metric 4 figure.")
        return

    fig4 = fig_overlay_time_series(
        m4_profile,
        "",  # no title
        y_col="mean_dist",
    )
    ticks = list(range(0, 24 * 60, 120))
    format_time_ticks(fig4, ticks)
    fig4.update_yaxes(title="Miles")

    fig4.update_layout(
        width=1600,
        height=780,
        title=None,
        legend=dict(font=dict(size=px(16)), title_text=""),
        # Font-only title specs: the axis title texts set just above are kept.
        xaxis=_axis_style(px(20), px(16)),
        yaxis=_axis_style(px(20), px(16)),
        margin=dict(l=95, r=50, t=40, b=95),
        font=dict(size=px(16)),
    )
    fig4.show()


## Stay with person in CBD when parking prohibited 

### Repositioning choice breakdown for AV trips with CBD destinations

In [11]:
# Going to CBD: AV repositioning choice breakdown
#
# For every loaded scenario, take the AV trips whose destination MGRA maps to pseudomsa 1
# (treated as the CBD in this cell) and chart the percentage of each av_repositioning_choice.
# For the no_parking_cbd scenario ONLY, every trip_id that has a CBD-destination row with the
# stay_with_person choice is removed before the shares are computed.

SCEN_EXCL = "no_parking_cbd"
CHOICE_EXCL = "stay_with_person"


def _core_first(labels):
    """Order labels with REPO_CORE_ORDER members first (in that order), then the rest as given."""
    core = [lbl for lbl in REPO_CORE_ORDER if lbl in labels]
    extra = [lbl for lbl in labels if lbl not in REPO_CORE_ORDER]
    return core + extra


def _pct_labels(values):
    """Format bar labels to one decimal place; missing values become empty strings."""
    return [f"{v:.1f}" if pd.notna(v) else "" for v in values]


# MGRA -> pseudomsa lookup; a non-numeric pseudomsa is coerced to -1.
lu_mgra = LAND_USE["mgra"].astype(int)
lu_pseudo = pd.to_numeric(LAND_USE["pseudomsa"], errors="coerce").fillna(-1).astype(int)
mgra_to_pseudo = dict(zip(lu_mgra, lu_pseudo))


def _dest_pseudomsa(av_frame):
    """Map each row's destination MGRA to its pseudomsa (NaN when the MGRA is not in the lookup)."""
    dest_mgra = pd.to_numeric(av_frame["destination"], errors="coerce").fillna(-1).astype(int)
    return dest_mgra.map(mgra_to_pseudo)


# trip_ids to exclude (ONLY for no_parking_cbd)
av_excl = SCENARIO_DATA_ALL[SCEN_EXCL]["av_trips"]
dest_pseudo_excl = _dest_pseudomsa(av_excl)
excl_rows = (dest_pseudo_excl == 1) & (av_excl["av_repositioning_choice"] == CHOICE_EXCL)
drop_trip_ids = set(av_excl.loc[excl_rows, "trip_id"].dropna().unique())

# One record per (scenario, choice): count and fractional share among CBD-bound AV trips.
rows = []
for scen, sd in SCENARIO_DATA_ALL.items():
    av = sd["av_trips"]

    if drop_trip_ids and scen == SCEN_EXCL:
        keep = ~av["trip_id"].isin(drop_trip_ids)
        av = av.loc[keep].copy()

    cbd = av.loc[_dest_pseudomsa(av) == 1]

    vc = cbd["av_repositioning_choice"].dropna().value_counts()
    total = float(vc.sum()) if len(vc) else 0.0

    for choice in _core_first(list(vc.index)):
        cnt = int(vc.get(choice, 0))
        rows.append({
            "scenario": scen,
            "choice": choice,
            "count": cnt,
            # share is undefined (NaN) when there is nothing to divide by
            "pct": float("nan") if not total else (cnt / total),
        })

df_cbd_choice = pd.DataFrame(rows)
df_cbd_choice["pct100"] = df_cbd_choice["pct"] * 100.0

# Common x-axis order across scenarios: core choices first, then anything else observed.
present = list(df_cbd_choice["choice"].dropna().unique())
choice_order = _core_first(present)

fig = go.Figure()
for scen in SCENARIO_DATA_ALL:
    is_scen = df_cbd_choice["scenario"] == scen
    # Reindex so every scenario has one bar slot per choice (missing choices plot as gaps).
    sub = (
        df_cbd_choice.loc[is_scen]
        .set_index("choice")
        .reindex(choice_order)
        .reset_index()
    )

    y = sub["pct100"]
    bar = go.Bar(
        x=sub["choice"],
        y=y,
        name=scen,
        text=_pct_labels(y),
        texttemplate="%{text}",
        textposition="outside",
    )
    fig.add_trace(bar)

fig.update_layout(
    barmode="group",
    title="Going to CBD: AV repositioning choice breakdown",
    xaxis_title="Choice",
    yaxis_title="Percent",
    uniformtext_minsize=9,
    uniformtext_mode="hide",
)
fig.show()


### Deadheading share of VMT for AV trips heading to CBD

In [12]:
# Going to CBD: deadheading share of VMT, per scenario.
# Among AV trips whose destination MGRA maps to pseudomsa 1, this is
# 100 * sum(trip_dist of deadhead rows) / sum(trip_dist of all rows).

# MGRA -> pseudomsa lookup; a non-numeric pseudomsa is coerced to -1.
lu_mgra = LAND_USE["mgra"].astype(int)
lu_pseudo = pd.to_numeric(LAND_USE["pseudomsa"], errors="coerce").fillna(-1).astype(int)
mgra_to_pseudo = dict(zip(lu_mgra, lu_pseudo))


def _cbd_deadhead_vmt_pct(av_trips):
    """Return the deadhead percentage of trip_dist over CBD-destination rows (NaN if the total is not > 0)."""
    dest_mgra = pd.to_numeric(av_trips["destination"], errors="coerce").fillna(-1).astype(int)
    cbd_bound = av_trips.loc[dest_mgra.map(mgra_to_pseudo) == 1]

    # Explicit comparison kept: only rows whose flag equals True count as deadhead.
    is_dh = cbd_bound["is_deadhead"] == True
    deadhead_dist = pd.to_numeric(cbd_bound.loc[is_dh, "trip_dist"], errors="coerce").sum()
    total_dist = pd.to_numeric(cbd_bound["trip_dist"], errors="coerce").sum()

    if total_dist > 0:
        return deadhead_dist / total_dist * 100.0
    return float("nan")


rows = []
for scen, sd in SCENARIO_DATA_ALL.items():
    rows.append({"scenario": scen, "pct": _cbd_deadhead_vmt_pct(sd["av_trips"])})

# Keep the bars in the same order as the loaded scenarios.
scenario_keys = list(SCENARIO_DATA_ALL.keys())
df = pd.DataFrame(rows).set_index("scenario")
df = df.loc[scenario_keys].reset_index()

y = df["pct"]
bar_labels = [f"{v:.1f}" if pd.notna(v) else "" for v in y]

fig = go.Figure()
fig.add_trace(
    go.Bar(
        x=df["scenario"],
        y=y,
        text=bar_labels,
        texttemplate="%{text}",
        textposition="outside",
    )
)
fig.update_layout(
    title="Going to CBD: Deadheading share of VMT",
    xaxis_title="Scenario",
    yaxis_title="Percent",
    uniformtext_minsize=9,
    uniformtext_mode="hide",
)
fig.show()


## Final Summaries

In [22]:
# Final summaries: ignore CBD-destination + stay_with_person rows in no_parking_cbd, then rerun metrics.
#
# Steps:
#   1. Find AV rows in the no_parking_cbd scenario whose destination MGRA maps to pseudomsa 1
#      and whose repositioning choice is stay_with_person.
#   2. Drop every AV row that shares a trip_id with one of those rows.
#   3. Rerun run_all_cases on a copy of the scenario dict with the filtered AV table.
# SCENARIO_DATA_ALL itself is not modified (only shallow copies of the dicts are changed).

SCEN = "no_parking_cbd"
CHOICE = "stay_with_person"

# MGRA -> pseudomsa lookup; a non-numeric pseudomsa is coerced to -1.
mgra_to_pseudo = dict(zip(
    LAND_USE["mgra"].astype(int),
    pd.to_numeric(LAND_USE["pseudomsa"], errors="coerce").fillna(-1).astype(int),
))

av0 = SCENARIO_DATA_ALL[SCEN]["av_trips"]
dest_pseudo = pd.to_numeric(av0["destination"], errors="coerce").fillna(-1).astype(int).map(mgra_to_pseudo)

drop_mask = (dest_pseudo == 1) & (av0["av_repositioning_choice"] == CHOICE)
drop_trip_ids = set(av0.loc[drop_mask, "trip_id"].dropna().unique())

# The filter is applied by trip_id, so compute it once and reuse it for both the
# filtered table and the reported count.
keep_mask = ~av0["trip_id"].isin(drop_trip_ids)
av_filtered = av0.loc[keep_mask].copy()

scenario_data_mod = dict(SCENARIO_DATA_ALL)
scenario_data_mod[SCEN] = dict(SCENARIO_DATA_ALL[SCEN])
scenario_data_mod[SCEN]["av_trips"] = av_filtered

# Report the number of rows actually removed. This can differ from the number of rows that
# matched drop_mask: other rows may share a dropped trip_id, and matched rows with a missing
# trip_id are never dropped.
n_matched = int(drop_mask.sum())
n_removed = int(len(av0) - len(av_filtered))
print(f"Filtered out {n_removed:,} AV rows in {SCEN} (CBD dest + {CHOICE}).")
if n_removed != n_matched:
    print(
        f"  note: {n_matched:,} rows matched the CBD dest + {CHOICE} condition; "
        f"{n_removed:,} rows were removed by trip_id."
    )

scenario_order = list(scenario_data_mod.keys())
run_all_cases(scenario_data_mod, scenario_order)


Filtered out 235 AV rows in no_parking_cbd (CBD dest + stay_with_person).
Using loaded scenarios: ['base', 'deadheading_charge', 'no_cost_remote_parking', 'no_parking_cbd']

[Metric 1] Share of driving trips serviced by AV
              scenario  total_driving_trips  served_ct  unavailable_ct  available_not_used_ct  served_pct  unavailable_pct  available_not_used_pct
                  base             10582539    8505884         1906616                 170039       80.38            18.02                    1.61
    deadheading_charge             10582539    8045232         1821706                 715601       76.02            17.21                    6.76
no_cost_remote_parking             10582539    8505841         1906663                 170035       80.38            18.02                    1.61
        no_parking_cbd             10582539    8505642         1906722                 170175       80.37            18.02                    1.61



[Metric 2] AV repositioning choices (percent)
              scenario              choice   count   pct
                  base    stay_with_person 5293608 75.28
                  base       go_to_parking   94645  1.35
                  base             go_home  783028 11.14
                  base service_next_trip_1  488805  6.95
                  base service_next_trip_2  245158  3.49
                  base service_next_trip_3  126788  1.80
    deadheading_charge    stay_with_person 6040785 90.92
    deadheading_charge       go_to_parking   55162  0.83
    deadheading_charge             go_home  290038  4.37
    deadheading_charge service_next_trip_1  140167  2.11
    deadheading_charge service_next_trip_2   75763  1.14
    deadheading_charge service_next_trip_3   41820  0.63
no_cost_remote_parking    stay_with_person 5282844 75.13
no_cost_remote_parking       go_to_parking  116803  1.66
no_cost_remote_parking             go_home  775615 11.03
no_cost_remote_parking service_next_trip_


[Metric 3] Deadheading shares (counts-based)
              scenario  deadhead_av_trips  all_av_trips  all_non_av_driving_trips  deadhead_pct_of_av_trips  deadhead_pct_of_all_driving
                  base            4567115      13072999                   2076655                     34.94                        30.15
    deadheading_charge            2532260      10577492                   2537307                     23.94                        19.31
no_cost_remote_parking            4584904      13090745                   2076698                     35.02                        30.23
        no_parking_cbd            4644831      13150473                   2076897                     35.32                        30.50



[Metric 4] Avg prev-dest → next-origin distance (overall, served trips)
              scenario  avg_prev_dest_to_next_origin_dist
                  base                           3.206320
    deadheading_charge                           2.024152
no_cost_remote_parking                           3.205988
        no_parking_cbd                           3.207656
