In [1]:

import os
from pathlib import Path
import warnings
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime, timedelta
import yaml
from typing import Callable, Dict, Any


In [2]:
# Root output directory. load_outputs() reads each scenario's CSVs from
# BASE_OUTPUT_DIR / <scenario name>.
# NOTE(review): absolute, machine-specific path; edit before running elsewhere.
BASE_OUTPUT_DIR = Path(r"C:\projects\sandag\av_tnc_routing\av_run_dir\outputs\tnc_routing_test")

# Scenario groups: test name -> scenarios compared together. Each group yields
# one figure (run_timeseries_metric) or one table (run_scalar_metric), and
# every group lists "base" as the reference scenario.
TEST_GROUPS = {
    "time_bin_size": ["base", "timebin_5", "timebin_15"],
    "max_detour":    ["base", "detour_5", "detour_10", "detour_20"],
    "max_occupancy": ["base", "occ_6", "occ_8"],
    "tnc_shared_demand": ["base", "shift_to_all_shared_tnc"]
}

# Minutes per bin for each scenario, needed to map depart_bin to clock time.
# Only the timebin_* scenarios differ from 10 minutes.
SCENARIO_BIN_MINUTES = {
    "base":      10,
    "timebin_5": 5,
    "timebin_15": 15,
    "detour_5":  10,
    "detour_10": 10,
    "detour_20": 10,
    "occ_6":     10,
    "occ_8":     10,
    "shift_to_all_shared_tnc": 10
}

def get_bin_minutes(scenario: str) -> int:
    """
    Look up the time-bin width, in minutes, configured for ``scenario``.

    Scenarios that are not listed in SCENARIO_BIN_MINUTES fall back to
    10 minutes.
    """
    try:
        return SCENARIO_BIN_MINUTES[scenario]
    except KeyError:
        return 10

# Uniform start of day time for all scenarios: depart_bin 0 maps to 03:00.
# Only the time of day is ever rendered (labels use "%H:%M"), so the calendar
# date is just an anchor for datetime arithmetic.
start_time = datetime(2000, 1, 1, 3, 0, 0)

# Column of the vehicle-trips output holding each trip's distance; every VMT
# figure in this notebook is summed from it.
dist_col_new = "OD_dist"

def load_outputs(scenario: str):
    """
    Load the two routing output tables written for ``scenario``.

    Both files are read from ``BASE_OUTPUT_DIR / scenario``.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``output_tnc_vehicle_trips.csv`` first, then
        ``output_tnc_pooled_trips.csv``.
    """
    scenario_dir = BASE_OUTPUT_DIR / scenario
    vehicle_trips, pooled_trips = (
        pd.read_csv(scenario_dir / file_name)
        for file_name in ("output_tnc_vehicle_trips.csv", "output_tnc_pooled_trips.csv")
    )
    return vehicle_trips, pooled_trips


In [3]:
def run_timeseries_metric(metric_func, metric_name, y_label):
    """
    Plot one time-of-day line chart per scenario group in TEST_GROUPS.

    metric_func(new_df, pooled_df, scenario, bin_minutes) -> DataFrame with:
        x  : usually datetime (time of day); non-datetime values are treated
             as hours since start_time
        y  : metric value
        hover : optional tooltip text; when the column is absent the y value
                is shown instead

    metric_name is used in each figure title and y_label as the y-axis title.
    Scenarios whose metric frame is empty, or has no usable x values, are
    left out of the figure. Every figure is displayed with fig.show();
    nothing is returned.
    """

    def _clock_label(hours):
        # Hours since start_time -> "HH:MM". A NaN (x value that could not be
        # parsed) gets an empty label instead of failing in strftime.
        if np.isnan(hours):
            return ""
        return (start_time + pd.to_timedelta(float(hours), unit="h")).strftime("%H:%M")

    for test_name, scen_list in TEST_GROUPS.items():
        fig = go.Figure()
        # Track the x-range over all traces so the tick labels cover every scenario.
        global_min_h = None
        global_max_h = None

        for scen in scen_list:
            new_df, pooled_df = load_outputs(scen)
            bin_minutes = get_bin_minutes(scen)

            df = metric_func(new_df, pooled_df, scen, bin_minutes).copy()
            if df.empty:
                continue

            # Keep the metric's own tooltip text; fall back to the raw value
            # only when the metric did not provide a "hover" column.
            if "hover" not in df.columns:
                df["hover"] = df["y"]
            x_raw = df["x"]

            # Convert x to hours since 3:00
            if np.issubdtype(np.array(x_raw).dtype, np.datetime64):
                x_dt = pd.to_datetime(x_raw)
                hours_from_3 = (x_dt - start_time).dt.total_seconds() / 3600.0
            else:
                # If someone returns numeric already, just use that
                hours_from_3 = pd.to_numeric(x_raw, errors="coerce")

            hours_from_3 = hours_from_3.to_numpy()

            if hours_from_3.size == 0 or np.all(np.isnan(hours_from_3)):
                continue

            h_min = np.nanmin(hours_from_3)
            h_max = np.nanmax(hours_from_3)
            if global_min_h is None or h_min < global_min_h:
                global_min_h = h_min
            if global_max_h is None or h_max > global_max_h:
                global_max_h = h_max

            # Build per-point time label for hover
            time_labels = [_clock_label(h) for h in hours_from_3]

            fig.add_trace(
                go.Scatter(
                    x=hours_from_3,
                    y=df["y"],
                    mode="lines",
                    name=scen,
                    hovertemplate=(
                        "Scenario: %{meta}<br>"
                        "Time: %{customdata[0]}<br>"
                        "%{customdata[1]}<extra></extra>"
                    ),
                    meta=scen,
                    customdata=np.column_stack([time_labels, df["hover"].astype(str)]),
                )
            )

        fig.update_layout(
            title=f"{metric_name} — {test_name}",
            xaxis_title="Time of day",
            yaxis_title=y_label,
            hovermode="x unified",
        )

        # Configure the numeric time axis (hours from 3:00) with HH:MM labels
        if global_min_h is not None and global_max_h is not None:
            lo = int(np.floor(global_min_h))
            hi = int(np.ceil(global_max_h))
            tickvals = list(range(lo, hi + 1))
            ticktext = [_clock_label(h) for h in tickvals]

            fig.update_xaxes(
                tickmode="array",
                tickvals=tickvals,
                ticktext=ticktext,
            )

        fig.show()


def run_scalar_metric(metric_func, metric_name):
    """
    Print one table of scalar statistics per scenario group in TEST_GROUPS.

    metric_func(new_df, pooled_df) -> dict of scalar stats.
    Any column whose name starts with 'share_' is printed as a percent with
    exactly 2 decimal places (e.g. 0.007 -> "0.70%").

    Each table has one row per scenario, indexed by scenario name, and is
    printed under a "<metric_name> — <group name>" heading. Nothing is
    returned.
    """

    for test_name, scen_list in TEST_GROUPS.items():
        rows = []
        for scen in scen_list:
            new_df, pooled_df = load_outputs(scen)
            rows.append({"scenario": scen, **metric_func(new_df, pooled_df)})

        df_stats = pd.DataFrame(rows).set_index("scenario")

        # Format share_* columns as percentages. A fixed-width format is used
        # because round(2) followed by str() drops trailing zeros
        # ("0.7%" instead of "0.70%").
        for col in df_stats.columns:
            if col.startswith("share_"):
                df_stats[col] = (df_stats[col] * 100).map("{:.2f}%".format)

        print(f"\n{metric_name} — {test_name}")
        print(df_stats.to_string())


### Number of pooled vehicle trips

In [4]:
# Shared-TNC "requested trips": count them in the demand inputs, then report
# how many pooled vehicle trips each scenario produced.
demand_dir = Path(r"C:\projects\sandag\av_tnc_routing\av_run_dir\tnc_routing_data")
demand_files = [
    "final_trips.parquet",
    "final_trips_xborder.parquet",
    "final_trips_visitor.parquet",
    "final_santrips.parquet",
    "final_cbxtrips.parquet",
]

# Only trip_mode is needed, so read just that column from every demand file.
frames = [
    pd.read_parquet(demand_dir / file_name, columns=["trip_mode"])
    for file_name in demand_files
]
all_trips = pd.concat(frames, ignore_index=True)

trip_mode = all_trips["trip_mode"]
is_shared_tnc = trip_mode.eq("TNC_SHARED")
n_shared_requested = int(is_shared_tnc.sum())

# Original shared TNC trips in demand
n_shared_orig = is_shared_tnc.sum()

# DRIVEALONE trips that we convert in taxi_tnc_routing.py (capped at 5000)
n_da = trip_mode.eq("DRIVEALONE").sum()
n_boost = min(5000, n_da)
n_shared_effective = n_shared_orig + n_boost

summary_line = (
    f"Total shared TNC requested vehicle trips: "
    f"{n_shared_effective:,}  "
    f"= {n_shared_orig:,} original TNC_SHARED + {n_boost:,} converted DRIVEALONE\n"
)
print(summary_line)


# A row of the pooled-trips output counts as pooled when trip_j is present.
for group_name, scenarios in TEST_GROUPS.items():
    print(f"= {group_name} =")
    for scen in scenarios:
        pooled_df = pd.read_csv(
            BASE_OUTPUT_DIR / scen / "output_tnc_pooled_trips.csv",
            usecols=["trip_i", "trip_j"],
        )
        n_pooled = int(pooled_df["trip_j"].notna().sum())

        print(f"{scen:12s}: {n_pooled:8,d} pooled veh trips ")
    print()


Total shared TNC requested vehicle trips: 5,051  = 51 original TNC_SHARED + 5,000 converted DRIVEALONE

= time_bin_size =
base        :      816 pooled veh trips 
timebin_5   :      549 pooled veh trips 
timebin_15  :    1,022 pooled veh trips 

= max_detour =
base        :      816 pooled veh trips 
detour_5    :       42 pooled veh trips 
detour_10   :      368 pooled veh trips 
detour_20   :    1,028 pooled veh trips 

= max_occupancy =
base        :      816 pooled veh trips 
occ_6       :      851 pooled veh trips 
occ_8       :      831 pooled veh trips 

= tnc_shared_demand =
base        :      816 pooled veh trips 
shift_to_all_shared_tnc: 5,223,850 pooled veh trips 



In [5]:
# Tabulate the combined demand by trip_mode; rows without a mode are reported
# under "MISSING".
trip_mode_labels = all_trips["trip_mode"].astype("string").fillna("MISSING")
mode_counts = trip_mode_labels.value_counts(dropna=False).sort_index()

mode_lines = [f"  {mode:20s} {int(cnt):,}" for mode, cnt in mode_counts.items()]

print("Trip counts by trip_mode (all input files combined):")
for line in mode_lines:
    print(line)

print(f"\nTOTAL: {int(mode_counts.sum()):,}")

Trip counts by trip_mode (all input files combined):
  BIKE                 207,568
  DRIVEALONE           6,706,371
  EBIKE                9,434
  ESCOOTER             2,964
  KNR_LOC              3,352
  KNR_MIX              1,230
  KNR_PRM              21,589
  PNR_LOC              425
  PNR_MIX              203
  PNR_PRM              8,794
  SCH_BUS              20,502
  SHARED2              2,336,043
  SHARED3              1,842,507
  TAXI                 44,436
  TNC_LOC              51
  TNC_MIX              15
  TNC_PRM              276
  TNC_SHARED           51
  TNC_SINGLE           123,986
  WALK                 1,949,737
  WALK_LOC             58,974
  WALK_MIX             32,833
  WALK_PRM             73,871

TOTAL: 13,445,212


### Fleet Size (cumulative vehicles)

In [6]:
def metric_fleet_size(new_df, pooled_df, scenario, bin_minutes):
    """
    Cumulative number of distinct vehicles that have started operating, by bin.

    A vehicle enters the count at its smallest depart_bin. Rows without a
    numeric depart_bin or without a vehicle_id are ignored. The series covers
    every bin from 0 to the last bin in which a vehicle first appears.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin, start_time plus
    bin * bin_minutes), y (cumulative vehicles) and hover (tooltip text).
    """
    trips = new_df[["vehicle_id", "depart_bin"]].copy()
    trips["depart_bin"] = pd.to_numeric(trips["depart_bin"], errors="coerce").astype("Int64")
    trips = trips.dropna(subset=["depart_bin", "vehicle_id"])

    # First depart_bin per vehicle
    first_bin_by_vehicle = trips.groupby("vehicle_id")["depart_bin"].min()
    if first_bin_by_vehicle.empty:
        return pd.DataFrame(columns=["x", "y", "hover"])

    # Vehicles introduced per bin, zero-filled so the running total has no gaps.
    vehicles_per_bin = first_bin_by_vehicle.value_counts().sort_index()
    every_bin = pd.RangeIndex(0, vehicles_per_bin.index.max() + 1)
    cumulative = vehicles_per_bin.reindex(every_bin, fill_value=0).cumsum()

    # Use scenario-specific bin size to map to clock time
    clock_times = start_time + pd.to_timedelta(cumulative.index * bin_minutes, unit="m")
    fleet = cumulative.values

    result = pd.DataFrame({"x": clock_times, "y": fleet})
    result["hover"] = [f"Vehicles in service: {int(count):,}" for count in fleet]
    return result

# Draw the cumulative fleet-size curves, one figure per group in TEST_GROUPS.
run_timeseries_metric(
    metric_fleet_size,
    "Fleet Size (cumulative vehicles)",
    "Cumulative unique vehicles",
)


### Number of requested vehicle trips serviced (as opposed to deadheading trips)

In [7]:
def metric_requested_trips_per_hour(new_df, pooled_df, scenario, bin_minutes):
    """
    Requested vehicle trips per hour, by depart bin.

    A row is a requested trip when trip_i or trip_j is present and
    is_deadhead equals False. Per-bin counts are scaled by 60 / bin_minutes
    so scenarios with different bin sizes are comparable.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin), y (trips per hour)
    and hover (tooltip text); it has no rows when no requested trip has a
    numeric depart_bin.
    """
    has_rider_trip = new_df["trip_i"].notna() | new_df["trip_j"].notna()
    not_deadhead = new_df["is_deadhead"].eq(False)

    requested_bins = pd.to_numeric(
        new_df.loc[has_rider_trip & not_deadhead, "depart_bin"], errors="coerce"
    )
    requested_bins = requested_bins.dropna().astype(int)
    if requested_bins.empty:
        return pd.DataFrame({"x": [], "y": [], "hover": []})

    # Count per bin, zero-filled from bin 0 to the last observed bin.
    every_bin = pd.RangeIndex(0, requested_bins.max() + 1)
    trips_per_bin = requested_bins.value_counts().sort_index().reindex(every_bin, fill_value=0)

    # normalize to trips/hour according to bin size
    bins_per_hour = 60.0 / bin_minutes
    hourly_rate = trips_per_bin * bins_per_hour

    clock_times = start_time + pd.to_timedelta(hourly_rate.index * bin_minutes, unit="m")
    rates = hourly_rate.values

    result = pd.DataFrame({"x": clock_times, "y": rates})
    hover_text = []
    for rate, n_in_bin in zip(rates, trips_per_bin.values):
        hover_text.append(
            f"Veh trips/hour: {rate:,.2f}  (bin trips: {int(n_in_bin):,}, bin={bin_minutes} min)"
        )
    result["hover"] = hover_text
    return result


# Plot requested (non-deadhead) vehicle trips per hour for every scenario group.
run_timeseries_metric(
    metric_requested_trips_per_hour,
    "Number of vehicle trips serviced (REAL)",
    "Veh trips per hour",
)


In [8]:
def metric_requested_trip_totals(new_df, pooled_df):
    """
    Count requested vehicle trips and their share of all vehicle trips.

    A row is a requested trip when trip_i or trip_j is present and
    is_deadhead equals False. pooled_df is unused; it is accepted so the
    function matches the signature run_scalar_metric calls.

    Returns a dict with requested_trips, all_trips and share_real (NaN when
    the table has no rows).
    """
    has_rider_trip = new_df["trip_i"].notna() | new_df["trip_j"].notna()
    is_requested = has_rider_trip & new_df["is_deadhead"].eq(False)

    n_requested = int(is_requested.sum())
    n_all = len(new_df)

    if n_all:
        share = n_requested / n_all
    else:
        share = np.nan

    return {
        "requested_trips": n_requested,
        "all_trips": n_all,
        "share_real": share,
    }

# Print requested-trip totals and their share of all vehicle trips per scenario.
run_scalar_metric(metric_requested_trip_totals, "REAL veh trips totals")



REAL veh trips totals — time_bin_size
            requested_trips  all_trips share_real
scenario                                         
base                 213596     428566     49.84%
timebin_5            213329     429146     49.71%
timebin_15           213802     427906     49.96%

REAL veh trips totals — max_detour
           requested_trips  all_trips share_real
scenario                                        
base                213596     428566     49.84%
detour_5            212822     428575     49.66%
detour_10           213148     428570     49.73%
detour_20           213808     428617     49.88%

REAL veh trips totals — max_occupancy
          requested_trips  all_trips share_real
scenario                                       
base               213596     428566     49.84%
occ_6              199956     400900     49.88%
occ_8              192021     384969     49.88%

REAL veh trips totals — tnc_shared_demand
                         requested_trips  all_trips share_r

### Share of deadheading trips (by trips and VMT)

In [9]:
def metric_deadhead_share(new_df, pooled_df):
    """
    Share of deadhead legs, measured by trip count and by distance.

    Distances come from the dist_col_new column; values that are missing or
    not numeric count as 0. is_deadhead is used directly as a row mask
    (assumes a boolean column, TODO confirm). pooled_df is unused; it is
    accepted so the function matches the signature run_scalar_metric calls.

    Returns a dict with dead_trips, total_trips, share_trips, dead_vmt,
    total_vmt and share_vmt; a share is NaN when its denominator is 0.
    """
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce").fillna(0.0)
    is_dead = new_df["is_deadhead"]

    n_all = len(new_df)
    n_dead = int(is_dead.sum())

    vmt_dead = distance.loc[is_dead].sum()
    vmt_all = distance.sum()

    return {
        "dead_trips": n_dead,
        "total_trips": n_all,
        "share_trips": n_dead / n_all if n_all else np.nan,
        "dead_vmt": vmt_dead,
        "total_vmt": vmt_all,
        "share_vmt": vmt_dead / vmt_all if vmt_all else np.nan,
    }

# Print deadhead shares (by trips and by VMT) for every scenario group.
run_scalar_metric(metric_deadhead_share, "Share of deadheading trips")



Share of deadheading trips — time_bin_size
            dead_trips  total_trips share_trips       dead_vmt     total_vmt share_vmt
scenario                                                                              
base            214970       428566      50.16%  237333.623497  1.874488e+06    12.66%
timebin_5       215817       429146      50.29%  267909.644660  1.905020e+06    14.06%
timebin_15      214104       427906      50.04%  222415.540618  1.859414e+06    11.96%

Share of deadheading trips — max_detour
           dead_trips  total_trips share_trips       dead_vmt     total_vmt share_vmt
scenario                                                                             
base           214970       428566      50.16%  237333.623497  1.874488e+06    12.66%
detour_5       215753       428575      50.34%  238908.533389  1.875645e+06    12.74%
detour_10      215422       428570      50.27%  238535.462350  1.875214e+06    12.72%
detour_20      214809       428617      50.12%  23

### Number and VMT of all veh trips made by TNCs (per hour)

In [10]:
def metric_all_trips_per_hour(new_df, pooled_df, scenario, bin_minutes):
    """
    Vehicle trips per hour over every row of the vehicle-trips table.

    No trip-type filter is applied, so requested, deadhead and refuel legs
    all count. Per-bin counts are scaled by 60 / bin_minutes.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin), y (trips per hour)
    and hover (tooltip text); it has no rows when no depart_bin is numeric.
    """
    depart_bins = pd.to_numeric(new_df["depart_bin"], errors="coerce")
    depart_bins = depart_bins.dropna().astype(int)
    if depart_bins.empty:
        return pd.DataFrame({"x": [], "y": [], "hover": []})

    # Count per bin, zero-filled from bin 0 to the last observed bin.
    every_bin = pd.RangeIndex(0, depart_bins.max() + 1)
    legs_per_bin = depart_bins.value_counts().sort_index().reindex(every_bin, fill_value=0)

    bins_per_hour = 60.0 / bin_minutes
    hourly_rate = legs_per_bin * bins_per_hour

    clock_times = start_time + pd.to_timedelta(hourly_rate.index * bin_minutes, unit="m")
    rates = hourly_rate.values

    result = pd.DataFrame({"x": clock_times, "y": rates})
    hover_text = []
    for rate, n_in_bin in zip(rates, legs_per_bin.values):
        hover_text.append(
            f"Trips/hour: {rate:,.2f}  (bin legs: {int(n_in_bin):,}, bin={bin_minutes} min)"
        )
    result["hover"] = hover_text
    return result


# Plot all vehicle trips per hour (no trip-type filter) for every scenario group.
run_timeseries_metric(
    metric_all_trips_per_hour,
    "All TNC trips (normalized to trips/hour)",
    "Trips per hour",
)


In [11]:
def metric_all_vmt_per_hour(new_df, pooled_df, scenario, bin_minutes):
    """
    Distance travelled per hour over all vehicle trips, by depart bin.

    Distance is read from the dist_col_new column. Rows lacking a numeric
    distance or a numeric depart_bin are skipped. Per-bin totals are scaled
    by 60 / bin_minutes.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin), y (VMT per hour) and
    hover (tooltip text); it has no rows when no row is usable.
    """
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce")
    depart_bin = pd.to_numeric(new_df["depart_bin"], errors="coerce")

    usable = depart_bin.notna() & distance.notna()
    if not usable.any():
        return pd.DataFrame({"x": [], "y": [], "hover": []})

    bin_ids = depart_bin[usable].astype(int)
    miles = distance[usable]

    # Total distance per bin, zero-filled from bin 0 to the last observed bin.
    # Grouping by the raw array keeps the grouping positional.
    every_bin = pd.RangeIndex(0, bin_ids.max() + 1)
    vmt_by_bin = miles.groupby(bin_ids.to_numpy()).sum().reindex(every_bin, fill_value=0.0)

    bins_per_hour = 60.0 / bin_minutes
    vmt_rate = vmt_by_bin * bins_per_hour

    clock_times = start_time + pd.to_timedelta(vmt_rate.index * bin_minutes, unit="m")
    rates = vmt_rate.values

    result = pd.DataFrame({"x": clock_times, "y": rates})
    hover_text = []
    for rate, bin_total in zip(rates, vmt_by_bin.values):
        hover_text.append(
            f"VMT/hour: {rate:,.1f}  (bin VMT: {bin_total:,.1f}, bin={bin_minutes} min)"
        )
    result["hover"] = hover_text
    return result


# Plot VMT per hour over all vehicle trips for every scenario group.
run_timeseries_metric(
    metric_all_vmt_per_hour,
    "All TNC VMT (normalized to per-hour)",
    "VMT per hour",
)


In [12]:
def metric_all_trips_totals(new_df, pooled_df):
    """
    Total number of vehicle trips and total distance travelled.

    Distance is summed from the dist_col_new column; values that are missing
    or not numeric count as 0. pooled_df is unused; it is accepted so the
    function matches the signature run_scalar_metric calls.

    Returns a dict with total_trips and total_vmt.
    """
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce").fillna(0.0)
    totals = {"total_trips": len(new_df)}
    totals["total_vmt"] = distance.sum()
    return totals

# Print total vehicle trips and total VMT for every scenario group.
run_scalar_metric(metric_all_trips_totals, "All TNC trips totals")



All TNC trips totals — time_bin_size
            total_trips     total_vmt
scenario                             
base             428566  1.874488e+06
timebin_5        429146  1.905020e+06
timebin_15       427906  1.859414e+06

All TNC trips totals — max_detour
           total_trips     total_vmt
scenario                            
base            428566  1.874488e+06
detour_5        428575  1.875645e+06
detour_10       428570  1.875214e+06
detour_20       428617  1.875890e+06

All TNC trips totals — max_occupancy
          total_trips     total_vmt
scenario                           
base           428566  1.874488e+06
occ_6          400900  1.754936e+06
occ_8          384969  1.697862e+06

All TNC trips totals — tnc_shared_demand
                         total_trips     total_vmt
scenario                                          
base                          428566  1.874488e+06
shift_to_all_shared_tnc     22328970  5.502521e+07


### Avg initial wait time by hour (ignoring first pickup by each vehicle)

In [13]:
def metric_initial_wait_by_hour(new_df, pooled_df, scenario, bin_minutes):
    """
    Mean OD_time of pickup legs, grouped by hour since start_time.

    Only rows whose trip_type is "pickup" (case-insensitive) and that have a
    numeric depart_bin and OD_time are used. For each vehicle, every pickup
    in its earliest pickup bin is excluded, which removes the pickup made
    when the vehicle is first spawned.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time at the start of the hour),
    y (mean OD_time) and hover (tooltip text); it has no rows when nothing
    is left after filtering.
    """
    no_data = {"x": [], "y": [], "hover": []}

    # pickups only
    is_pickup = new_df["trip_type"].astype(str).str.lower().eq("pickup")
    pickups = new_df.loc[is_pickup].copy()

    for column in ("depart_bin", "OD_time"):
        pickups[column] = pd.to_numeric(pickups[column], errors="coerce")
    pickups = pickups.dropna(subset=["depart_bin", "OD_time"])

    if pickups.empty:
        return pd.DataFrame(no_data)

    # drop first pickup per vehicle
    pickups = pickups.assign(depart_bin_int=pickups["depart_bin"].astype(int))
    first_bin = pickups.groupby("vehicle_id")["depart_bin_int"].transform("min")
    later_pickups = pickups.loc[pickups["depart_bin_int"] > first_bin]

    if later_pickups.empty:
        return pd.DataFrame(no_data)

    # hour_from_3am = floor( (depart_bin * bin_minutes) / 60 )
    later_pickups = later_pickups.assign(
        hour_from_3am=((later_pickups["depart_bin_int"] * bin_minutes) // 60).astype(int)
    )

    wait_by_hour = (
        later_pickups.groupby("hour_from_3am")["OD_time"]
        .mean()
        .rename("avg_wait")
        .reset_index()
    )

    clock_times = start_time + pd.to_timedelta(wait_by_hour["hour_from_3am"] * 60, unit="m")
    mean_waits = wait_by_hour["avg_wait"].values

    result = pd.DataFrame({"x": clock_times, "y": mean_waits})
    result["hover"] = [f"Avg wait: {wait:.2f} min" for wait in mean_waits]
    return result


# Plot the hourly mean pickup wait (first pickup per vehicle excluded).
run_timeseries_metric(
    metric_initial_wait_by_hour,
    "Average initial wait time (excluding first pickup per vehicle)",
    "Minutes",
)


### Avg detour time

In [14]:
def metric_avg_detour(new_df, pooled_df):
    """
    Mean detour over both riders of every pooled trip.

    A row of pooled_df is a pooled trip when trip_j is present. The detour_i
    and detour_j values of those rows are stacked into one sample; values
    that are missing or not numeric are left out. new_df is unused; it is
    accepted so the function matches the signature run_scalar_metric calls.

    Returns a dict with avg_detour (NaN when there is no usable value).
    """
    pooled_rows = pooled_df.loc[pooled_df["trip_j"].notna()]

    detours = pd.concat(
        [
            pd.to_numeric(pooled_rows[column], errors="coerce")
            for column in ("detour_i", "detour_j")
        ],
        ignore_index=True,
    ).dropna()

    if detours.empty:
        return {"avg_detour": np.nan}
    return {"avg_detour": detours.mean()}

# Print the mean detour of pooled trips for every scenario group.
run_scalar_metric(metric_avg_detour, "Average detour time (pooled trips)")



Average detour time (pooled trips) — time_bin_size
            avg_detour
scenario              
base          8.676728
timebin_5     8.751975
timebin_15    8.468652

Average detour time (pooled trips) — max_detour
           avg_detour
scenario             
base         8.676728
detour_5     2.915697
detour_10    6.210723
detour_20   10.034144

Average detour time (pooled trips) — max_occupancy
          avg_detour
scenario            
base        8.676728
occ_6       8.956039
occ_8       8.942742

Average detour time (pooled trips) — tnc_shared_demand
                         avg_detour
scenario                           
base                       8.676728
shift_to_all_shared_tnc    3.937055


In [15]:
# Too few data points

# def run_detour_histograms():
#     """
#     For each test group, plot detour distributions for base + its variants.
#     Detours come from pooled trips where trip_j is not null, using detour_i and detour_j.
#     """
#     for test_name, scen_list in TEST_GROUPS.items():
#         fig = go.Figure()

#         for scen in scen_list:
#             _, pooled_df = load_outputs(scen)

#             mask_pooled = pooled_df["trip_j"].notna()

#             d_i = pd.to_numeric(pooled_df.loc[mask_pooled, "detour_i"], errors="coerce")
#             d_j = pd.to_numeric(pooled_df.loc[mask_pooled, "detour_j"], errors="coerce")

#             vals = pd.concat([d_i, d_j], ignore_index=True).dropna()
#             if vals.empty:
#                 continue

#             fig.add_trace(
#                 go.Histogram(
#                     x=vals,
#                     name=scen,
#                     opacity=0.45,
#                     nbinsx=40,
#                     hovertemplate="Scenario: %{meta}<br>Detour: %{x:.2f} min<br>Count: %{y}<extra></extra>",
#                     meta=scen,
#                 )
#             )

#         fig.update_layout(
#             title=f"Detour time distribution — {test_name}",
#             xaxis_title="Detour time (minutes)",
#             yaxis_title="Count",
#             barmode="overlay",
#         )
#         fig.show()

# # Call once to generate histograms for all three tests
# run_detour_histograms()


### Avg occupancy by hour

In [16]:
def metric_occupancy_by_time_bin(new_df, pooled_df, scenario, bin_minutes):
    """
    Distance-weighted mean occupancy for each native depart bin.

    Every row with numeric occupancy, distance (dist_col_new column) and
    depart_bin contributes, so no trip type is excluded. Within a bin the
    mean is sum(occupancy * distance) / sum(distance). Bins whose total
    distance is not positive are left out.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin, start_time plus
    bin * bin_minutes), y (weighted mean occupancy) and hover (tooltip
    text); it has no rows when no input row is usable.
    """
    occupancy = pd.to_numeric(new_df["occupancy"], errors="coerce")
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce")
    depart_bin = pd.to_numeric(new_df["depart_bin"], errors="coerce")

    usable = depart_bin.notna() & distance.notna() & occupancy.notna()
    if not usable.any():
        return pd.DataFrame({"x": [], "y": [], "hover": []})

    legs = pd.DataFrame({
        "depart_bin_int": depart_bin[usable].astype(int),
        "occ": occupancy[usable].astype(float),
        "dist": distance[usable].astype(float),
    })
    # Numerator of the weighted mean: occupancy times distance per leg.
    legs["wv"] = legs["occ"] * legs["dist"]

    by_bin = legs.groupby("depart_bin_int")
    per_bin = pd.DataFrame({
        "sum_w": by_bin["dist"].sum(),
        "sum_wv": by_bin["wv"].sum(),
        "n": by_bin["occ"].size(),
    }).reset_index()

    # A bin without distance has no defined weighted mean.
    per_bin = per_bin.loc[per_bin["sum_w"] > 0].copy()
    per_bin["avg_occupancy"] = (per_bin["sum_wv"] / per_bin["sum_w"]).astype(float)

    # Native time bins -> clock time: bin 0 = 3:00, bin 1 = 3:00 + bin_minutes, ...
    clock_times = start_time + pd.to_timedelta(per_bin["depart_bin_int"] * bin_minutes, unit="m")
    mean_occ = per_bin["avg_occupancy"].values

    result = pd.DataFrame({"x": clock_times, "y": mean_occ})
    hover_text = []
    for t, v, n, w in zip(clock_times, mean_occ, per_bin["n"], per_bin["sum_w"]):
        hover_text.append(
            f"Time: {t:%H:%M}  | Avg occ: {v:.3f}  (legs: {int(n):,}, total OD_dist: {w:,.1f})"
        )
    result["hover"] = hover_text
    return result


# Plot distance-weighted mean occupancy per native time bin for every group.
run_timeseries_metric(
    metric_occupancy_by_time_bin,
    "Average occupancy by time bin (OD_dist-weighted)",
    "Average occupancy",
)


In [17]:
# def metric_occupancy_by_hour(new_df, pooled_df, scenario, bin_minutes):
#     """
#     OD_dist-weighted average occupancy by hour of day, including deadheading trips.
#     """
#     occ  = pd.to_numeric(new_df["occupancy"],  errors="coerce")
#     dist = pd.to_numeric(new_df[dist_col_new], errors="coerce")
#     bins = pd.to_numeric(new_df["depart_bin"], errors="coerce")

#     valid = (
#         bins.notna()
#         & dist.notna() 
#         & occ.notna() 
#     )
#     if not valid.any():
#         return pd.DataFrame({"x": [], "y": [], "hover": []})

#     df = pd.DataFrame({
#         "depart_bin_int": bins[valid].astype(int),
#         "occ":            occ[valid].astype(float),
#         "dist":           dist[valid].astype(float),
#     })

#     # hour_from_3am based on elapsed minutes = depart_bin * bin_minutes
#     df["hour_from_3am"] = ((df["depart_bin_int"] * bin_minutes) // 60).astype(int)
#     df["wv"] = df["occ"] * df["dist"]

#     agg = (
#         df.groupby("hour_from_3am")
#           .agg(sum_w=("dist", "sum"), sum_wv=("wv", "sum"), n=("occ", "size"))
#           .reset_index()
#     )
#     agg = agg[agg["sum_w"] > 0]
#     agg["avg_occupancy"] = (agg["sum_wv"] / agg["sum_w"]).astype(float)

#     times = start_time + pd.to_timedelta(agg["hour_from_3am"] * 60, unit="m")
#     vals  = agg["avg_occupancy"].values

#     out = pd.DataFrame({"x": times, "y": vals})
#     out["hover"] = [
#         f"Avg occ: {v:.3f}  (legs: {int(n):,}, total OD_dist: {w:,.1f})"
#         for v, n, w in zip(vals, agg["n"], agg["sum_w"])
#     ]
#     return out


# run_timeseries_metric(
#     metric_occupancy_by_hour,
#     "Average occupancy by hour (OD_dist-weighted)",
#     "Average occupancy",
# )


In [18]:
def metric_occupancy_trip_vmt(new_df, pooled_df):
    """
    Mean occupancy per vehicle trip and per unit of distance.

    Rows are used when occupancy and distance (dist_col_new column) are both
    numeric and the distance is not negative. pooled_df is unused; it is
    accepted so the function matches the signature run_scalar_metric calls.

    Returns a dict with avg_occ_by_trip (plain mean over the kept rows) and
    avg_occ_by_vmt (distance-weighted mean; NaN when the total distance is
    not positive).
    """
    occupancy = pd.to_numeric(new_df["occupancy"], errors="coerce")
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce")

    keep = occupancy.notna() & distance.notna() & distance.ge(0)
    occupancy, distance = occupancy[keep], distance[keep]

    total_distance = distance.sum()
    if total_distance > 0:
        vmt_weighted = (distance * occupancy).sum() / total_distance
    else:
        vmt_weighted = np.nan

    return {
        "avg_occ_by_trip": occupancy.mean(),
        "avg_occ_by_vmt": vmt_weighted,
    }

# Print trip-weighted and VMT-weighted mean occupancy for every scenario group.
run_scalar_metric(metric_occupancy_trip_vmt, "Occupancy by veh trips and VMT")



Occupancy by veh trips and VMT — time_bin_size
            avg_occ_by_trip  avg_occ_by_vmt
scenario                                   
base               0.933701        1.589033
timebin_5          0.932447        1.564145
timebin_15         0.934670        1.601782

Occupancy by veh trips and VMT — max_detour
           avg_occ_by_trip  avg_occ_by_vmt
scenario                                  
base              0.933701        1.589033
detour_5          0.930064        1.587440
detour_10         0.931596        1.587707
detour_20         0.934698        1.588779

Occupancy by veh trips and VMT — max_occupancy
          avg_occ_by_trip  avg_occ_by_vmt
scenario                                 
base             0.933701        1.589033
occ_6            0.998006        1.697590
occ_8            1.039218        1.754532

Occupancy by veh trips and VMT — tnc_shared_demand
                         avg_occ_by_trip  avg_occ_by_vmt
scenario                                                
base 

### Share and profile of refueling trips

In [19]:
def metric_refuel_trips_by_time_bin(new_df, pooled_df, scenario, bin_minutes):
    """
    Refuel trips per hour, by depart bin.

    A row is a refuel trip when trip_type is "refuel" (case-insensitive).
    Per-bin counts are scaled by 60 / bin_minutes.

    pooled_df and scenario are unused; they are accepted so the function
    matches the signature run_timeseries_metric calls.

    Returns a DataFrame with x (clock time of the bin), y (refuel trips per
    hour) and hover (tooltip text); it has no rows when no refuel trip has a
    numeric depart_bin.
    """
    is_refuel = new_df["trip_type"].astype(str).str.lower().eq("refuel")
    refuel_bins = pd.to_numeric(new_df.loc[is_refuel, "depart_bin"], errors="coerce")
    refuel_bins = refuel_bins.dropna().astype(int)

    if refuel_bins.empty:
        return pd.DataFrame({"x": [], "y": [], "hover": []})

    # Count per bin, zero-filled from bin 0 to the last observed bin.
    every_bin = pd.RangeIndex(0, refuel_bins.max() + 1)
    refuels_per_bin = refuel_bins.value_counts().sort_index().reindex(every_bin, fill_value=0)

    bins_per_hour = 60.0 / bin_minutes
    hourly_rate = refuels_per_bin * bins_per_hour

    clock_times = start_time + pd.to_timedelta(hourly_rate.index * bin_minutes, unit="m")
    rates = hourly_rate.values

    result = pd.DataFrame({"x": clock_times, "y": rates})
    hover_text = []
    for rate, n_in_bin in zip(rates, refuels_per_bin.values):
        hover_text.append(
            f"Refuel trips/hour: {rate:,.2f}  (bin trips: {int(n_in_bin):,}, bin={bin_minutes} min)"
        )
    result["hover"] = hover_text
    return result


# Plot refuel trips per hour for every scenario group.
run_timeseries_metric(
    metric_refuel_trips_by_time_bin,
    "Refuel trips (per hour)",
    "Trips per hour",
)


In [20]:
def metric_refuel_share(new_df, pooled_df):
    """
    Share of refuel trips, measured by trip count and by distance.

    A row is a refuel trip when trip_type is "refuel" (case-insensitive).
    Distances come from the dist_col_new column; values that are missing or
    not numeric count as 0. pooled_df is unused; it is accepted so the
    function matches the signature run_scalar_metric calls.

    Returns a dict with refuel_trips, all_trips, share_trips, refuel_vmt,
    all_vmt and share_vmt; a share is NaN when its denominator is 0.
    """
    is_refuel = new_df["trip_type"].astype(str).str.lower() == "refuel"
    distance = pd.to_numeric(new_df[dist_col_new], errors="coerce").fillna(0.0)

    n_refuel = int(is_refuel.sum())
    n_total = len(new_df)

    refuel_vmt = distance[is_refuel].sum()
    total_vmt = distance.sum()

    return {
        "refuel_trips": n_refuel,
        "all_trips": n_total,
        "share_trips": n_refuel / n_total if n_total else np.nan,
        "refuel_vmt": refuel_vmt,
        "all_vmt": total_vmt,
        "share_vmt": refuel_vmt / total_vmt if total_vmt else np.nan,
    }

# Print refuel shares (by trips and by VMT) for every scenario group.
run_scalar_metric(metric_refuel_share, "Share of refueling trips")



Share of refueling trips — time_bin_size
            refuel_trips  all_trips share_trips   refuel_vmt       all_vmt share_vmt
scenario                                                                            
base                3006     428566        0.7%  5779.081723  1.874488e+06     0.31%
timebin_5           3586     429146       0.84%  6948.751708  1.905020e+06     0.36%
timebin_15          2346     427906       0.55%  5349.993074  1.859414e+06     0.29%

Share of refueling trips — max_detour
           refuel_trips  all_trips share_trips   refuel_vmt       all_vmt share_vmt
scenario                                                                           
base               3006     428566        0.7%  5779.081723  1.874488e+06     0.31%
detour_5           3015     428575        0.7%  6221.878565  1.875645e+06     0.33%
detour_10          3010     428570        0.7%  5815.520398  1.875214e+06     0.31%
detour_20          3057     428617       0.71%  6135.046131  1.875890e+06 

#### QAQC refuel trips

In [21]:

# QA/QC: for every refuel trip, how far had the vehicle driven since its
# previous refuel (or since its first trip when it had not refuelled yet)?

# Reuse the notebook-wide configuration instead of repeating the path and the
# scenario list here, so this cell cannot drift from the other cells.
OUTPUT_ROOT = BASE_OUTPUT_DIR
timebin_scenarios = list(TEST_GROUPS["time_bin_size"])

# scenario -> one row per refuel trip with its dist_since_last_refuel
refuel_dist = {}

for scen in timebin_scenarios:
    csv_path = OUTPUT_ROOT / scen / "output_tnc_vehicle_trips.csv"
    # Only these four columns are used below.
    df = pd.read_csv(
        csv_path,
        usecols=["vehicle_id", "vehicle_trip_id", "OD_dist", "trip_type"],
    )

    df["OD_dist"] = pd.to_numeric(df["OD_dist"], errors="coerce").fillna(0.0)
    df["trip_type"] = df["trip_type"].astype(str).str.lower()
    # Order each vehicle's trips by vehicle_trip_id so the running distance
    # below follows that order; mergesort keeps ties in file order.
    df = df.sort_values(["vehicle_id", "vehicle_trip_id"], kind="mergesort")

    pieces = []
    for vid, sub in df.groupby("vehicle_id", sort=False):
        od = sub["OD_dist"]
        cum = od.cumsum()
        is_ref = sub["trip_type"].eq("refuel")
        if not is_ref.any():
            continue
        # Running distance at each refuel row, NaN elsewhere ...
        cum_at_refuel = cum.where(is_ref)
        # ... shifted one row and forward-filled: the running distance at the
        # most recent earlier refuel (0 before the vehicle's first refuel).
        prev_cum_at_refuel = cum_at_refuel.shift().ffill().fillna(0.0)
        # The refuel trip's own distance is included in this difference.
        dist_since_last = cum - prev_cum_at_refuel

        out = sub.loc[is_ref, ["vehicle_id", "vehicle_trip_id"]].copy()
        out["dist_since_last_refuel"] = dist_since_last[is_ref].values
        pieces.append(out)

    if pieces:
        refuel_dist[scen] = pd.concat(pieces, ignore_index=True)
    else:
        refuel_dist[scen] = pd.DataFrame(
            columns=["vehicle_id", "vehicle_trip_id", "dist_since_last_refuel"]
        )

fig = go.Figure()

for scen in timebin_scenarios:
    d = refuel_dist[scen]["dist_since_last_refuel"].astype(float)
    # Zero (or negative) distances are left out of the histogram.
    d = d[d > 0]
    if d.empty:
        continue

    fig.add_trace(
        go.Histogram(
            x=d,
            nbinsx=40,
            name=scen,
            opacity=0.5,
            histnorm="probability",
            hovertemplate=(
                f"Scenario: {scen}<br>"
                "Dist since last refuel: %{x:,.1f}<br>"
                "Bin share: %{y:.3f}<extra></extra>"
            ),
        )
    )

fig.update_layout(
    barmode="overlay",
    title="Distribution of distance since last refuel",
    xaxis_title="Distance since last refuel (miles)",
    yaxis_title="Share",
    hovermode="x unified",
)

# Dashed reference line at x = 300.
# NOTE(review): presumably the vehicle range / refuel threshold in miles;
# confirm against the routing configuration.
fig.add_shape(
    type="line",
    x0=300,
    x1=300,
    y0=0,
    y1=1,
    xref="x",
    yref="paper",
    line=dict(dash="dash"),
)

fig.show()
