In [32]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import openmatrix as omx

In [33]:
# Input locations. These are absolute paths on the original author's machine;
# adjust them before running the notebook anywhere else.
new_trips_csv = r"C:\projects\sandag\av_tnc_routing\av_run_dir\outputs\tnc_routing_test\output_tnc_vehicle_trips.csv"
old_trips_csv = r"C:\projects\sandag\av_tnc_routing\av_run_dir\tnc_routing_data\TNCTrips.csv"
landuse_file = r"C:\projects\sandag\av_tnc_routing\av_run_dir\tnc_routing_data\final_land_use.csv"
omx_path = r"C:\projects\sandag\av_tnc_routing\av_run_dir\input_data_full\skims\traffic_skims_MD.omx"

# Name of the skim core that is read from the OMX file for trip distances.
skim_core = "SOV_TR_H_DIST__MD"

# Clock time assigned to bin 0 whenever bin numbers are turned into timestamps.
start_time = pd.Timestamp("2000-01-01 03:00:00")

# The old output uses different column names; rename them so both tables can
# be handled with the same column references.
old_column_renames = {
    "vehicle_ID": "vehicle_id",
    "totalPassengers": "occupancy",
    "originMgra": "origin",
    "destinationMgra": "destination",
}

new_df = pd.read_csv(new_trips_csv)
old_df = pd.read_csv(old_trips_csv).rename(columns=old_column_renames)

## Total number of vehicles in service

In [34]:
def _cumulative_vehicle_intro(trips, bin_col, bin_minutes):
    """Count vehicles by the bin of their first trip and accumulate over the day.

    Returns a tuple ``(first_bin, cumulative, cumulative_by_clock)``:
    the earliest bin per vehicle, the running number of vehicles that have
    appeared by each bin (bins from 0 to the last observed one, gaps filled
    with 0 before accumulating), and the same series indexed by timestamp.
    """
    first_bin = (
        trips.assign(**{bin_col: trips[bin_col].astype(int)})
        .groupby("vehicle_id")[bin_col]
        .min()
    )
    per_bin = first_bin.value_counts().sort_index()
    every_bin = pd.RangeIndex(0, per_bin.index.max() + 1)
    cumulative = per_bin.reindex(every_bin, fill_value=0).cumsum()
    cumulative_by_clock = cumulative.copy()
    cumulative_by_clock.index = start_time + pd.to_timedelta(
        cumulative_by_clock.index * bin_minutes, unit="m"
    )
    return first_bin, cumulative, cumulative_by_clock


# NEW output: depart_bin, converted to clock time at 10 minutes per bin
new_first, new_intro, new_intro_time = _cumulative_vehicle_intro(new_df, "depart_bin", 10)

# OLD output: startPeriod, converted to clock time at 5 minutes per bin
old_first, old_intro, old_intro_time = _cumulative_vehicle_intro(old_df, "startPeriod", 5)

print("Item 1 — Unique vehicles:")
print(f"  New: {new_first.shape[0]:,}")
print(f"  Old: {old_first.shape[0]:,}")

fig = go.Figure()
for label, series in (("Old", old_intro_time), ("New", new_intro_time)):
    fig.add_trace(go.Scatter(x=series.index, y=series.values, mode="lines", name=label))
fig.update_layout(
    title="Item 1 — Cumulative Vehicles in Service",
    xaxis_title="Time of Day",
    yaxis_title="Cumulative Unique Vehicles",
    hovermode="x unified",
    xaxis=dict(tickformat="%H:%M"),
)
fig.show()


Item 1 — Unique vehicles:
  New: 7,528
  Old: 17,615


## Total number of (real) trips serviced

In [35]:

# Masks selecting "real" trips in each output.
# NEW: a trip reference is present in trip_i or trip_j and the row is not flagged deadhead.
has_trip_ref = new_df[["trip_i", "trip_j"]].notna().any(axis=1)
real_new = has_trip_ref & new_df["is_deadhead"].eq(False)
# OLD: positive occupancy, a pickup recorded at the origin and no drop-off at the origin.
occ_old_num = pd.to_numeric(old_df["occupancy"], errors="coerce").fillna(0)
real_old = occ_old_num.gt(0) & old_df["pickupIdsAtOrigin"].notna() & old_df["dropoffIdsAtOrigin"].isna()


def _real_trip_counts(bins):
    """Trips per bin for bins 0..max(bins); bins with no trips are filled with 0."""
    every_bin = pd.RangeIndex(0, bins.max() + 1)
    return bins.value_counts().sort_index().reindex(every_bin, fill_value=0)


# Departure bins of the selected trips
old_bins = old_df.loc[real_old, "startPeriod"].dropna().astype(int)
new_bins = new_df.loc[real_new, "depart_bin"].dropna().astype(int)

old_counts = _real_trip_counts(old_bins)
new_counts = _real_trip_counts(new_bins)

# Scale per-bin counts to an hourly rate so the two bin widths are comparable
old_rate = old_counts * (60 / 5)   # 5-min bins : per hour
new_rate = new_counts * (60 / 10)  # 10-min bins : per hour

# Bin number -> clock time
old_rate_time = start_time + pd.to_timedelta(old_rate.index * 5, unit="m")
new_rate_time = start_time + pd.to_timedelta(new_rate.index * 10, unit="m")

fig = go.Figure()
for times, hourly, label in (
    (old_rate_time, old_rate, "Old REAL trips/hour (normalized)"),
    (new_rate_time, new_rate, "New REAL trips/hour (normalized)"),
):
    fig.add_trace(go.Scatter(x=times, y=hourly.values, mode="lines", name=label))
fig.update_layout(
    title="Item 2 — REAL Trips (per hour)",
    xaxis_title="Time of Day",
    yaxis_title="Trips per hour",
    hovermode="x unified",
    xaxis=dict(tickformat="%H:%M"),
)
fig.show()


# Systemwide totals (number of rows selected by each mask)
print("Item 2 — REAL trips")
print(f"  Old REAL trips: {int(real_old.sum()):,}")
print(f"  New REAL trips: {int(real_new.sum()):,}")


Item 2 — REAL trips
  Old REAL trips: 229,785
  New REAL trips: 186,115


In [36]:
# Old output (startPeriod, 5-min bins)
# Max possible startPeriod = 287
# This is ok

# New output (depart_bin, 10-min bins)
# Max possible depart_bin = 143
# There are records with depart_bin greater than 143. Records with depart_bin > 144 are all deadhead trips, but a few records with depart_bin = 144 are not.

## Total number of trips made by the TNC vehicles 

In [37]:
# Departure bin of every vehicle trip; rows without a bin are dropped.
old_bins_all = old_df["startPeriod"].dropna().astype(int)
new_bins_all = new_df["depart_bin"].dropna().astype(int)

# Trips per bin over bins 0..last observed bin, with empty bins filled with 0.
old_full_range = pd.RangeIndex(0, old_bins_all.max() + 1)
new_full_range = pd.RangeIndex(0, new_bins_all.max() + 1)
old_counts_all = old_bins_all.value_counts().sort_index().reindex(old_full_range, fill_value=0)
new_counts_all = new_bins_all.value_counts().sort_index().reindex(new_full_range, fill_value=0)

# Scale per-bin counts to trips/hour so the two bin widths are comparable
old_rate_all = old_counts_all * (60 / 5)    # 5-min bins : per hour
new_rate_all = new_counts_all * (60 / 10)   # 10-min bins : per hour

# Bin number -> clock time
old_rate_time = start_time + pd.to_timedelta(old_rate_all.index * 5, unit="m")
new_rate_time = start_time + pd.to_timedelta(new_rate_all.index * 10, unit="m")

fig = go.Figure(
    data=[
        go.Scatter(x=old_rate_time, y=old_rate_all.values, mode="lines",
                   name="Old (all trips/hour, normalized)"),
        go.Scatter(x=new_rate_time, y=new_rate_all.values, mode="lines",
                   name="New (all trips/hour, normalized)"),
    ]
)
fig.update_layout(
    title="Item 3 — All TNC Trips (normalized to trips/hour)",
    xaxis_title="Time of Day",
    yaxis_title="Trips per hour",
    hovermode="x unified",
    xaxis=dict(tickformat="%H:%M"),
)
fig.show()


# Systemwide totals (all trips): the per-bin counts summed over the day
print("Item 3 — Systemwide (All TNC trips)")
print(f"  Old total trips: {int(old_counts_all.sum()):,}")
print(f"  New total trips: {int(new_counts_all.sum()):,}")

Item 3 — Systemwide (All TNC trips)
  Old total trips: 381,347
  New total trips: 413,065


### Non-deadhead trips made by the TNC vehicles 

In [38]:
# Non-deadhead trips only.
# OLD: occupancy > 0 (coerced to numeric first, as the other cells do).
# NEW: rows whose is_deadhead flag is False.
# NOTE: the *_all names below are reused from the all-trips cell above and
# overwrite its values; here they hold the non-deadhead subset only.
old_has_passengers = pd.to_numeric(old_df["occupancy"], errors="coerce").fillna(0) > 0
new_not_deadhead = new_df["is_deadhead"] == False

old_bins_all = old_df.loc[old_has_passengers, "startPeriod"].dropna().astype(int)
new_bins_all = new_df.loc[new_not_deadhead, "depart_bin"].dropna().astype(int)

# Trips per bin over bins 0..last observed bin, with empty bins filled with 0.
old_counts_all = old_bins_all.value_counts().sort_index().reindex(
    pd.RangeIndex(0, old_bins_all.max() + 1), fill_value=0
)
new_counts_all = new_bins_all.value_counts().sort_index().reindex(
    pd.RangeIndex(0, new_bins_all.max() + 1), fill_value=0
)

# normalize to trips/hour
old_rate_all = old_counts_all * (60 / 5)    # 5-min bins : per hour
new_rate_all = new_counts_all * (60 / 10)   # 10-min bins : per hour

old_rate_time = start_time + pd.to_timedelta(old_rate_all.index * 5, unit="m")
new_rate_time = start_time + pd.to_timedelta(new_rate_all.index * 10, unit="m")

# Legend entries name the subset actually plotted (non-deadhead trips); the
# previous labels were copied from the all-trips chart.
fig = go.Figure()
fig.add_trace(go.Scatter(x=old_rate_time, y=old_rate_all.values, mode="lines",
                         name="Old (non-deadhead trips/hour, normalized)"))
fig.add_trace(go.Scatter(x=new_rate_time, y=new_rate_all.values, mode="lines",
                         name="New (non-deadhead trips/hour, normalized)"))
fig.update_layout(
    title="Non-Deadhead TNC Trips (normalized to trips/hour)",
    xaxis_title="Time of Day",
    yaxis_title="Trips per hour",
    hovermode="x unified",
    xaxis=dict(tickformat="%H:%M")
)
fig.show()


# Systemwide totals (non-deadhead trips only)
print("Systemwide Non-deadhead TNC Vehicle Trips")
print(f"  Old total trips: {int(old_counts_all.sum()):,}")
print(f"  New total trips: {int(new_counts_all.sum()):,}")

Systemwide Non-deadhead TNC Vehicle Trips
  Old total trips: 229,967
  New total trips: 186,115


## Average occupancy by number of trips and average occupancy by VMT

In [39]:
# Trip-level working tables. Their row order must stay identical to
# new_df / old_df because later cells attach columns positionally via `.values`.
new_trips = new_df[["occupancy", "origin", "destination"]].copy()
old_trips = old_df[["occupancy", "origin", "destination"]].copy()

lu = pd.read_csv(landuse_file, usecols=["mgra", "TAZ"])

with omx.open_file(omx_path) as f:
    skim = np.array(f[skim_core])
    taz_map = f.mapping(f.list_mappings()[0])  # dict-like TAZ -> idx

taz_to_idx = pd.Series(taz_map)

_TRIP_ENDS = (("origin", "oTAZ", "otaz_idx"), ("destination", "dTAZ", "dtaz_idx"))


def _attach_taz_indices(trips):
    """Add oTAZ/dTAZ (from the land-use table) and their skim indices to ``trips``.

    Raises AssertionError if the land-use merge changes the number of rows,
    which would break the positional alignment with the source trip table.
    """
    n_rows = len(trips)
    for end_col, taz_col, _ in _TRIP_ENDS:
        lookup = lu.rename(columns={"mgra": end_col, "TAZ": taz_col})
        trips = trips.merge(lookup, on=end_col, how="left")
    assert len(trips) == n_rows, (
        "land-use merge changed the number of trip rows "
        "(duplicate mgra values in the land-use file?)"
    )
    # Map TAZ labels to skim indices; unmatched TAZs become NaN.
    for _, taz_col, idx_col in _TRIP_ENDS:
        trips[idx_col] = taz_to_idx.reindex(trips[taz_col].values).values
    return trips


new_trips = _attach_taz_indices(new_trips)
old_trips = _attach_taz_indices(old_trips)

# Compute distances once per unique (origin, destination) skim-index pair
idx_cols = ["otaz_idx", "dtaz_idx"]
pairs = (
    pd.concat([new_trips[idx_cols], old_trips[idx_cols]], ignore_index=True)
    .dropna()
    .astype("int32")
    .drop_duplicates()
)
pairs["trip_dist"] = skim[pairs["otaz_idx"].to_numpy(), pairs["dtaz_idx"].to_numpy()]


def _attach_distance(trips):
    """Left-join the per-pair skim distance onto ``trips`` without changing row count."""
    with_dist = trips.merge(pairs, on=idx_cols, how="left")
    assert len(with_dist) == len(trips), "distance merge changed the number of trip rows"
    return with_dist


new_trips = _attach_distance(new_trips)
old_trips = _attach_distance(old_trips)

# Clean occupancy; compute distance-weighted term
for trips_table in (new_trips, old_trips):
    trips_table["occupancy"] = pd.to_numeric(trips_table["occupancy"], errors="coerce")
    trips_table["occ_x_dist"] = trips_table["occupancy"] * trips_table["trip_dist"]


def _vmt_weighted_occupancy(trips):
    """Distance-weighted mean occupancy.

    Only rows where both occupancy and distance are known enter the numerator
    and the denominator, so a trip with a missing occupancy no longer adds
    distance to the denominator alone.
    """
    usable = trips["occ_x_dist"].notna()
    return trips.loc[usable, "occ_x_dist"].sum() / trips.loc[usable, "trip_dist"].sum()


# Systemwide averages
avg_occ_old_by_trips = old_trips["occupancy"].mean()
avg_occ_new_by_trips = new_trips["occupancy"].mean()

avg_occ_old_by_vmt = _vmt_weighted_occupancy(old_trips)
avg_occ_new_by_vmt = _vmt_weighted_occupancy(new_trips)


print("Item 4 — Average occupancy (trip-level)")
print(f"OLD  By trips: {avg_occ_old_by_trips:,.2f} | By VMT: {avg_occ_old_by_vmt:,.2f} ")
print(f"NEW  By trips: {avg_occ_new_by_trips:,.2f} | By VMT: {avg_occ_new_by_vmt:,.2f} ")


Item 4 — Average occupancy (trip-level)
OLD  By trips: 1.07 | By VMT: 1.32 
NEW  By trips: 0.45 | By VMT: 0.86 


## Number and share of deadheading by number of trips and VMT

In [40]:
new_trips = new_trips.copy()
old_trips = old_trips.copy()

# NEW output carries its own deadhead flag; copy it over positionally.
new_trips["is_deadhead"] = new_df["is_deadhead"].values

# OLD output: a trip counts as deadhead when it has zero passengers, a
# drop-off recorded at the origin, and no pickup recorded at the origin.
occ_old_num = pd.to_numeric(old_df["occupancy"], errors="coerce").fillna(0)
old_deadhead_mask = (
    occ_old_num.eq(0)
    & old_df["dropoffIdsAtOrigin"].notna()
    & old_df["pickupIdsAtOrigin"].isna()
)
old_trips["is_deadhead"] = old_deadhead_mask.values


def _deadhead_summary(trips):
    """Return (dead_trips, total_trips, share_trips, dead_vmt, total_vmt, share_vmt).

    Shares are NaN when the corresponding total is zero.
    """
    dead_rows = trips["is_deadhead"]
    total_trips = len(trips)
    dead_trips = int(dead_rows.sum())
    share_trips = dead_trips / total_trips if total_trips else np.nan
    dead_vmt = trips.loc[dead_rows, "trip_dist"].sum()
    total_vmt = trips["trip_dist"].sum()
    share_vmt = dead_vmt / total_vmt if total_vmt else np.nan
    return dead_trips, total_trips, share_trips, dead_vmt, total_vmt, share_vmt


(old_dead_trips, old_total_trips, old_share_trips,
 old_dead_vmt, old_total_vmt, old_share_vmt) = _deadhead_summary(old_trips)
(new_dead_trips, new_total_trips, new_share_trips,
 new_dead_vmt, new_total_vmt, new_share_vmt) = _deadhead_summary(new_trips)

# --- Report
print("Item 5 — Deadheading rate")
for label, (n_dead, n_all, trip_share, vmt_dead, vmt_all, vmt_share) in (
    ("OLD", (old_dead_trips, old_total_trips, old_share_trips,
             old_dead_vmt, old_total_vmt, old_share_vmt)),
    ("NEW", (new_dead_trips, new_total_trips, new_share_trips,
             new_dead_vmt, new_total_vmt, new_share_vmt)),
):
    print(label)
    print(f"  By trips: {n_dead:,} / {n_all:,}: {trip_share:,.1%}")
    print(f"  By VMT:   {vmt_dead:,.0f} / {vmt_all:,.0f}: {vmt_share:,.1%}")


Item 5 — Deadheading rate
OLD
  By trips: 151,376 / 381,347: 39.7%
  By VMT:   593,419 / 1,729,081: 34.3%
NEW
  By trips: 226,950 / 413,065: 54.9%
  By VMT:   248,362 / 1,716,421: 14.5%


## Number and share of refueling trips by number of trips and VMT

In [41]:
# Refuel masks. NEW: trip_type equals "refuel" (case-insensitive).
# OLD: destinationPurpose code 4 (the column name in the old file has a leading space).
refuel_new = new_df["trip_type"].astype(str).str.lower().eq("refuel")
refuel_old = pd.to_numeric(old_df[" destinationPurpose"], errors="coerce").eq(4)

old_bins_ref = old_df.loc[refuel_old, "startPeriod"].dropna().astype(int)
new_bins_ref = new_df.loc[refuel_new, "depart_bin"].dropna().astype(int)

# Last bin to cover; -1 yields an empty range when there are no refuel trips.
old_last_bin = old_bins_ref.max() if len(old_bins_ref) else -1
new_last_bin = new_bins_ref.max() if len(new_bins_ref) else -1

old_counts_ref = (
    old_bins_ref.value_counts()
    .sort_index()
    .reindex(pd.RangeIndex(0, old_last_bin + 1), fill_value=0)
)
new_counts_ref = (
    new_bins_ref.value_counts()
    .sort_index()
    .reindex(pd.RangeIndex(0, new_last_bin + 1), fill_value=0)
)

# Per-bin counts scaled to trips/hour (5-minute old bins, 10-minute new bins)
old_rate_ref = old_counts_ref * (60 / 5)
new_rate_ref = new_counts_ref * (60 / 10)

old_rate_time = start_time + pd.to_timedelta(old_rate_ref.index * 5, unit="m")
new_rate_time = start_time + pd.to_timedelta(new_rate_ref.index * 10, unit="m")

fig = go.Figure(
    data=[
        go.Scatter(x=old_rate_time, y=old_rate_ref.values, mode="lines",
                   name="Old refuel trips/hour"),
        go.Scatter(x=new_rate_time, y=new_rate_ref.values, mode="lines",
                   name="New refuel trips/hour"),
    ]
)
fig.update_layout(
    title="Item 6 — Refueling Trips (per hour)",
    xaxis_title="Time of Day",
    yaxis_title="Trips per hour",
    hovermode="x unified",
    xaxis=dict(tickformat="%H:%M"),
)
fig.show()

# Attach the refuel flag to the distance tables (positional, via .values)
new_trips = new_trips.copy()
old_trips = old_trips.copy()
new_trips["is_refuel"] = refuel_new.values
old_trips["is_refuel"] = refuel_old.values

# Share by number of trips
tot_new = len(new_trips)
tot_old = len(old_trips)
n_ref_new = int(new_trips["is_refuel"].sum())
n_ref_old = int(old_trips["is_refuel"].sum())
share_ref_trips_new = n_ref_new / tot_new
share_ref_trips_old = n_ref_old / tot_old

# Share by distance
tot_vmt_new = new_trips["trip_dist"].sum()
tot_vmt_old = old_trips["trip_dist"].sum()
ref_vmt_new = new_trips.loc[new_trips["is_refuel"], "trip_dist"].sum()
ref_vmt_old = old_trips.loc[old_trips["is_refuel"], "trip_dist"].sum()
share_ref_vmt_new = ref_vmt_new / tot_vmt_new
share_ref_vmt_old = ref_vmt_old / tot_vmt_old

print("Item 6 — Refueling trips")
for label, n_ref, n_all, trip_share, vmt_ref, vmt_all, vmt_share in (
    ("OLD", n_ref_old, tot_old, share_ref_trips_old, ref_vmt_old, tot_vmt_old, share_ref_vmt_old),
    ("NEW", n_ref_new, tot_new, share_ref_trips_new, ref_vmt_new, tot_vmt_new, share_ref_vmt_new),
):
    print(label)
    print(f"  By trips: {n_ref:,} / {n_all:,}  → {trip_share:,.2%}")
    print(f"  By VMT:   {vmt_ref:,.0f} / {vmt_all:,.0f}  → {vmt_share:,.2%}")

Item 6 — Refueling trips
OLD
  By trips: 2 / 381,347  → 0.00%
  By VMT:   90 / 1,729,081  → 0.01%
NEW
  By trips: 40,863 / 413,065  → 9.89%
  By VMT:   48,012 / 1,716,421  → 2.80%


In [42]:
# Trips/hour by trip type — separate OLD vs NEW plots (same y-axis)

def _filled_bin_counts(frame, mask, bin_col):
    """Trips per bin for the rows in ``mask``; gaps from bin 0 to the last bin become 0.

    An empty selection is returned as the empty value_counts result.
    """
    counts = frame.loc[mask, bin_col].dropna().astype(int).value_counts().sort_index()
    if len(counts):
        counts = counts.reindex(pd.RangeIndex(0, counts.index.max() + 1), fill_value=0)
    return counts


def _trip_type_figure(title, bin_minutes, labelled_rates, y_top):
    """Line chart with one trace per (label, hourly-rate series) and a fixed y-range."""
    figure = go.Figure()
    for label, hourly in labelled_rates:
        figure.add_trace(go.Scatter(x=start_time + pd.to_timedelta(hourly.index * bin_minutes, unit="m"),
                                    y=hourly.values, mode="lines", name=label))
    figure.update_layout(title=title,
                         xaxis_title="Time of Day", yaxis_title="Trips per hour",
                         hovermode="x unified", xaxis=dict(tickformat="%H:%M"),
                         yaxis=dict(range=[0, y_top]))
    return figure


# NEW: trip_type has pickup / dropoff / refuel
tt = new_df["trip_type"].astype(str).str.lower()
new_pickup, new_dropoff, new_refuel = tt.eq("pickup"), tt.eq("dropoff"), tt.eq("refuel")

# OLD: destinationPurpose -> 1/3 = pickup, 2 = dropoff, 4 = refuel
dp = pd.to_numeric(old_df[" destinationPurpose"], errors="coerce")
old_pickup, old_dropoff, old_refuel = dp.isin([1, 3]), dp.eq(2), dp.eq(4)

# Counts → trips/hour (10-minute bins for NEW, 5-minute bins for OLD)
new_pk, new_do, new_rf = (
    _filled_bin_counts(new_df, selected, "depart_bin")
    for selected in (new_pickup, new_dropoff, new_refuel)
)
new_pk_rate, new_do_rate, new_rf_rate = (counts * (60 / 10) for counts in (new_pk, new_do, new_rf))

old_pk, old_do, old_rf = (
    _filled_bin_counts(old_df, selected, "startPeriod")
    for selected in (old_pickup, old_dropoff, old_refuel)
)
old_pk_rate, old_do_rate, old_rf_rate = (counts * (60 / 5) for counts in (old_pk, old_do, old_rf))

# Common y-axis: 5% headroom above the largest rate in either output
all_rates = [new_pk_rate, new_do_rate, new_rf_rate, old_pk_rate, old_do_rate, old_rf_rate]
ymax = float(pd.concat(all_rates).max()) * 1.05

# OLD plot
fig_old = _trip_type_figure(
    "OLD — Trips per Hour by Trip Type", 5,
    [("Pickup/hr (reposition)", old_pk_rate),
     ("Refuel/hr", old_rf_rate),
     ("Dropoff/hr (serviced)", old_do_rate)],
    ymax,
)
fig_old.show()

# NEW plot
fig_new = _trip_type_figure(
    "NEW — Trips per Hour by Trip Type", 10,
    [("Pickup/hr (reposition)", new_pk_rate),
     ("Refuel/hr", new_rf_rate),
     ("Dropoff/hr (serviced)", new_do_rate)],
    ymax,
)
fig_new.show()


In [44]:
# Trips/hour by trip type — separate OLD vs NEW plots (same y-axis)
# Each trip is placed in one of three categories:
#   repositioning = pickup leg flagged as deadhead
#   servicing     = dropoff leg, or pickup leg that is not deadhead
#   refueling     = refuel leg

# NEW: categories from trip_type and the is_deadhead flag
trip_type = new_df["trip_type"].astype(str).str.lower()
is_deadhead = new_df["is_deadhead"].eq(True)
is_pickup_new = trip_type.eq("pickup")
reposition_new = is_pickup_new & is_deadhead
servicing_new = (is_pickup_new & ~is_deadhead) | trip_type.eq("dropoff")
refueling_new = trip_type.eq("refuel")

# OLD: categories from destinationPurpose (1/3 pickup, 2 dropoff, 4 refuel);
# deadhead = zero occupancy with a drop-off but no pickup recorded at the origin
dp = pd.to_numeric(old_df[" destinationPurpose"], errors="coerce")
occ = pd.to_numeric(old_df["occupancy"], errors="coerce").fillna(0)
is_deadhead_old = (
    occ.eq(0)
    & old_df["dropoffIdsAtOrigin"].notna()
    & old_df["pickupIdsAtOrigin"].isna()
)
is_pickup_old = dp.isin([1, 3])
reposition_old = is_pickup_old & is_deadhead_old
servicing_old = (is_pickup_old & ~is_deadhead_old) | dp.eq(2)
refueling_old = dp.eq(4)

def rate(df, mask, col, width):
    """Return trips per hour for every time bin of the rows selected by ``mask``.

    Parameters
    ----------
    df : pandas.DataFrame
        Trip table holding the bin column.
    mask : boolean Series or array aligned with ``df``
        Rows to count.
    col : str
        Name of the column with the bin number; missing values are dropped
        and the remaining values are cast to ``int``.
    width : int or float
        Bin width in minutes; each count is multiplied by ``60 / width``.

    Returns
    -------
    pandas.Series
        Hourly rate indexed by bin number ``0..max(bin)``. Bins with no
        trips are 0. When no rows are selected, an empty float Series with
        an integer ``RangeIndex`` is returned so that arithmetic on the index
        (e.g. ``index * width``) behaves the same as in the non-empty case.
    """
    bins = df.loc[mask, col].dropna().astype(int)
    if bins.empty:
        return pd.Series(index=pd.RangeIndex(0), dtype=float)
    every_bin = pd.RangeIndex(0, bins.max() + 1)
    counts = bins.value_counts().sort_index().reindex(every_bin, fill_value=0)
    return counts * (60 / width)

# Hourly trip rates per category: NEW uses 10-minute depart_bin, OLD uses 5-minute startPeriod
rate_rep_new, rate_srv_new, rate_ref_new = (
    rate(new_df, category, "depart_bin", 10)
    for category in (reposition_new, servicing_new, refueling_new)
)
rate_rep_old, rate_srv_old, rate_ref_old = (
    rate(old_df, category, "startPeriod", 5)
    for category in (reposition_old, servicing_old, refueling_old)
)

# Shared y-axis limit: 5% above the largest rate across both outputs
every_rate = [
    rate_rep_new, rate_srv_new, rate_ref_new,
    rate_rep_old, rate_srv_old, rate_ref_old,
]
ymax = float(pd.concat(every_rate).max()) * 1.05

# OLD plot
fig_old = go.Figure()
for label, hourly in (
    ("Repositioning", rate_rep_old),
    ("Servicing", rate_srv_old),
    ("Refueling", rate_ref_old),
):
    fig_old.add_trace(go.Scatter(x=start_time + pd.to_timedelta(hourly.index * 5, unit="m"),
                                 y=hourly.values, mode="lines", name=label))
fig_old.update_layout(title="OLD — Trips per Hour by Type",
                      xaxis_title="Time of Day", yaxis_title="Trips per hour",
                      hovermode="x unified", xaxis=dict(tickformat="%H:%M"),
                      yaxis=dict(range=[0, ymax]))
fig_old.show()

# NEW plot
fig_new = go.Figure()
for label, hourly in (
    ("Repositioning", rate_rep_new),
    ("Servicing", rate_srv_new),
    ("Refueling", rate_ref_new),
):
    fig_new.add_trace(go.Scatter(x=start_time + pd.to_timedelta(hourly.index * 10, unit="m"),
                                 y=hourly.values, mode="lines", name=label))
fig_new.update_layout(title="NEW — Trips per Hour by Type",
                      xaxis_title="Time of Day", yaxis_title="Trips per hour",
                      hovermode="x unified", xaxis=dict(tickformat="%H:%M"),
                      yaxis=dict(range=[0, ymax]))
fig_new.show()


In [45]:
# VMT/hour by trip type — separate OLD vs NEW plots with same y-axis

start_time = pd.Timestamp("2000-01-01 03:00:00")


def _vmt_per_hour(trips, mask, bin_col, bin_minutes):
    """Sum trip_dist per bin for the rows in ``mask`` and scale to an hourly rate.

    Rows missing the bin or the distance are dropped. Bins from 0 to the last
    observed bin are filled with 0.0; an empty selection stays empty.
    """
    rows = trips.loc[mask, [bin_col, "trip_dist"]].dropna()
    per_bin = rows.groupby(rows[bin_col].astype(int))["trip_dist"].sum().sort_index()
    if len(per_bin):
        per_bin = per_bin.reindex(pd.RangeIndex(0, per_bin.index.max() + 1), fill_value=0.0)
    return per_bin * (60 / bin_minutes)


# --- Categorize trips ---
# NEW: only say Repositioning, Servicing, Refueling; pickup but NOT deadhead => Servicing
tt = new_df["trip_type"].astype(str).str.lower()
new_is_deadhead = new_df["is_deadhead"].eq(True)
new_is_pickup = tt.eq("pickup")
new_reposition = new_is_pickup & new_is_deadhead
new_servicing = tt.eq("dropoff") | (new_is_pickup & ~new_is_deadhead)
new_refueling = tt.eq("refuel")

# OLD: destinationPurpose (1/3=pickup, 2=dropoff, 4=refuel); pickup but NOT deadhead => Servicing
dp = pd.to_numeric(old_df[" destinationPurpose"], errors="coerce")
occ = pd.to_numeric(old_df["occupancy"], errors="coerce").fillna(0)
old_is_deadhead = occ.eq(0) & old_df["dropoffIdsAtOrigin"].notna() & old_df["pickupIdsAtOrigin"].isna()
old_is_pickup = dp.isin([1, 3])
old_reposition = old_is_pickup & old_is_deadhead
old_servicing = dp.eq(2) | (old_is_pickup & ~old_is_deadhead)
old_refueling = dp.eq(4)

# Put the time-bin columns on the trip-distance tables (positional, via .values)
new_trips = new_trips.copy()
new_trips["depart_bin"] = new_df["depart_bin"].values
old_trips = old_trips.copy()
old_trips["startPeriod"] = old_df["startPeriod"].values

# --- VMT/hour per type ---
# NEW (10-min bins)
vmt_rep_new, vmt_srv_new, vmt_ref_new = (
    _vmt_per_hour(new_trips, category, "depart_bin", 10)
    for category in (new_reposition, new_servicing, new_refueling)
)
# OLD (5-min bins)
vmt_rep_old, vmt_srv_old, vmt_ref_old = (
    _vmt_per_hour(old_trips, category, "startPeriod", 5)
    for category in (old_reposition, old_servicing, old_refueling)
)

# common y-axis: 5% above the largest hourly VMT in either output
ymax = float(pd.concat([vmt_rep_new, vmt_srv_new, vmt_ref_new, vmt_rep_old, vmt_srv_old, vmt_ref_old]).max()) * 1.05

# OLD plot
fig_old = go.Figure()
for label, hourly_vmt in (
    ("Repositioning", vmt_rep_old),
    ("Servicing", vmt_srv_old),
    ("Refueling", vmt_ref_old),
):
    fig_old.add_trace(go.Scatter(x=start_time + pd.to_timedelta(hourly_vmt.index * 5, unit="m"),
                                 y=hourly_vmt.values, mode="lines", name=label))
fig_old.update_layout(title="OLD — VMT per Hour by Trip Type",
                      xaxis_title="Time of Day", yaxis_title="VMT per hour",
                      hovermode="x unified", xaxis=dict(tickformat="%H:%M"),
                      yaxis=dict(range=[0, ymax]))
fig_old.show()

# NEW plot
fig_new = go.Figure()
for label, hourly_vmt in (
    ("Repositioning", vmt_rep_new),
    ("Servicing", vmt_srv_new),
    ("Refueling", vmt_ref_new),
):
    fig_new.add_trace(go.Scatter(x=start_time + pd.to_timedelta(hourly_vmt.index * 10, unit="m"),
                                 y=hourly_vmt.values, mode="lines", name=label))
fig_new.update_layout(title="NEW — VMT per Hour by Trip Type",
                      xaxis_title="Time of Day", yaxis_title="VMT per hour",
                      hovermode="x unified", xaxis=dict(tickformat="%H:%M"),
                      yaxis=dict(range=[0, ymax]))
fig_new.show()


In [46]:
# Total distance and share by trip category.
# Relies on `trip_dist` already being present on new_trips / old_trips (skim
# lookup in an earlier cell) and on the reposition_* / servicing_* /
# refueling_* masks defined in the trips-per-hour-by-type cell.

def _category_vmt(trips, category_mask):
    """Sum of trip_dist over the rows selected by ``category_mask``."""
    return trips.loc[category_mask, "trip_dist"].sum()


# --- OLD ---
reposition_old_dist = _category_vmt(old_trips, reposition_old)
servicing_old_dist = _category_vmt(old_trips, servicing_old)
refueling_old_dist = _category_vmt(old_trips, refueling_old)
total_old_dist = reposition_old_dist + servicing_old_dist + refueling_old_dist

# --- NEW ---
reposition_new_dist = _category_vmt(new_trips, reposition_new)
servicing_new_dist = _category_vmt(new_trips, servicing_new)
refueling_new_dist = _category_vmt(new_trips, refueling_new)
total_new_dist = reposition_new_dist + servicing_new_dist + refueling_new_dist

_ROW_PREFIXES = ("  Repositioning: ", "  Servicing:     ", "  Refueling:     ")

print("OLD — Total VMT by trip type")
for prefix, dist in zip(_ROW_PREFIXES, (reposition_old_dist, servicing_old_dist, refueling_old_dist)):
    print(f"{prefix}{dist:,.0f}  ({dist/total_old_dist:,.2%})")
print(f"  Total:         {total_old_dist:,.0f}\n")

print("NEW — Total VMT by trip type")
for prefix, dist in zip(_ROW_PREFIXES, (reposition_new_dist, servicing_new_dist, refueling_new_dist)):
    print(f"{prefix}{dist:,.0f}  ({dist/total_new_dist:,.2%})")
print(f"  Total:         {total_new_dist:,.0f}")


OLD — Total VMT by trip type
  Repositioning: 593,419  (34.32%)
  Servicing:     1,135,572  (65.67%)
  Refueling:     90  (0.01%)
  Total:         1,729,081

NEW — Total VMT by trip type
  Repositioning: 200,350  (11.67%)
  Servicing:     1,468,059  (85.53%)
  Refueling:     48,012  (2.80%)
  Total:         1,716,421
