## Trim Observed Demand for Congested Assignment
Emme's congested assignment procedures fail if you assign demand for which there is no path. Because the observed demand is based on survey data, it is likely a small number of trips observed in the survey data can not be completed in the model network. This notebook uses skims from an extended assignment (not congested, which will not fail if there is no path for the demand) to trim the observed demand so that it can be assigned in the congested assignment without error.


In [86]:
import numpy as np
import pandas as pd
import openmatrix as omx
import os
import math

### Remote I/O

In [87]:
observed_demand_filename = r"/Users/wsp/Downloads/observed-demand-year-2015-am-emme-taz-by-path.csv"
skim_dir = "../examples/temp_acceptance/skims"
total_impedance_skim_filename = "transit_total_impedance_am.omx"
roadway_skim_filename = "HWYSKMMD_taz.omx"
output_demand_filename = "../examples/temp_on_board_assign/observed-demand-year-2015-am-emme-taz-by-path-trimmed.csv"

### Parameters

In [88]:
MAX_IMPEDANCE = 150.0

### Methods

In [89]:
def make_dataframe_from_omx(input_mtx: omx, core_name: str):

    a = np.array(input_mtx)

    df = pd.DataFrame(a)
    df = (
        df.unstack()
        .reset_index()
        .rename(
            columns={"level_0": "origin", "level_1": "destination", 0: core_name}
        )
    )
    df["origin"] = df["origin"] + 1
    df["destination"] = df["destination"] + 1

    return df

In [90]:
def fetch_skim_values(path: str, time_period: str):
    filename = os.path.join(skim_dir, "trnskm{}_{}.omx".format(time_period.upper(), path))
    omx_handle = omx.open_file(filename)

    matrix_list = ["IVT", "WAUX", "BOARDS", "WACC", "WEGR"]
 
    first_matrix = True
    for matrix in matrix_list:
        matrix_name = time_period + "_" + path + "_" + matrix
        df = make_dataframe_from_omx(omx_handle[matrix_name], matrix_name)
        df.columns = ["origin", "destination", matrix]
        if first_matrix:
            running_df = df
            first_matrix = False
        else:
            running_df = running_df.merge(df, on=["origin", "destination"])

    omx_handle.close()
    running_df["path_type"] = path.lower()
    running_df["time_period"] = time_period.lower()

    return running_df

In [91]:
def identify_present_paths(input_df: pd.DataFrame, impedance_df: pd.DataFrame):
    df = input_df.copy()
    df = df.merge(impedance_df, on=["origin", "destination", "path_type"], how="left").reset_index(drop=True)
    df["walk_all_the_way_time"] = np.where(df["IVT"] > 0.01, -99.0, df["WAUX"] + df["WACC"] + df["WEGR"])
    df["transit_path_found"] = df["IVT"] > 0.01
    df["walk_path_found"] = df["walk_all_the_way_time"] > 0.0
    df["path_found"] = ((df["total_imped"] > 0.0) & (df["total_imped"] < MAX_IMPEDANCE))
    return df[["origin", "destination", "path_type", "time_period", "path_found", "transit_path_found", "walk_path_found"]].copy()
    

In [92]:
def flag_path_in_observed(input_observed_df: pd.DataFrame, input_total_imped_df: pd.DataFrame, input_path: str, input_time_period: str):
    o_df = input_observed_df.copy()
    imp_df = input_total_imped_df.copy()

    df = fetch_skim_values(input_path, input_time_period)
    a_df = identify_present_paths(df, imp_df)

    r_df = pd.merge(
        o_df, 
        a_df, 
        left_on = ["model_time", "path_type", "orig_emme_taz", "dest_emme_taz"], 
        right_on=["time_period", "path_type", "origin", "destination"], how="left",
    ).reset_index(drop=True)
    r_df = r_df.drop(columns=["origin", "destination", "time_period"])
    
    r_df["path_found"] = r_df["path_found"].fillna(False)
    r_df["transit_path_found"] = r_df["transit_path_found"].fillna(False)
    r_df["walk_path_found"] = r_df["walk_path_found"].fillna(False)
    
    r_df = r_df.rename(columns={
        "path_found": "path_found_{}".format(input_path.lower()), 
        "transit_path_found": "transit_path_found_{}".format(input_path.lower()), 
        "walk_path_found": "walk_path_found_{}".format(input_path.lower()),
    })

    return r_df


In [93]:
def weighted_outcomes(values, weights):
    average = np.average(values, weights=weights)
    variance = np.average((values-average)**2, weights=weights)
    out = {"count": values.size,
           "trips": weights.sum(),
            "mean": average, 
            "std": math.sqrt(variance),
            "min": min(values),
            "25%": np.percentile(values, 25),
            "50%": np.percentile(values, 50),
            "75%": np.percentile(values, 75),
            "max": max(values)
        }
    ser = pd.Series(data=out, index=out.keys())
    return ser

### Data Reads

In [94]:
observed_df = pd.read_csv(observed_demand_filename) 

In [95]:
observed_df.value_counts("path_type")

path_type
wlk_trn_wlk    21751
pnr_trn_wlk     5981
knr_trn_wlk     4515
wlk_trn_knr      932
wlk_trn_pnr      149
dtype: int64

In [96]:
filename = os.path.join(skim_dir, total_impedance_skim_filename)
omx_handle = omx.open_file(filename)
omx_handle.list_matrices()

['mf601_AM_WLK_TRN_WLK_TOTALIMP',
 'mf602_AM_PNR_TRN_WLK_TOTALIMP',
 'mf603_AM_WLK_TRN_PNR_TOTALIMP',
 'mf604_AM_KNR_TRN_WLK_TOTALIMP',
 'mf605_AM_WLK_TRN_KNR_TOTALIMP']

In [97]:
wtw_df = make_dataframe_from_omx(omx_handle["mf601_AM_WLK_TRN_WLK_TOTALIMP"], "total_imped")
wtw_df["path_type"] = "wlk_trn_wlk"

ptw_df = make_dataframe_from_omx(omx_handle["mf602_AM_PNR_TRN_WLK_TOTALIMP"], "total_imped")
ptw_df["path_type"] = "pnr_trn_wlk"

ktw_df = make_dataframe_from_omx(omx_handle["mf604_AM_KNR_TRN_WLK_TOTALIMP"], "total_imped")
ktw_df["path_type"] = "knr_trn_wlk"

wtp_df = make_dataframe_from_omx(omx_handle["mf603_AM_WLK_TRN_PNR_TOTALIMP"], "total_imped")
wtp_df["path_type"] = "wlk_trn_pnr"

wtk_df = make_dataframe_from_omx(omx_handle["mf605_AM_WLK_TRN_KNR_TOTALIMP"], "total_imped")
wtk_df["path_type"] = "wlk_trn_knr"

omx_handle.close()


In [98]:
filename = os.path.join(skim_dir, roadway_skim_filename)
omx_handle = omx.open_file(filename)
hwy_dist_df = make_dataframe_from_omx(omx_handle["MD_da_dist"], "roadway_distance")
omx_handle.close()

In [99]:
running_observed_df = flag_path_in_observed(observed_df, wtw_df, "WLK_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, ptw_df, "PNR_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, ktw_df, "KNR_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, wtp_df, "WLK_TRN_PNR", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, wtk_df, "WLK_TRN_KNR", "AM")
running_observed_df.head()

Unnamed: 0,model_time,path_type,orig_emme_taz,dest_emme_taz,trips,path_found_wlk_trn_wlk,transit_path_found_wlk_trn_wlk,walk_path_found_wlk_trn_wlk,path_found_pnr_trn_wlk,transit_path_found_pnr_trn_wlk,walk_path_found_pnr_trn_wlk,path_found_knr_trn_wlk,transit_path_found_knr_trn_wlk,walk_path_found_knr_trn_wlk,path_found_wlk_trn_pnr,transit_path_found_wlk_trn_pnr,walk_path_found_wlk_trn_pnr,path_found_wlk_trn_knr,transit_path_found_wlk_trn_knr,walk_path_found_wlk_trn_knr
0,am,knr_trn_wlk,1,2953,12.064273,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
1,am,knr_trn_wlk,2,1055,5.72125,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
2,am,knr_trn_wlk,3,2592,1.040014,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
3,am,knr_trn_wlk,5,2178,4.855805,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
4,am,knr_trn_wlk,7,398,20.209921,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False


### Reductions

In [100]:
output_df = running_observed_df.copy() 
output_df["path_present"] = output_df["path_found_wlk_trn_wlk"] | output_df["path_found_pnr_trn_wlk"] | output_df["path_found_knr_trn_wlk"] | output_df["path_found_wlk_trn_pnr"] | output_df["path_found_wlk_trn_knr"]
output_df["transit_path_present"] = output_df["transit_path_found_wlk_trn_wlk"] | output_df["transit_path_found_pnr_trn_wlk"] | output_df["transit_path_found_knr_trn_wlk"] | output_df["transit_path_found_wlk_trn_pnr"] | output_df["transit_path_found_wlk_trn_knr"]
output_df["walk_path_present"] = output_df["walk_path_found_wlk_trn_wlk"] | output_df["walk_path_found_pnr_trn_wlk"] | output_df["walk_path_found_knr_trn_wlk"] | output_df["walk_path_found_wlk_trn_pnr"] | output_df["walk_path_found_wlk_trn_knr"]
output_df.value_counts("path_present")  

path_present
True     26599
False     6729
dtype: int64

In [101]:
write_df = output_df[output_df["path_present"] == True].copy().reset_index(drop=True)
write_df = write_df[["model_time", "path_type", "orig_emme_taz", "dest_emme_taz", "trips"]]

In [102]:
sum(write_df["trips"])/sum(output_df["trips"])

0.8546194055651156

In [103]:
sum(write_df["trips"])

315848.4381569526

In [104]:
write_df.to_csv(output_demand_filename, index=False)

### Count Transit and Walk-all-the-way Paths

In [105]:
path_type_df = output_df[output_df["path_present"] == True].copy().reset_index(drop=True)
path_type_df = path_type_df[["model_time", "path_type", "orig_emme_taz", "dest_emme_taz", "path_present", "transit_path_present", "walk_path_present", "trips"]]
path_type_df["transit_trips"] = path_type_df["transit_path_present"] * path_type_df["trips"]
path_type_df["walk_trips"] = path_type_df["walk_path_present"] * path_type_df["trips"]
path_type_df = pd.merge(path_type_df, hwy_dist_df, left_on=["orig_emme_taz", "dest_emme_taz"], right_on=["origin", "destination"], how="left")
path_type_df = path_type_df.drop(columns=["origin", "destination"])
path_type_df.head()


Unnamed: 0,model_time,path_type,orig_emme_taz,dest_emme_taz,path_present,transit_path_present,walk_path_present,trips,transit_trips,walk_trips,roadway_distance
0,am,knr_trn_wlk,1,2953,True,True,False,12.064273,12.064273,0.0,11.509501
1,am,knr_trn_wlk,2,1055,True,True,False,5.72125,5.72125,0.0,12.797618
2,am,knr_trn_wlk,3,2592,True,True,False,1.040014,1.040014,0.0,31.540909
3,am,knr_trn_wlk,5,2178,True,True,False,4.855805,4.855805,0.0,12.2293
4,am,knr_trn_wlk,7,398,True,True,False,20.209921,20.209921,0.0,3.452892


In [106]:
sum(path_type_df["transit_trips"])/sum(path_type_df["trips"])

1.0

In [107]:
sum(path_type_df["walk_trips"])/sum(path_type_df["trips"])

0.0

In [108]:
(sum(path_type_df["walk_trips"]) + sum(path_type_df["transit_trips"]))/sum(path_type_df["trips"])

1.0

In [109]:
path_type_df["roadway_distance"].describe()

count    26599.000000
mean        10.786821
std          8.886543
min          0.187586
25%          3.874539
50%          7.973752
75%         15.150299
max         55.445045
Name: roadway_distance, dtype: float64

In [110]:
weighted_outcomes(path_type_df["roadway_distance"], path_type_df["trips"])

count     26599.000000
trips    315848.438157
mean          8.332068
std           7.807270
min           0.187586
25%           3.874539
50%           7.973752
75%          15.150299
max          55.445045
dtype: float64

In [111]:
df = path_type_df[path_type_df["transit_trips"] > 0.0].copy()
weighted_outcomes(df["roadway_distance"], df["transit_trips"])

count     26599.000000
trips    315848.438157
mean          8.332068
std           7.807270
min           0.187586
25%           3.874539
50%           7.973752
75%          15.150299
max          55.445045
dtype: float64

In [112]:
## df = path_type_df[path_type_df["walk_trips"] > 0.0].copy()
## weighted_outcomes(df["roadway_distance"], df["walk_trips"])

### Count Transit and Walk-all-the-way Paths for Within San Francisco Trips

In [113]:
MAX_SF_EMME_TAZ = 636
sf_path_type_df = path_type_df[(path_type_df["orig_emme_taz"]<=MAX_SF_EMME_TAZ) & (path_type_df["dest_emme_taz"]<=MAX_SF_EMME_TAZ)].copy().reset_index(drop=True)
sum(sf_path_type_df["transit_trips"])/sum(sf_path_type_df["trips"])

1.0

In [114]:
sum(sf_path_type_df["walk_trips"])/sum(sf_path_type_df["trips"])

0.0

In [115]:
df = sf_path_type_df[sf_path_type_df["trips"] > 0.0].copy()
weighted_outcomes(df["roadway_distance"], df["trips"])

count      9357.000000
trips    152755.060269
mean          3.981575
std           2.256317
min           0.187586
25%           2.440529
50%           3.946632
75%           5.962946
max          12.886116
dtype: float64

In [116]:
df = sf_path_type_df[sf_path_type_df["transit_trips"] > 0.0].copy()
weighted_outcomes(df["roadway_distance"], df["transit_trips"])

count      9357.000000
trips    152755.060269
mean          3.981575
std           2.256317
min           0.187586
25%           2.440529
50%           3.946632
75%           5.962946
max          12.886116
dtype: float64

In [117]:
## df = sf_path_type_df[sf_path_type_df["walk_trips"] > 0.0].copy()
## weighted_outcomes(df["roadway_distance"], df["walk_trips"])

### Look at high impedance interchanges that are failing in the congested assignment

In [118]:
skims_df = fetch_skim_values("WLK_TRN_PNR", "AM")
skims_df.head()

Unnamed: 0,origin,destination,IVT,WAUX,BOARDS,WACC,WEGR,path_type,time_period
0,1,1,0.0,0.0,0.0,0.0,0.0,wlk_trn_pnr,am
1,1,2,5.0,0.0,1.0,1.802485,0.0,wlk_trn_pnr,am
2,1,3,5.0,0.0,1.0,6.881343,0.0,wlk_trn_pnr,am
3,1,4,40.891212,5.762158,2.0,2.035112,0.0,wlk_trn_pnr,am
4,1,5,5.0,0.0,1.0,9.645048,0.0,wlk_trn_pnr,am


In [119]:
skims_df.head()

Unnamed: 0,origin,destination,IVT,WAUX,BOARDS,WACC,WEGR,path_type,time_period
0,1,1,0.0,0.0,0.0,0.0,0.0,wlk_trn_pnr,am
1,1,2,5.0,0.0,1.0,1.802485,0.0,wlk_trn_pnr,am
2,1,3,5.0,0.0,1.0,6.881343,0.0,wlk_trn_pnr,am
3,1,4,40.891212,5.762158,2.0,2.035112,0.0,wlk_trn_pnr,am
4,1,5,5.0,0.0,1.0,9.645048,0.0,wlk_trn_pnr,am


In [120]:
df = output_df[output_df["path_type"] == "wlk_trn_pnr"].copy()
df = df[["model_time", "orig_emme_taz", "dest_emme_taz", "trips", "path_found_wlk_trn_pnr", "transit_path_found_wlk_trn_pnr"]].copy().reset_index(drop=True)
df.head()

Unnamed: 0,model_time,orig_emme_taz,dest_emme_taz,trips,path_found_wlk_trn_pnr,transit_path_found_wlk_trn_pnr
0,am,4,1020,4.35468,False,True
1,am,17,3626,3.013748,False,True
2,am,18,3634,2.681981,True,True
3,am,25,762,3.513952,True,True
4,am,27,780,7.377658,False,True


In [121]:
a_df = pd.merge(
        df, 
        skims_df[["origin", "destination", "time_period", "IVT", "BOARDS", "WACC", "WAUX", "WEGR"]], 
        left_on = ["model_time", "orig_emme_taz", "dest_emme_taz"], 
        right_on=["time_period", "origin", "destination"], how="left",
    ).reset_index(drop=True)
a_df = a_df.drop(columns=["origin", "destination", "time_period"])
a_df.head()

Unnamed: 0,model_time,orig_emme_taz,dest_emme_taz,trips,path_found_wlk_trn_pnr,transit_path_found_wlk_trn_pnr,IVT,BOARDS,WACC,WAUX,WEGR
0,am,4,1020,4.35468,False,True,20.0,1.0,11.423782,0.0,0.0
1,am,17,3626,3.013748,False,True,66.385551,2.0,10.678999,3.869747,0.0
2,am,18,3634,2.681981,True,True,48.032963,2.315567,11.996914,3.460896,0.0
3,am,25,762,3.513952,True,True,21.70153,1.0,14.477477,0.0,0.0
4,am,27,780,7.377658,False,True,51.306515,2.0,2.898661,4.719433,0.0


In [122]:
a_df["path_util"] = a_df["IVT"] + a_df["WAUX"]*2.0 + a_df["WACC"]*2.0 + a_df["WEGR"]*2.0 + 10.0 * a_df["BOARDS"]

In [123]:
b_df = pd.merge(
     a_df,
     wtp_df,
    left_on = ["orig_emme_taz", "dest_emme_taz"],
    right_on=["origin", "destination"], 
    how="left",
).reset_index(drop=True)
b_df = b_df.drop(columns=["origin", "destination"])
b_df.head()

Unnamed: 0,model_time,orig_emme_taz,dest_emme_taz,trips,path_found_wlk_trn_pnr,transit_path_found_wlk_trn_pnr,IVT,BOARDS,WACC,WAUX,WEGR,path_util,total_imped,path_type
0,am,4,1020,4.35468,False,True,20.0,1.0,11.423782,0.0,0.0,52.847565,153.691422,wlk_trn_pnr
1,am,17,3626,3.013748,False,True,66.385551,2.0,10.678999,3.869747,0.0,115.483043,169.309616,wlk_trn_pnr
2,am,18,3634,2.681981,True,True,48.032963,2.315567,11.996914,3.460896,0.0,102.104248,148.818268,wlk_trn_pnr
3,am,25,762,3.513952,True,True,21.70153,1.0,14.477477,0.0,0.0,60.656485,112.030525,wlk_trn_pnr
4,am,27,780,7.377658,False,True,51.306515,2.0,2.898661,4.719433,0.0,86.542704,182.745026,wlk_trn_pnr
