## Trim Observed Demand for Congested Assignment
Emme's congested assignment procedures fail if you assign demand for which there is no path. Because the observed demand is based on survey data, it is likely a small number of trips observed in the survey data can not be completed in the model network. This notebook uses skims from an extended assignment (not congested, which will not fail if there is no path for the demand) to trim the observed demand so that it can be assigned in the congested assignment without error.


In [18]:
import numpy as np
import pandas as pd
import openmatrix as omx
import os

### Remote I/O

In [19]:
observed_demand_filename = r"/Users/wsp/Downloads/observed-demand-year-2015-am-emme-taz-by-path.csv"
skim_dir = "../examples/temp_acceptance/skims"
output_demand_filename = "../examples/temp_on_board_assign/observed-demand-year-2015-am-emme-taz-by-path-trimmed.csv"

### Methods

In [20]:
def make_dataframe_from_omx(input_mtx: omx, core_name: str):

    a = np.array(input_mtx)

    df = pd.DataFrame(a)
    df = (
        df.unstack()
        .reset_index()
        .rename(
            columns={"level_0": "origin", "level_1": "destination", 0: core_name}
        )
    )
    df["origin"] = df["origin"] + 1
    df["destination"] = df["destination"] + 1

    return df

In [21]:
def fetch_skim_values(path: str, time_period: str):
    filename = os.path.join(skim_dir, "trnskm{}_{}.omx".format(time_period.upper(), path))
    omx_handle = omx.open_file(filename)

    matrix_list = ["IVT", "WAUX", "BOARDS", "WACC", "WEGR"]
 
    first_matrix = True
    for matrix in matrix_list:
        matrix_name = time_period + "_" + path + "_" + matrix
        df = make_dataframe_from_omx(omx_handle[matrix_name], matrix_name)
        df.columns = ["origin", "destination", matrix]
        if first_matrix:
            running_df = df
            first_matrix = False
        else:
            running_df = running_df.merge(df, on=["origin", "destination"])

    omx_handle.close()
    running_df["path"] = path.lower()
    running_df["time_period"] = time_period.lower()

    return running_df

In [22]:
def identify_present_paths(input_df: pd.DataFrame):
    df = input_df.copy()
    df["walk_all_the_way_time"] = np.where(df["IVT"] > 0.01, -99.0, df["WAUX"] + df["WACC"] + df["WEGR"])
    df["transit_path_found"] = df["IVT"] > 0.01
    df["walk_path_found"] = df["walk_all_the_way_time"] > 0.0
    df["path_found"] = (df["transit_path_found"] | df["walk_path_found"])
    
    return df[["origin", "destination", "path", "time_period", "path_found", "transit_path_found", "walk_path_found"]].copy()
    

In [23]:
def flag_path_in_observed(input_observed_df: pd.DataFrame, input_path: str, input_time_period: str):
    o_df = input_observed_df.copy()

    df = fetch_skim_values(input_path, input_time_period)
    a_df = identify_present_paths(df)

    r_df = pd.merge(o_df, a_df, left_on = ["model_time", "path_type", "orig_emme_taz", "dest_emme_taz"], right_on=["time_period", "path", "origin", "destination"], how="left").reset_index(drop=True)
    r_df = r_df.drop(columns=["origin", "destination", "path", "time_period"])
    r_df["path_found"] = r_df["path_found"].fillna(False)
    r_df["transit_path_found"] = r_df["transit_path_found"].fillna(False)
    r_df["walk_path_found"] = r_df["walk_path_found"].fillna(False)
    r_df = r_df.rename(columns={
        "path_found": "path_found_{}".format(input_path.lower()), 
        "transit_path_found": "transit_path_found_{}".format(input_path.lower()), 
        "walk_path_found": "walk_path_found_{}".format(input_path.lower())
    })

    return r_df

### Data Reads

In [24]:
observed_df = pd.read_csv(observed_demand_filename) 

In [25]:
observed_df.value_counts("path_type")

path_type
wlk_trn_wlk    21751
pnr_trn_wlk     5981
knr_trn_wlk     4515
wlk_trn_knr      932
wlk_trn_pnr      149
dtype: int64

In [26]:
running_observed_df = flag_path_in_observed(observed_df, "WLK_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, "PNR_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, "KNR_TRN_WLK", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, "WLK_TRN_PNR", "AM")
running_observed_df = flag_path_in_observed(running_observed_df, "WLK_TRN_KNR", "AM")
running_observed_df.head()

Unnamed: 0,model_time,path_type,orig_emme_taz,dest_emme_taz,trips,path_found_wlk_trn_wlk,transit_path_found_wlk_trn_wlk,walk_path_found_wlk_trn_wlk,path_found_pnr_trn_wlk,transit_path_found_pnr_trn_wlk,walk_path_found_pnr_trn_wlk,path_found_knr_trn_wlk,transit_path_found_knr_trn_wlk,walk_path_found_knr_trn_wlk,path_found_wlk_trn_pnr,transit_path_found_wlk_trn_pnr,walk_path_found_wlk_trn_pnr,path_found_wlk_trn_knr,transit_path_found_wlk_trn_knr,walk_path_found_wlk_trn_knr
0,am,knr_trn_wlk,1,2953,12.064273,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
1,am,knr_trn_wlk,2,1055,5.72125,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
2,am,knr_trn_wlk,3,2592,1.040014,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
3,am,knr_trn_wlk,5,2178,4.855805,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False
4,am,knr_trn_wlk,7,398,20.209921,False,False,False,False,False,False,True,True,False,False,False,False,False,False,False


In [27]:
output_df = running_observed_df.copy() 
output_df["path_present"] = output_df["path_found_wlk_trn_wlk"] | output_df["path_found_pnr_trn_wlk"] | output_df["path_found_knr_trn_wlk"] | output_df["path_found_wlk_trn_pnr"] | output_df["path_found_wlk_trn_knr"]
output_df["transit_path_present"] = output_df["transit_path_found_wlk_trn_wlk"] | output_df["transit_path_found_pnr_trn_wlk"] | output_df["transit_path_found_knr_trn_wlk"] | output_df["transit_path_found_wlk_trn_pnr"] | output_df["transit_path_found_wlk_trn_knr"]
output_df["walk_path_present"] = output_df["walk_path_found_wlk_trn_wlk"] | output_df["walk_path_found_pnr_trn_wlk"] | output_df["walk_path_found_knr_trn_wlk"] | output_df["walk_path_found_wlk_trn_pnr"] | output_df["walk_path_found_wlk_trn_knr"]
output_df.value_counts("path_present")  

path_present
True     31789
False     1539
dtype: int64

In [28]:
write_df = output_df[output_df["path_present"] == True].copy().reset_index(drop=True)
write_df = write_df[["model_time", "path_type", "orig_emme_taz", "dest_emme_taz", "trips"]]

In [29]:
sum(write_df["trips"])/sum(write_df["trips"])

1.0

In [30]:
write_df.to_csv(output_demand_filename, index=False)

### Count Transit and Walk-all-the-way Paths

In [31]:
path_type_df = output_df[output_df["path_present"] == True].copy().reset_index(drop=True)
path_type_df = path_type_df[["model_time", "path_type", "path_present", "transit_path_present", "walk_path_present", "trips"]]
path_type_df["transit_trips"] = path_type_df["transit_path_present"] * path_type_df["trips"]
path_type_df["walk_trips"] = path_type_df["walk_path_present"] * path_type_df["trips"]


In [32]:
sum(path_type_df["transit_trips"])/sum(path_type_df["trips"])

0.8148224867767266

In [33]:
sum(path_type_df["walk_trips"])/sum(path_type_df["trips"])

0.18517751322326859

In [34]:
(sum(path_type_df["walk_trips"]) + sum(path_type_df["transit_trips"]))/sum(path_type_df["trips"])

0.9999999999999952