In [107]:
import openmatrix as omx
import pandas as pd
import numpy as np

## Remote I/O

In [89]:
legacy_dir = "./TM2.2.0/"
new_dir = "./TM2.2.1.1/"

maz_crosswalk_file_name = legacy_dir + "maz_data_withDensity.csv"

trip_file_name = "indivTripData_1.csv"

legacy_skim_file_name = "transit_skims_AM.omx"
new_skim_file_name = "trnskmAM_WLK_TRN_WLK.omx"

output_file_name = "before-after-tap-skim-check.csv"

## Data Reads

In [92]:
maz_crosswalk_df = pd.read_csv(maz_crosswalk_file_name, sep = ",")
maz_crosswalk_df = maz_crosswalk_df[["MAZ", "TAZ"]]
maz_crosswalk_df.head()

Unnamed: 0,MAZ,TAZ
0,1,56
1,2,56
2,3,10
3,4,53
4,5,48


In [72]:
legacy_trips_df = pd.read_csv(legacy_dir + trip_file_name)

In [73]:
legacy_trips_df.columns

Index(['hh_id', 'person_id', 'person_num', 'tour_id', 'stop_id', 'inbound',
       'tour_purpose', 'orig_purpose', 'dest_purpose', 'orig_mgra',
       'dest_mgra', 'trip_dist', 'parking_mgra', 'stop_period', 'trip_mode',
       'trip_board_tap', 'trip_alight_tap', 'tour_mode', 'set',
       'tranpath_rnum', 'sampleRate', 'avAvailable', 'TRIP_TIME',
       'TRIP_DISTANCE', 'TRIP_COST'],
      dtype='object')

In [74]:
new_trips_df = pd.read_csv(new_dir + trip_file_name)

In [75]:
def make_dataframe_from_omx(input_mtx, column_name):
    df = pd.DataFrame(input_mtx)
    df = df.add_prefix('dest_')
    df['id'] = df.index
    df = pd.wide_to_long(df, 'dest_', 'id', 'destination')
    df = df.reset_index().rename(columns = {'dest_': column_name, 'id': 'origin'})
    df['origin'] = df['origin'] + 1
    df['destination'] = df['destination'] + 1
    
    return df

In [76]:
legacy_skim = omx.open_file(legacy_dir + legacy_skim_file_name)
legacy_skim.list_matrices()

['AM_ALLPEN_CAPPEN',
 'AM_ALLPEN_CRIVTT',
 'AM_ALLPEN_CROWD',
 'AM_ALLPEN_EAWT',
 'AM_ALLPEN_EBIVTT',
 'AM_ALLPEN_FARE',
 'AM_ALLPEN_FIRSTWAIT',
 'AM_ALLPEN_FRIVTT',
 'AM_ALLPEN_HRIVTT',
 'AM_ALLPEN_IN_VEHICLE_COST',
 'AM_ALLPEN_LBIVTT',
 'AM_ALLPEN_LINKREL',
 'AM_ALLPEN_LRIVTT',
 'AM_ALLPEN_TOTALIVTT',
 'AM_ALLPEN_TOTALWAIT',
 'AM_ALLPEN_TOTALWALK',
 'AM_ALLPEN_XFERS',
 'AM_ALLPEN_XFERWAIT',
 'AM_ALLPEN_XFERWALK',
 'AM_BUS_CAPPEN',
 'AM_BUS_CRIVTT',
 'AM_BUS_CROWD',
 'AM_BUS_EAWT',
 'AM_BUS_EBIVTT',
 'AM_BUS_FARE',
 'AM_BUS_FIRSTWAIT',
 'AM_BUS_FRIVTT',
 'AM_BUS_HRIVTT',
 'AM_BUS_IN_VEHICLE_COST',
 'AM_BUS_LBIVTT',
 'AM_BUS_LINKREL',
 'AM_BUS_LRIVTT',
 'AM_BUS_TOTALIVTT',
 'AM_BUS_TOTALWAIT',
 'AM_BUS_TOTALWALK',
 'AM_BUS_XFERS',
 'AM_BUS_XFERWAIT',
 'AM_BUS_XFERWALK',
 'AM_PREM_CAPPEN',
 'AM_PREM_CRIVTT',
 'AM_PREM_CROWD',
 'AM_PREM_EAWT',
 'AM_PREM_EBIVTT',
 'AM_PREM_FARE',
 'AM_PREM_FIRSTWAIT',
 'AM_PREM_FRIVTT',
 'AM_PREM_HRIVTT',
 'AM_PREM_IN_VEHICLE_COST',
 'AM_PREM_LBIVTT',
 'A

In [99]:
legacy_ivtt_df = make_dataframe_from_omx(legacy_skim["AM_ALLPEN_TOTALIVTT"], "legacy_ivtt")
legacy_xfers_df = make_dataframe_from_omx(legacy_skim["AM_ALLPEN_XFERS"], "legacy_xfers")

In [78]:
new_skim = omx.open_file(new_dir + new_skim_file_name)
new_skim.list_matrices()

['AM_WLK_TRN_WLK_BOARDS',
 'AM_WLK_TRN_WLK_DDIST',
 'AM_WLK_TRN_WLK_DTIME',
 'AM_WLK_TRN_WLK_FARE',
 'AM_WLK_TRN_WLK_IN_VEHICLE_COST',
 'AM_WLK_TRN_WLK_IVT',
 'AM_WLK_TRN_WLK_IVTCOM',
 'AM_WLK_TRN_WLK_IVTEXP',
 'AM_WLK_TRN_WLK_IVTFRY',
 'AM_WLK_TRN_WLK_IVTHVY',
 'AM_WLK_TRN_WLK_IVTLOC',
 'AM_WLK_TRN_WLK_IVTLTR',
 'AM_WLK_TRN_WLK_IWAIT',
 'AM_WLK_TRN_WLK_WACC',
 'AM_WLK_TRN_WLK_WAIT',
 'AM_WLK_TRN_WLK_WAUX',
 'AM_WLK_TRN_WLK_WEGR',
 'AM_WLK_TRN_WLK_XWAIT']

In [97]:
new_ivtt_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_IVT"], "new_ivtt")
new_iwait_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_IWAIT"], "new_iwait")
new_xwait_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_XWAIT"], "new_xwait")
new_wacc_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_WACC"], "new_wacc")
new_waux_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_WAUX"], "new_waux")
new_wegr_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_WEGR"], "new_wegr")
new_xfer_df = make_dataframe_from_omx(new_skim["AM_WLK_TRN_WLK_BOARDS"], "new_xfer")

In [82]:
legacy_ivtt_df.head()

Unnamed: 0,origin,destination,legacy_ivtt
0,1,1,0.0
1,2,1,13.305266
2,3,1,29.605185
3,4,1,16.527559
4,5,1,45.910179


In [83]:
new_ivtt_df.head()

Unnamed: 0,origin,destination,new_ivtt
0,1,1,0.0
1,2,1,0.0
2,3,1,0.0
3,4,1,17.248226
4,5,1,0.0


## Data Reductions

In [108]:
new_los_df = pd.merge(new_ivtt_df, new_iwait_df, on = ['origin', 'destination'])
new_los_df = pd.merge(new_los_df, new_wacc_df, on = ['origin', 'destination'])
new_los_df = pd.merge(new_los_df, new_waux_df, on = ['origin', 'destination'])
new_los_df = pd.merge(new_los_df, new_wegr_df, on = ['origin', 'destination'])
new_los_df = pd.merge(new_los_df, new_xwait_df, on = ['origin', 'destination'])
new_los_df = pd.merge(new_los_df, new_xfer_df, on = ['origin', 'destination'])
new_los_df["new_skim_time"] = new_los_df["new_ivtt"] + new_los_df["new_iwait"] + new_los_df["new_wacc"] + new_los_df["new_waux"] + new_los_df["new_wegr"] + new_los_df["new_xwait"]
new_los_df["new_transfers"] = np.where(new_los_df["new_ivtt"]>0, new_los_df["new_xfer"] - 1, 0)
new_los_df = new_los_df[["origin", "destination", "new_skim_time", "new_ivtt", "new_transfers"]]
new_los_df.head()

Unnamed: 0,origin,destination,new_skim_time,new_ivtt,new_transfers
0,1,1,0.0,0.0,0.0
1,2,1,10.993132,0.0,0.0
2,3,1,5.943016,0.0,0.0
3,4,1,39.409286,17.248226,1.0
4,5,1,8.383275,0.0,0.0


In [111]:
WALK_TRANSIT_MODE = 11
RELEVANT_TIME_PERIODS = [4, 5, 6, 7, 8, 9, 10, 11]
l_df = legacy_trips_df[legacy_trips_df["trip_mode"] == WALK_TRANSIT_MODE].copy()
l_df = l_df[l_df["stop_period"].isin(RELEVANT_TIME_PERIODS)].copy()
l_df = pd.merge(l_df, legacy_ivtt_df, left_on = ["trip_board_tap", "trip_alight_tap"], right_on = ["origin", "destination"], how = "left").drop(columns = ["origin", "destination"])
l_df = pd.merge(l_df, legacy_xfers_df, left_on = ["trip_board_tap", "trip_alight_tap"], right_on = ["origin", "destination"], how = "left").drop(columns = ["origin", "destination"])
l_df = l_df.rename(columns = {"TRIP_TIME": "legacy_trip_time"})
l_df = pd.merge(l_df, maz_crosswalk_df, left_on = "orig_mgra", right_on = "MAZ", how = "left").rename(columns={"TAZ": "orig_taz"}).drop(columns = ["MAZ"])
l_df = pd.merge(l_df, maz_crosswalk_df, left_on = "dest_mgra", right_on = "MAZ", how = "left").rename(columns={"TAZ": "dest_taz"}).drop(columns = ["MAZ"])
l_df = pd.merge(l_df, new_los_df, left_on = ["orig_taz", "dest_taz"], right_on = ["origin", "destination"], how = "left").drop(columns = ["origin", "destination"])
l_df = l_df[["hh_id", "person_id", "orig_taz", "dest_taz", "trip_mode", "legacy_trip_time", "legacy_ivtt", "legacy_xfers", "new_skim_time", "new_ivtt", "new_transfers"]].copy()
l_df["trips_source"] = "legacy"
l_df.head()

Unnamed: 0,hh_id,person_id,orig_taz,dest_taz,trip_mode,legacy_trip_time,legacy_ivtt,legacy_xfers,new_skim_time,new_ivtt,new_transfers,trips_source
0,17448,35092,56,288,11,29.55682,8.51168,1.0,45.886106,20.470001,0.0,legacy
1,17448,35092,522,56,11,19.90274,4.854838,1.0,33.509367,0.0,0.0,legacy
2,17448,35096,56,355,11,23.23464,6.358507,1.0,32.594955,5.680197,0.0,legacy
3,17462,35139,56,37,11,23.31851,6.465103,1.0,33.680079,7.689486,0.0,legacy
4,17462,35140,56,541,11,30.84505,12.257814,1.0,41.68714,10.894145,0.0,legacy


In [112]:
WALK_TRANSIT_MODE = 11
RELEVANT_TIME_PERIODS = [4, 5, 6, 7, 8, 9, 10, 11]
n_df = new_trips_df[new_trips_df["trip_mode"] == WALK_TRANSIT_MODE].copy()
n_df = n_df[n_df["stop_period"].isin(RELEVANT_TIME_PERIODS)].copy()
n_df = pd.merge(n_df, maz_crosswalk_df, left_on = "orig_mgra", right_on = "MAZ", how = "left").rename(columns={"TAZ": "orig_taz"}).drop(columns = ["MAZ"])
n_df = pd.merge(n_df, maz_crosswalk_df, left_on = "dest_mgra", right_on = "MAZ", how = "left").rename(columns={"TAZ": "dest_taz"}).drop(columns = ["MAZ"])
n_df = pd.merge(n_df, new_los_df, left_on = ["orig_taz", "dest_taz"], right_on = ["origin", "destination"], how = "left").drop(columns = ["origin", "destination"])
n_df = n_df[["hh_id", "person_id", "orig_taz", "dest_taz", "trip_mode", "new_skim_time", "new_ivtt", "new_transfers"]].copy()
n_df = pd.merge(n_df, l_df[["orig_taz", "dest_taz", "legacy_trip_time", "legacy_ivtt", "legacy_xfers"]].drop_duplicates(), on = ["orig_taz", "dest_taz"], how = "left")
n_df["trips_source"] = "new"
n_df.head()

Unnamed: 0,hh_id,person_id,orig_taz,dest_taz,trip_mode,new_skim_time,new_ivtt,new_transfers,legacy_trip_time,legacy_ivtt,legacy_xfers,trips_source
0,17496,35193,56,582,11,37.387946,9.301815,0.0,27.38692,8.945125,1.0,new
1,17496,35193,56,582,11,37.387946,9.301815,0.0,25.07499,9.246715,1.0,new
2,17496,35193,56,582,11,37.387946,9.301815,0.0,24.49298,8.945125,1.0,new
3,17496,35193,56,582,11,37.387946,9.301815,0.0,25.92727,9.246715,1.0,new
4,17496,35193,56,582,11,37.387946,9.301815,0.0,26.35928,7.956634,1.0,new


In [113]:
out_df = pd.concat([l_df, n_df])
out_df["unique_id"] = out_df.reset_index().index + 1
out_df.to_csv(output_file_name, index = False)