## Match Coordinates from Survey Responses to SANDAG Geographies
Assign the coordinates in each survey response to SANDAG Series 15 geographies (TAZ or MGRA). The raw coordinates themselves are dropped from the responses in the next notebook.

In [1]:
# NOTE: python notebook version of the script: notebooks\data_processing\x05_data_processing.py

## Set-up

In [1]:
import geopandas as gpd
import numpy as np
import openmatrix as omx
import pandas as pd

import os

from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# CRS of the survey coordinates; value found in the Survey_Airport GitHub repo -
# https://github.com/SANDAG/Survey_Airport/blob/192019a7fd2cca1986af9a2e25d287fa9cdd7648/data_model/utils.py#L32
survey_crs = "EPSG:4326"
# geography level to match against; it selects the shapefile, the zone id column,
# and the suffix of the matched columns and of the output file name
selected_geography = 'TAZ' #'MGRA'

In [39]:
# input
# shapefile of the selected geography level
geography_file = f"../data/geographies/{selected_geography}15.shp"
processed_survey_data_path = "../data/processed/data_model_output.csv"
# NOTE(review): absolute path on a mapped drive -- the skim cells only run where T: is available
base_scenario_path = r"T:\STORAGE-63T\2025RP_draft\abm_runs_v2\2022_S0_v2"

# output
survey_data_matched_geographies = f'../data/processed/survey_data_matched_geographies_{selected_geography.lower()}.csv'

In [None]:
# read in data
# keep respondents without critical validation issues and with a positive departing-only weight
# NOTE(review): the saved cell output echoes a warning raised by read_csv -- possibly mixed column dtypes; confirm
survey_respondent = (
    pd.read_csv(processed_survey_data_path)
    # .query('record_type_synthetic == False')
    .query("validation_severity_person != 'Critical'")
    .query("validation_severity_trip != 'Critical'")
    .query("weight_departing_only > 0") #using non-synthetic data leaves synthetic records in data w/ 0 weight
    # .query('inbound_or_outbound_label == "INBOUND_TO_AIRPORT"')
)
# zone polygons of the selected geography level
geographies = gpd.read_file(geography_file).query('~TAZ.isin([3,11])') # remove external TAZs that do not exist
# geographies.head()

  pd.read_csv(processed_survey_data_path)


## Replace Coordinates w/ Geographies

1) for coords in San Diego County -> 
    - match to TAZ
    - for coords in ocean nearby, match to closest non-external TAZ
        - WHAT LOGIC ? coordinate box?
2) for origin coords outside of SD county:
    1) if dest coord is inside county, make origin TAZ = AIRPORT
    2) if dest coord is also outside county:
        - match closest coord to closest external TAZ
        - make furthest coord to any external TAZ = AIRPORT
3) for remaining dest coords outside of SD county:
    - make origin TAZ = AIRPORT

In [5]:
# TODO update logic documentation

In [6]:
def make_survey_geodataframe(survey_df: pd.DataFrame, var_prefix:str)->gpd.GeoDataFrame:
    """Build point geometry for one survey location.

    Args:
        survey_df: survey records holding the `{var_prefix}_longitude` and
            `{var_prefix}_latitude` columns.
        var_prefix: location whose coordinates are used (e.g. 'origin').

    Returns:
        GeoDataFrame of `survey_df` whose geometry is the point at those
        coordinates, with the CRS set to the module-level `survey_crs`.
    """
    longitudes = survey_df[f"{var_prefix}_longitude"]
    latitudes = survey_df[f"{var_prefix}_latitude"]
    location_points = gpd.points_from_xy(longitudes, latitudes)
    return gpd.GeoDataFrame(survey_df, geometry=location_points, crs=survey_crs)

In [7]:
def transform_geographies_df(geography_df:gpd.GeoDataFrame, var_prefix:str)->gpd.GeoDataFrame:
    """Reduce the geography layer to a zone id and geometry, named for one location.

    Args:
        geography_df: zone polygons with a `selected_geography` id column.
        var_prefix: location the zone ids will describe (e.g. 'origin').

    Returns:
        Frame with only the geometry and the zone id column, the latter renamed
        to `{var_prefix}_{selected_geography.lower()}`.
    """
    prefixed_zone_col = f'{var_prefix}_{selected_geography.lower()}'
    zone_id_and_shape = geography_df[[selected_geography, "geometry"]]
    return zone_id_and_shape.rename(columns={selected_geography: prefixed_zone_col})

In [8]:

def sjoin_geographies(
        survey_df:pd.DataFrame,
        geography_df:gpd.GeoDataFrame,
        var_prefix:str
        )->gpd.GeoDataFrame:
    """Tag each survey record with the zone that contains one of its locations.

    Args:
        survey_df: survey records holding the `{var_prefix}_longitude` and
            `{var_prefix}_latitude` columns.
        geography_df: zone polygons with a `selected_geography` id column.
        var_prefix: location to match (e.g. 'origin').

    Returns:
        GeoDataFrame in the CRS of `geography_df`, with lower-cased column
        names and a nullable Int32 `{var_prefix}_{selected_geography.lower()}`
        column that is null where the point falls in no zone.
    """
    geo_col = f"{var_prefix}_{selected_geography.lower()}"
    geography_df = transform_geographies_df(geography_df, var_prefix)
    survey_gdf = make_survey_geodataframe(survey_df, var_prefix)
    joined_gdf = (
        survey_gdf
        .to_crs(geography_df.crs)
        .sjoin(geography_df, how="left")
        .astype({geo_col: "Int32"})
        .drop(columns=['index_right'])
    )
    # a point lying on a shared zone boundary intersects both zones, and the
    # left join then repeats that respondent once per zone; keep one row each.
    # Only safe to collapse when the incoming index identifies respondents uniquely.
    if survey_gdf.index.is_unique:
        joined_gdf = joined_gdf.loc[~joined_gdf.index.duplicated(keep='first')]
    joined_gdf.columns = [col.lower() for col in joined_gdf.columns]
    return joined_gdf


In [9]:
# longitude/latitude window, in degrees, used to pick out unmatched points for the
# nearest-zone rescue below
# NOTE(review): the name presumably refers to EPSG:4326 (survey_crs), and the window
# presumably covers the San Diego coastline -- confirm
eps4386_coast = {
    'lon': (-117.7,-117.1),
    'lat': (32.535,33.385)
}
def rescue_adrift_respondents(
        survey_df:pd.DataFrame,
        geography_df:gpd.GeoDataFrame,
        var_prefix:str
        )->gpd.GeoDataFrame:
    """Give unmatched points inside the coastal window the nearest zone.

    Records whose `{var_prefix}_{selected_geography.lower()}` is null and whose
    coordinates fall inside `eps4386_coast` are matched to the nearest zone
    within 100000 units of the geography CRS; all other records are untouched.

    Args:
        survey_df: survey records that already carry the zone column and the
            `{var_prefix}_longitude` / `{var_prefix}_latitude` columns.
        geography_df: zone polygons with a `selected_geography` id column.
        var_prefix: location to repair (e.g. 'origin').

    Returns:
        GeoDataFrame with point geometry for `var_prefix` in `survey_crs` and
        lower-cased column names. Prints the number of rescued candidates.
    """
    geo_col = f"{var_prefix}_{selected_geography.lower()}"
    geography_df = transform_geographies_df(geography_df, var_prefix)

    survey_gdf = make_survey_geodataframe(survey_df, var_prefix)
    adrift_respondents_index = (
        survey_gdf
            .query(f'{geo_col}.isnull()')
            .loc[survey_gdf[f'{var_prefix}_latitude'].between(eps4386_coast['lat'][0],eps4386_coast['lat'][1])]
            .loc[survey_gdf[f'{var_prefix}_longitude'].between(eps4386_coast['lon'][0],eps4386_coast['lon'][1])]
            .index
    )
    print(f'num adrift respondents: {adrift_respondents_index.shape}')
    nearest_zone = (
        survey_gdf
        .loc[adrift_respondents_index,'geometry']
        .reset_index(drop=False)
        .rename(columns={'index':'adrift_index'})
        .to_crs(geography_df.crs)
        .sjoin_nearest(geography_df, how="left", max_distance = 100000)
        .set_index('adrift_index')
        .astype({geo_col: "Int32"})
        [geo_col]
    )
    # sjoin_nearest emits one row per equidistant zone; keep a single match per
    # respondent and put the matches back in target order so the positional
    # assignment below always has the right length and alignment
    nearest_zone = nearest_zone[~nearest_zone.index.duplicated(keep='first')]
    survey_gdf.loc[
        adrift_respondents_index,
        geo_col
        ] = nearest_zone.reindex(adrift_respondents_index).values

    survey_gdf.columns = [col.lower() for col in survey_gdf.columns]
    return survey_gdf


In [10]:
max_distance = 100 * 5280 # max distance is 100 miles, but in feet
def match_coordinates_to_nearby_external_taz(survey_df:pd.DataFrame, geography_df:gpd.GeoDataFrame, var_prefix:str)->gpd.GeoDataFrame:
    """Match still-unmatched points to the nearest external TAZ centroid.

    Records whose `{var_prefix}_{selected_geography.lower()}` is null are
    assigned the external zone (TAZ <= 12) with the closest centroid, provided
    that centroid is closer than the module-level `max_distance`; otherwise the
    zone stays null.

    Args:
        survey_df: survey records that already carry the zone column and the
            `{var_prefix}_longitude` / `{var_prefix}_latitude` columns.
        geography_df: zone polygons with `TAZ` and `selected_geography` columns.
        var_prefix: location to match (e.g. 'origin').

    Returns:
        GeoDataFrame with point geometry for `var_prefix` in `survey_crs` and
        lower-cased column names. Prints the number of unmatched records.
    """
    # select only external TAZs
    geo_col = f"{var_prefix}_{selected_geography.lower()}"

    geography_df = transform_geographies_df(geography_df.query('TAZ <= 12'), var_prefix)
    # distances are measured to the centroid of each external zone
    geography_df['geometry'] = geography_df['geometry'].centroid
    geography_df = gpd.GeoDataFrame(geography_df)

    survey_gdf = make_survey_geodataframe(survey_df, var_prefix)

    missing_taz_index = (
        survey_df
            .query(f'{geo_col}.isnull()')
            .index
    )
    print(f'num respondents w/ {var_prefix} outside of county: {missing_taz_index.shape}')

    closest_geographies = (
                survey_gdf
                .loc[missing_taz_index, 'geometry']
                .reset_index(drop=False)
                .rename(columns={'index':'missing_index'})
                .to_crs(geography_df.crs)
                .sjoin_nearest(geography_df, how="left", distance_col = 'distance_between_points')
                # equidistant centroids yield one row per tie; keep exactly one
                # match per respondent (lowest zone id) so the index stays unique
                # and the aligned assignment below cannot fail on duplicate labels
                .sort_values(['missing_index', 'distance_between_points', geo_col])
                .drop_duplicates(subset='missing_index', keep='first')
                .set_index('missing_index')
                [[geo_col, 'distance_between_points']]
                .astype({geo_col: "Int32"})
        )
    # matches at or beyond max_distance are blanked, leaving the zone null
    survey_gdf.loc[missing_taz_index,geo_col]=(
        closest_geographies
            .where(closest_geographies['distance_between_points'] < max_distance, None)
            [geo_col]
    )

    survey_gdf.columns = [col.lower() for col in survey_gdf.columns]
    return survey_gdf

In [11]:
# process survey dataframe geographic features:
# match every location to a zone, then rescue points just off the coast
survey_respondent_geographies = survey_respondent.copy()
for var_prefix in ['origin','destination','home_location','transit_boarding','transit_alighting']:
    survey_respondent_geographies = sjoin_geographies(survey_respondent_geographies, geographies, var_prefix)
    survey_respondent_geographies = rescue_adrift_respondents(survey_respondent_geographies, geographies, var_prefix)
# origins that are still unmatched go to the nearest external TAZ
survey_respondent_geographies = match_coordinates_to_nearby_external_taz(survey_respondent_geographies, geographies, 'origin')
# computed outside the f-string: reusing the same quote character inside an
# f-string expression is a SyntaxError before Python 3.12
remaining_null_origins = survey_respondent_geographies['origin_taz'].isnull().sum()
print(f'remaining null origin TAZs: {remaining_null_origins}')
# survey_respondent_geographies.head()

num adrift respondents: (9,)
num adrift respondents: (0,)
num adrift respondents: (4,)
num adrift respondents: (0,)
num adrift respondents: (0,)
num respondents w/ origin outside of county: (270,)
remaining null origin TAZs: 0


## QC

In [12]:
# weighted count of pax + employees who board transit in the Old Town TAZs, by access mode
old_town_taz15 = [1376,1385,1403]
(
    survey_respondent_geographies
    .loc[lambda respondents: respondents['transit_boarding_taz'].isin(old_town_taz15)]
    .groupby('access_mode_grouped_label')
    ['weight_departing_only']
    .sum()
)

  .query(f'transit_boarding_taz.isin({old_town_taz15})')


access_mode_grouped_label
OTHER                                  0.182047
PERSONAL_CAR_DROPPED_OFF_PICKED_UP     6.821016
PERSONAL_CAR_PARKED                   41.979196
PUBLIC_TRANSPORTATION                  0.182047
RIDEHAIL_TAXI                          1.274332
WALK                                   4.131784
Name: weight_departing_only, dtype: float64

In [13]:
# # rental car destinations
# _, ax = plt.subplots(figsize=(20, 20))

# geographies.boundary.plot(ax = ax, color = 'red', alpha = .1)
# (
#     make_survey_geodataframe(
#         (survey_respondent_geographies
#             .query('main_mode_grouped_label == "RENTAL_CAR"')
#             .query('weight_departing_only>0')
#         ),
#         var_prefix='destination')
#     .to_crs(geographies.crs)
#     .plot(ax = ax, alpha = 1, color = 'gray')#, column='weight_departing_opn', legend=True)
# )
# # zoom in on central SD
# plt.xlim((6.25e6,6.35e6))
# plt.ylim((1.8e6,1.95e6))
# plt.show()

In [14]:
# # heatmap
# var_prefix = "origin"

# _, ax = plt.subplots(figsize=(20, 20))

# geographies.plot(ax = ax, color = 'red', alpha = .1)

# selected_geography_survey_weights = survey_respondent_geographies.groupby(f'{var_prefix}_{selected_geography.lower()}')['weight'].sum()
# geographies_survey_weights = (
#                     geographies.merge(
#                         selected_geography_survey_weights,
#                         left_on=selected_geography,
#                         right_on=f'{var_prefix}_{selected_geography.lower()}'
#                   )
# )

# geographies_survey_weights.to_crs(geographies.crs).plot(ax = ax, alpha = .5, column='weight', legend=True)
# plt.xlim((6.15e6,6.65e6))
# plt.ylim((1.75e6,2.2e6))

# # zoom in on central SD
# plt.xlim((6.25e6,6.35e6))
# plt.ylim((1.8e6,1.95e6))
# plt.show()

In [15]:
# # check adrift respondents OR check null all resp[ondents]
# var_prefix = "origin"

# _, ax = plt.subplots(figsize=(20, 20))

# geographies.to_crs(survey_crs).plot(ax = ax, color = 'orange', alpha = .1)

# survey_mgra_df = gpd.GeoDataFrame(
#                 survey_respondent_geographies,
#                 geometry=gpd.points_from_xy(
#                     survey_respondent_geographies[f"{var_prefix}_longitude"], survey_respondent_geographies[f"{var_prefix}_latitude"]
#                 ),
#                 crs=survey_crs,
#             )
# # survey_mgra_df.query('~origin_taz.isnull()').to_crs(geographies.crs).plot(ax = ax, alpha = .5)
# (
#     survey_mgra_df
#     .query(f'{var_prefix}_taz.isnull()')
#     # .loc[survey_mgra_df[f'{var_prefix}_latitude'].between(32.55,33)]
#     # .loc[survey_mgra_df[f'{var_prefix}_longitude'].between(-117.5,-117)]
#     .plot(ax = ax, alpha = .5, color = 'gray')
# )

# geographies.query('TAZ < 12').to_crs(survey_crs).plot(ax = ax, color = 'red', alpha = 1)

# # plt.xlim((-117.5,-117))
# # plt.ylim((32.55,33))
# plt.show()

In [16]:
# # origin coordinates missing TAZs
# var_prefix = "origin"

# _, ax = plt.subplots(figsize=(20, 20))

# geographies.to_crs(survey_crs).plot(ax = ax, color = 'orange', alpha = .1)

# survey_mgra_df = gpd.GeoDataFrame(
#                 survey_respondent_geographies,
#                 geometry=gpd.points_from_xy(
#                     survey_respondent_geographies[f"{var_prefix}_longitude"], survey_respondent_geographies[f"{var_prefix}_latitude"]
#                 ),
#                 crs=survey_crs,
#             )
# survey_mgra_df.query(f'{var_prefix}_taz.isnull()').plot(ax = ax, alpha = .5, label = 'missing_TAZ')
# # survey_mgra_df.query(f'{var_prefix}_taz.notna()').plot(ax = ax, alpha = .5, color = 'gray', label = 'has_TAZ')
# plt.legend()

# # geographies.query('TAZ < 12').to_crs(survey_crs).plot(ax = ax, color = 'red', alpha = 1)
# geographies.to_crs(survey_crs).plot(ax = ax, color = 'red', alpha = 1)

# # plt.xlim((-117.7,-116))
# # plt.ylim((32.5,33.5))
# plt.show()

## Skims - Auto and Transit

### Skim Reading Architecture

In [17]:
# keep respondents that have both an origin and a destination zone.
# .copy() makes survey_output an independent frame: the cells that follow add and
# overwrite columns on it, which on a bare query result triggers pandas'
# SettingWithCopyWarning and is not guaranteed to behave consistently
survey_output = (
    survey_respondent_geographies
    .query('(origin_taz.notna()) and (destination_taz.notna())')
    .copy()
)
print(f'Drops {survey_respondent_geographies.shape[0] - survey_output.shape[0]} respondents')

Drops 0 respondents


In [18]:
# locations of the AM-period skim matrices inside the base scenario output
skim_path = os.path.join(base_scenario_path,'output','skims')
transit_am_skims_path = os.path.join(skim_path,'transit_skims_AM.omx')
traffic_am_skims_path = os.path.join(skim_path,'traffic_skims_AM.omx')

# NOTE(review): these two handles are not referenced again in the cells shown and are
# never closed; the per-period loops open their own files through open_omx() --
# confirm whether this cell is still needed
traffic_am_skims = omx.open_file(traffic_am_skims_path, 'r')
transit_am_skims = omx.open_file(transit_am_skims_path, 'r')

In [19]:
def open_omx(veh_type:str, tod:str):
    """ Navigate to and open skims

    The file opened is `{veh_type}_skims_{tod}.omx` in the `output/skims`
    folder of `base_scenario_path`. The caller is responsible for closing the
    returned handle.

    Args:
        veh_type (str): 'traffic' or 'transit'
        tod (str): uppercase time-of-day value ('EA','AM','MD','PM','EV')

    Returns:
        omx_file: target skim, opened read-only

    Raises:
        ValueError: if `veh_type` or `tod` is not one of the accepted values
    """
    valid_veh_types = ('traffic', 'transit')
    valid_tods = ('EA','AM','MD','PM','EV')
    # name the offending argument and value so a bad call is easy to trace
    if veh_type not in valid_veh_types:
        raise ValueError(
            f'Invalid argument for function "open_omx()": '
            f'veh_type={veh_type!r}, expected one of {valid_veh_types}'
        )
    if tod not in valid_tods:
        raise ValueError(
            f'Invalid argument for function "open_omx()": '
            f'tod={tod!r}, expected one of {valid_tods}'
        )
    skim_path = os.path.join(base_scenario_path,'output','skims',f'{veh_type}_skims_{tod}.omx')
    omx_file = omx.open_file(skim_path, 'r')

    return omx_file

In [20]:
def read_skims(skims, values)->pd.DataFrame:
    """
    Load one skim matrix into a pandas DataFrame.

    Args:
        skims: open skim file offering `mapping('zone_number')` and item
            access by matrix name.
        values: name of the matrix to read.

    Returns:
        DataFrame of the matrix whose row and column labels are both the keys
        of the 'zone_number' mapping.
    """
    zone_numbers = [*skims.mapping('zone_number').keys()]
    matrix = np.array(skims[values])
    return pd.DataFrame(data=matrix, index=zone_numbers, columns=zone_numbers)

In [21]:
def retrieve_skim_value(
        row,
        skim,
        set_zero_val_to_null=False,
        origin_col='origin_taz',
        destination_col = 'destination_taz'
        ):
    """Look up the skim value of one record's O-D pair (for use with pandas .apply()).

    Args:
        row: record holding the origin and destination zone ids.
        skim: DataFrame indexed by origin zone with destination zones as columns.
        set_zero_val_to_null: when True, a looked-up value of 0 is returned as None.
        origin_col: key of `row` that holds the origin zone.
        destination_col: key of `row` that holds the destination zone.

    Returns:
        The value stored in `skim` for the pair, or None for a zero value when
        `set_zero_val_to_null` is True.
    """
    origin_zone = row[origin_col]
    destination_zone = row[destination_col]
    looked_up = skim.loc[origin_zone, destination_zone]
    if not set_zero_val_to_null:
        return looked_up
    return None if looked_up == 0 else looked_up

#### TOD Binning for Respondents
https://github.com/SANDAG/ABM/blob/87c3eac743973719d02ffba5b46bb81123e337ac/docs/inputs.md?plain=1#L268
- EA = Early morning (3am - 5:59am)<br>
- AM = AM peak (6am to 8:59am)<br>
- MD = Mid-day (9am to 3:29pm)<br>
- PM = PM peak (3:30pm to 6:59pm)<br>
- EV = Evening (7pm to 2:59am)<br>

In [22]:
# survey_output[['skim_tod','trip_start_time','trip_start_time_label']].drop_duplicates().sort_values(by='trip_start_time')

In [23]:
# time_cut holds the bin edges for trip_start_time; tod_list holds the period label of
# each bin, in order. 'EA' appears twice because both the first and the last bin map to it.
# NOTE(review): presumably trip_start_time is a half-hour period code running 1-48 --
# confirm against the survey data model
tod_list = ['EA','AM','MD','PM','EV','EA']
time_cut = [0,2,8,21,28,44,48]

In [24]:
# bin trip_start_time into the skim time-of-day period used to pick the skim file.
# Bins are closed on the right and the first bin also includes its lower edge;
# ordered=False is required because tod_list repeats the 'EA' label
survey_output.loc[:,'skim_tod'] = (
    pd.cut(
        survey_output['trip_start_time'],
        bins = time_cut,
        right = True,
        include_lowest = True,
        labels = tod_list,
        ordered = False
    )
)

### Adjustments for Irregular Transit Trips

In [25]:
# # some respondents drove long distances to Old Town/Rt992/etc to take transit to the airport
    # # these respondents need to have custom skim calcs
# drop_off_transit_unique_ids = [90,699,1075,1109,1209,1267,1608,1635,3381,5229]
# drop_off_transit_unique_id_index = survey_output.query(f'unique_id.isin({drop_off_transit_unique_ids})').index.copy()
# survey_output.loc[drop_off_transit_unique_id_index, 'transit_and_auto_costs'] = True

# # preserve to compare access times
# survey_output['auto_origin_taz'] = survey_output['origin_taz']
# survey_output.loc[:,'auto_destination_taz'] = survey_output['destination_taz'].copy()
# survey_output.loc[drop_off_transit_unique_id_index,'auto_destination_taz'] = survey_output.loc[drop_off_transit_unique_id_index,'transit_boarding_taz']

# # TODO currently not used
# TODO - apply to ALL null times, not just transit options
# origin zone used for the transit skim look-ups; starts out as a copy of origin_taz
survey_output.loc[:,'transit_origin_taz'] = survey_output['origin_taz'].copy()
# survey_output.loc[drop_off_transit_unique_id_index,'transit_origin_taz'] = survey_output.loc[drop_off_transit_unique_id_index,'transit_boarding_taz']
# survey_output['transit_destination_taz'] = survey_output['destination_taz']

In [26]:
# auto_modes = ['DROVE_ALONE_AND_PARKED','GET_IN_PARKED_VEHICLE_AND_DRIVE_WITH_OTHERS',
#        'DROPPED_OFF_BY_FAMILY_FRIEND','UBER_LYFT', 'HOTEL_SHUTTLE_VAN', 'DROVE_WITH_OTHERS_AND_PARKED',
#        'CAR_SERVICE_BLACK_LIMO', 'PICKED_UP_BY_FAMILY_FRIEND','GET_IN_PARKED_VEHICLE_AND_DRIVE_ALONE',
#        'BICYCLE_PERSONAL_ELECTRIC', 'RENTAL_CAR_PARKED','RENTAL_CAR_DROPPED_OFF', 'TAXI',
#        'GET_IN_PARKED_VEHICLE_AND_RIDE_WITH_OTHER_TRAVELERS','RENTAL_CAR_PICKED_UP', 'RENTAL_CAR_GET_IN_PARKED',
#        'OTHER_SHARED_VAN', 'CHARTERED_TOUR_BUS','RODE_WITH_OTHER_TRAVELERS_AND_PARKED','EMPLOYEE_SHUTTLE']
# transit_modes = ['AIRPORT_FLYER_SHUTTLE','MTS_ROUTE_992','OTHER_PUBLIC_TRANSIT']
# at_modes = ['WALK', 'BICYCLE_PERSONAL_NON_ELECTRIC','E_SCOOTER_PERSONAL', 'BICYCLE_NON_ELECTRIC_BIKESHARE',
#             'BICYCLE_ELECTRIC_BIKESHARE', 'WHEELCHAIR_OR_MOBILITY_DEVICE']
# other_modes = ['OTHER']

# auto_mode_index = survey_output.query(f'main_mode_label.isin({auto_modes})').index
# auto_mode_index = auto_mode_index.tolist() + drop_off_transit_unique_id_index.tolist()
# transit_mode_index = survey_output.query(f'main_mode_label.isin({transit_modes})').index

In [None]:
# origin TAZ -> substitute TAZ used only for the transit skim look-ups;
# commented-out keys have no substitute assigned
# NOTE(review): presumably these origins have no usable transit skim values and are
# pointed at a nearby zone that does -- confirm how the substitutes were chosen
map_transit_tazs = {
    4: 4587,
    # 6:
    # 7:
    8: 2345,
    9: 2196,
    10: 1930,
    12: 15,
    14: 15,
    17: 42,
    71: 42,
    1394: 1200,
    1457: 1472,
    2119: 1930,
    2138: 1930,
    3657: 1930,
    3733: 1930,
    4705: 4586,
    4712: 4586,
    4739: 4586,
    4747: 4586,
    4754: 4586,
    # 4836:
    # 4863:
    # 4876:
    # 4877:
    # 4890:
    # 4894:
}

# origins not listed in the map keep their own TAZ
survey_output['transit_origin_taz'] = survey_output['origin_taz'].replace(map_transit_tazs)

### Read in Skims

In [29]:
# read in auto skims
# skim matrix names and the survey_output columns that receive them
auto_skim_names = ['SOV_NT_M_DIST','SOV_NT_M_TIME','SOV_NT_M_TOLLCOST']
auto_skim_new_names = ['auto_dist','auto_time','auto_tollcost']
transit_boarding_index = survey_output['transit_boarding_taz'].notna()

# tod_list repeats 'EA' because it doubles as the pd.cut label list;
# dict.fromkeys keeps the order and visits each period's skim file once
for tod in dict.fromkeys(tod_list):
    tod_omx_file = open_omx('traffic',tod)
    try:
        tod_index = (survey_output['skim_tod']==tod)
        tod_transit_index = tod_index & transit_boarding_index
        for skim_name,col_name in zip(auto_skim_names,auto_skim_new_names):
            # read_skims copies the matrix into memory, so the file can be closed afterwards
            skim = read_skims(tod_omx_file,f'{skim_name}__{tod}')
            # get auto skims for all survey respondents;
            # a zero distance or time is stored as null, a zero toll is kept
            survey_output.loc[tod_index,col_name] = (
                survey_output
                .loc[tod_index]
                .apply(retrieve_skim_value,
                        skim = skim,
                        set_zero_val_to_null = (col_name in ['auto_dist','auto_time']),
                        axis = 1)
                )
            # generate comparisons for transit ACC times:
            # auto time from the origin to the transit boarding zone
            if col_name == 'auto_time':
                survey_output.loc[tod_transit_index,'auto_to_transit_time'] = (
                    survey_output
                    .loc[tod_transit_index]
                    .apply(retrieve_skim_value,
                            skim=skim,
                            destination_col='transit_boarding_taz',
                            set_zero_val_to_null=True,
                            axis=1)
                    )
    finally:
        # release the skim file handle even if a look-up fails
        tod_omx_file.close()

In [30]:
# read in transit skims
# every access mode x service flavor x component is read into a
# '{access}_{flavor}_{component}' column (lower case)
transit_access_modes = ['PNROUT','WALK']
transit_flavors = ['LOC','MIX','PRM']
transit_values = ['ACC','FIRSTWAIT','TOTALIVTT','XFERWAIT','EGR','FARE','XFERS']

# tod_list repeats 'EA' because it doubles as the pd.cut label list;
# dict.fromkeys keeps the order and visits each period's skim file once
for tod in dict.fromkeys(tod_list):
    tod_omx_file = open_omx('transit',tod)
    try:
        tod_index = (survey_output['skim_tod']==tod)
        for transit_access_mode in transit_access_modes:
            for transit_flavor in transit_flavors:
                transit_mode = f'{transit_access_mode}_{transit_flavor}'.lower()
                # placeholder total; overwritten when total transit travel times are calculated
                survey_output[f'{transit_mode}_time'] = 0.0
                for transit_value in transit_values:
                    col_name = f'{transit_mode}_{transit_value.lower()}'
                    # read_skims copies the matrix into memory, so the file can be closed afterwards
                    skim = read_skims(tod_omx_file,f'{col_name.upper()}__{tod}')
                    # look-ups start from transit_origin_taz; only a zero fare is stored as null
                    survey_output.loc[tod_index, col_name] = (
                        survey_output
                        .loc[tod_index]
                        .apply(retrieve_skim_value,
                                skim = skim,
                                origin_col = 'transit_origin_taz',
                                set_zero_val_to_null = (transit_value=='FARE'),
                                axis = 1)
                        )
    finally:
        # release the skim file handle even if a look-up fails
        tod_omx_file.close()
# survey_output.head()


In [31]:
# # transit adjustments for coaster riders
#     # coming from Camp Pendleton
# survey_output.loc[
#     survey_output['unique_id'].isin([3464,4453]),
#     ['pnrout_prm_acc','pnrout_prm_time']
#     ] += 15.0

## Transit Type

In [32]:
# calculate total transit travel times as the sum of the time components of each transit mode
time_components = ['acc', 'firstwait', 'xferwait', 'totalivtt', 'egr']
for transit_access_mode in transit_access_modes:
    for transit_flavor in transit_flavors:
        transit_mode = f'{transit_access_mode}_{transit_flavor}'.lower()
        total_time_col = f'{transit_mode}_time'
        component_cols = [f'{transit_mode}_{component}' for component in time_components]
        survey_output[total_time_col] = survey_output[component_cols].sum(axis=1)

        # transit trips cannot have 0 trip time
        has_zero_time = survey_output[total_time_col] == 0
        survey_output.loc[has_zero_time, total_time_col] = None


In [33]:
# index labels of the respondents whose main mode is one of the transit modes
transit_mode = ['MTS_ROUTE_992','AIRPORT_FLYER_SHUTTLE','OTHER_PUBLIC_TRANSIT']
transit_mode_index = survey_output.index[survey_output['main_mode_label'].isin(transit_mode)]
# survey_output.loc[transit_mode_index].fillna('------').groupby(['access_mode','access_mode_label'])['unique_id'].count()

In [34]:
# simplify transit access to match to skims
    # PNR access is (2.0)
    # walk access code is (1.0)
# transit respondents get 1 when access_mode == 1.0 and 2 otherwise (boolean + 1);
# rows outside transit_mode_index are not part of the right-hand side and come out null
survey_output['simplified_transit_access'] = (survey_output.loc[transit_mode_index, 'access_mode'] != 1.0) + 1
# those remaining null rows default to 2.0
survey_output['simplified_transit_access'] = survey_output['simplified_transit_access'].fillna(2.0)

In [35]:
# total-time columns first (three entries), then fare columns, per access mode
walk_transit_cols = ['walk_loc_time','walk_mix_time','walk_prm_time','walk_loc_fare','walk_mix_fare','walk_prm_fare']
# NOTE(review): despite the name, these are the pnrout_* columns for all three flavors
prm_transit_cols = ['pnrout_loc_time','pnrout_mix_time','pnrout_prm_time','pnrout_loc_fare','pnrout_mix_fare','pnrout_prm_fare']
# the six total-time columns across walk and pnrout access
all_time_cols = walk_transit_cols[:3] + prm_transit_cols[:3]

In [36]:


# walk to transit has transit type w/ lowest walk to transit stop transit travel times
# idxmin returns the name of the fastest time column; rsplit drops its trailing
# '_time' so transit_type is the mode prefix (e.g. 'walk_loc')
# NOTE(review): how idxmin treats a row whose candidate times are all null depends on
# the pandas version -- confirm no such rows reach this cell
survey_output.loc[
        survey_output['simplified_transit_access'] == 1,
        'transit_type'
    ] = (
    survey_output
        .loc[
            survey_output['simplified_transit_access'] == 1,
            walk_transit_cols[:3] # transit time columns
            ] #WALK
        .idxmin(skipna=True, axis=1)
        .str.rsplit('_', n=1, expand = True)
        [0]
    )
# not walk to transit gets lowest transit travel time
# candidates here are both the walk and the pnrout time columns
survey_output.loc[
        survey_output['simplified_transit_access'] == 2,
        'transit_type'
    ] = (
    survey_output
        .loc[
            (survey_output['simplified_transit_access'] == 2) # NOT WALK transit access
            ,all_time_cols # transit time columns, take lowest between walk and prm -> some walk less than prm
            ]
        .idxmin(skipna=True, axis=1)
        .str.rsplit('_', n=1, expand = True)
        [0]
    )

In [37]:
# get times and fares for each respondent riding transit based on transit_type column
# for every metric, copy the value from the '{transit_type}_{metric}' column of the
# respondent's own transit_type into a common 'transit_{metric}' column;
# rows without a transit_type are not in the right-hand side and end up null
transit_type_index = survey_output.query('transit_type.notna()').index
for metric in ['time','fare', 'acc', 'firstwait', 'totalivtt', 'xferwait', 'egr', 'xfers']:
    survey_output.loc[:, f'transit_{metric}'] = (
        survey_output
        .loc[transit_type_index]
        .apply(lambda x: x[f"{x['transit_type']}_{metric}"], axis=1)
    )

## Write Out Data

In [38]:
# write the matched survey data, including the skim columns, to the output csv;
# no index argument is passed, so the frame's index is written as the first column
(
    survey_output
    .to_csv(survey_data_matched_geographies)#, index_label = 'unique_id')
)