In [1]:
'''
This notebook defines four functions; each one flags the records that are eligible commuters for one transit mode:
eBuses_flag_binary
subway_flag_binary
commuterRail_flag_binary
ferry_flag_binary
JZ - April 20, 2022
'''
import pandas as pd
import numpy as np

In [2]:
def eBuses_flag_binary(region,TimePeriods,affordability,fixgaps,data=None):
    '''
    Flag each record as an eligible eBuses commuter (1) or not (0).

    inputs
    Hard Caps:
        region (list): Areas (values of the 'unique_id' column) accessible by Buses
        TimePeriods (list): Hours (values of 'ARRIVES_AT_WORK_HOUR') served by Buses

    Changable inputs:
        affordability (0-100): Commuting costs as % of income. Coerced with
            int(); it must be non-zero after coercion, otherwise the income
            threshold division raises ZeroDivisionError.
        fixgaps (True/False): Whether to fix gaps with current data, i.e. also
            flag every record whose current commute mode is 'Bus'.
            Evaluated by truthiness, so None/0 behave like False.
        data (DataFrame, optional): Table to evaluate. When omitted, the
            notebook-level `ipums` frame is used.

    output:
        series (0,1) indicating whether each line is an eligible eBuses commuter or not
    '''
    if data is None:
        # Looked up at call time, so the frame only has to exist before the call.
        data = ipums

    region_hardcap = data['unique_id'].isin(region)
    # Open question carried over from the original author: is the arrival
    # hour the right field to match against the schedule?
    schedule_hardcap = data['ARRIVES_AT_WORK_HOUR'].isin(TimePeriods)

    # NOTE(review): 60 is presumably the monthly eBuses cost (x12 for a year) - confirm.
    income_floor = 60*12*100/int(affordability)
    affordability_changable = data['TOTAL_PERSONAL_INCOME'] >= income_floor

    # All three conditions must hold together.
    final_series = region_hardcap & schedule_hardcap & affordability_changable

    if fixgaps:
        # Explicit grouping: current Bus commuters are flagged regardless of the caps.
        fixgaps_changable = data['MODE_TRANSP_TO_WORK_HBDMATCH']=='Bus'
        final_series = final_series | fixgaps_changable

    return final_series.astype(int)

In [3]:
def subway_flag_binary(region,TimePeriods,affordability,fixgaps,data=None):
    '''
    Flag each record as an eligible subway commuter (1) or not (0).

    inputs
    Hard Caps:
        region (list): Areas (values of the 'unique_id' column) accessible by subway
        TimePeriods (list): Hours (values of 'ARRIVES_AT_WORK_HOUR') served by subway

    Changable inputs:
        affordability (0-100): Commuting costs as % of income. Coerced with
            int(); it must be non-zero after coercion, otherwise the income
            threshold division raises ZeroDivisionError.
        fixgaps (True/False): Whether to fix gaps with current data, i.e. also
            flag every record whose current commute mode is 'Subway'.
            Evaluated by truthiness, so None/0 behave like False.
        data (DataFrame, optional): Table to evaluate. When omitted, the
            notebook-level `ipums` frame is used.

    output:
        series (0,1) indicating whether each line is an eligible subway commuter or not
    '''
    if data is None:
        # Looked up at call time, so the frame only has to exist before the call.
        data = ipums

    region_hardcap = data['unique_id'].isin(region)
    # Open question carried over from the original author: is the arrival
    # hour the right field to match against the schedule?
    schedule_hardcap = data['ARRIVES_AT_WORK_HOUR'].isin(TimePeriods)

    # NOTE(review): 120 is presumably the monthly subway cost (x12 for a year) - confirm.
    income_floor = 120*12*100/int(affordability)
    affordability_changable = data['TOTAL_PERSONAL_INCOME'] >= income_floor

    # All three conditions must hold together.
    final_series = region_hardcap & schedule_hardcap & affordability_changable

    if fixgaps:
        # Explicit grouping: current Subway commuters are flagged regardless of the caps.
        fixgaps_changable = data['MODE_TRANSP_TO_WORK_HBDMATCH']=='Subway'
        final_series = final_series | fixgaps_changable

    return final_series.astype(int)

In [4]:
def commuterRail_flag_binary(region,TimePeriods,affordability,fixgaps,data=None):
    '''
    Flag each record as an eligible Commuter Rail commuter (1) or not (0).

    inputs
    Hard Caps:
        region (list): Areas (values of the 'unique_id' column) accessible by Commuter Rail
        TimePeriods (list): Hours (values of 'ARRIVES_AT_WORK_HOUR') served by Commuter Rail

    Changable inputs:
        affordability (0-100): Commuting costs as % of income. Coerced with
            int(); it must be non-zero after coercion, otherwise the income
            threshold division raises ZeroDivisionError.
        fixgaps (True/False): Whether to fix gaps with current data, i.e. also
            flag every record whose current commute mode is 'CommuterRail'.
            Evaluated by truthiness, so None/0 behave like False.
        data (DataFrame, optional): Table to evaluate. When omitted, the
            notebook-level `ipums` frame is used.

    output:
        series (0,1) indicating whether each line is an eligible Commuter Rail commuter or not
    '''
    if data is None:
        # Looked up at call time, so the frame only has to exist before the call.
        data = ipums

    region_hardcap = data['unique_id'].isin(region)
    # Open question carried over from the original author: is the arrival
    # hour the right field to match against the schedule?
    schedule_hardcap = data['ARRIVES_AT_WORK_HOUR'].isin(TimePeriods)

    # NOTE(review): 150 is presumably the monthly Commuter Rail cost (x12 for a year) - confirm.
    income_floor = 150*12*100/int(affordability)
    affordability_changable = data['TOTAL_PERSONAL_INCOME'] >= income_floor

    # All three conditions must hold together.
    final_series = region_hardcap & schedule_hardcap & affordability_changable

    if fixgaps:
        # Explicit grouping: current CommuterRail commuters are flagged regardless of the caps.
        fixgaps_changable = data['MODE_TRANSP_TO_WORK_HBDMATCH']=='CommuterRail'
        final_series = final_series | fixgaps_changable

    return final_series.astype(int)

In [5]:
def ferry_flag_binary(region,TimePeriods,affordability,fixgaps,data=None):
    '''
    Flag each record as an eligible ferry commuter (1) or not (0).

    inputs
    Hard Caps:
        region (list): Areas (values of the 'unique_id' column) accessible by ferry
        TimePeriods (list): Hours (values of 'ARRIVES_AT_WORK_HOUR') served by ferry

    Changable inputs:
        affordability (0-100): Commuting costs as % of income. Coerced with
            int(); it must be non-zero after coercion, otherwise the income
            threshold division raises ZeroDivisionError.
        fixgaps (True/False): Whether to fix gaps with current data, i.e. also
            flag every record whose current commute mode is 'Ferry'.
            Evaluated by truthiness, so None/0 behave like False.
        data (DataFrame, optional): Table to evaluate. When omitted, the
            notebook-level `ipums` frame is used.

    output:
        series (0,1) indicating whether each line is an eligible ferry commuter or not
    '''
    if data is None:
        # Looked up at call time, so the frame only has to exist before the call.
        data = ipums

    region_hardcap = data['unique_id'].isin(region)
    # Open question carried over from the original author: is the arrival
    # hour the right field to match against the schedule?
    schedule_hardcap = data['ARRIVES_AT_WORK_HOUR'].isin(TimePeriods)

    # NOTE(review): 180 is presumably the monthly ferry cost (x12 for a year) - confirm.
    income_floor = 180*12*100/int(affordability)
    affordability_changable = data['TOTAL_PERSONAL_INCOME'] >= income_floor

    # All three conditions must hold together.
    final_series = region_hardcap & schedule_hardcap & affordability_changable

    if fixgaps:
        # Explicit grouping: current Ferry commuters are flagged regardless of the caps.
        fixgaps_changable = data['MODE_TRANSP_TO_WORK_HBDMATCH']=='Ferry'
        final_series = final_series | fixgaps_changable

    return final_series.astype(int)

### Data

In [6]:
# Load the cleaned IPUMS extract and keep only the 2019 records.
ipums = pd.read_csv("../ipums_data/disaggregated_cleaned_ipums_data.csv", index_col=0)
is_2019 = ipums['YEAR'] == 2019
ipums = ipums[is_2019].reset_index(drop=True)
### Create a unique id for each PUMA: zero-padded 2-digit state code
### followed by the zero-padded 5-digit PUMA code.
ipums['HOME_STATEFIP'] = ipums['HOME_STATEFIP'].map(lambda state_code: f'{state_code:0>2d}')
ipums['HOME_PUMA'] = ipums['HOME_PUMA'].map(lambda puma_code: f'{puma_code:0>5d}')
ipums['unique_id'] = ipums['HOME_STATEFIP'].astype(str) + ipums['HOME_PUMA'].astype(str)

#### Calculating the accessibility of public transportation

In [7]:
### Total person weight (PERWT) for every (PUMA, commute mode) pair.
puma_mode_weights = (
    ipums
    .groupby(by=['unique_id', 'MODE_TRANSP_TO_WORK_HBDMATCH'])
    .agg({"PERWT": "sum"})
    .reset_index()
)
### One row per PUMA, one column per commute mode.
ipums_puma_mode = puma_mode_weights.pivot_table(
    values='PERWT',
    index=['unique_id'],
    columns='MODE_TRANSP_TO_WORK_HBDMATCH',
).reset_index()
ipums_puma_mode.columns.name = ''
### Percentage of each transport mode among all commuters in the PUMA:
### divide every mode column by the row total.
mode_weights = ipums_puma_mode.set_index(['unique_id'])
puma_totals = mode_weights.sum(axis=1)
res = mode_weights.div(puma_totals, axis=0).reset_index()
# res[['Bus','Subway','CommuterRail','Ferry']].describe([.01,.1,.2,.3,.4,.5,.6,.7,.8,.9,.99])

In [8]:
### Set a threshold per mode: a PUMA counts as friendly to a mode when that
### mode's share of the PUMA's weighted commuters is at least the cutoff below.
### NOTE(review): the cutoffs are presumably taken from the distribution of
### shares in `res` - confirm where these exact values come from.
PT_SHARE_THRESHOLDS = {
    'Bus': 0.128702,
    'Subway': 0.211732,
    'CommuterRail': 0.322561,
    'Ferry': 0.032586,
}
PT_region = res[['unique_id']].copy(deep=True)
for mode, min_share in PT_SHARE_THRESHOLDS.items():
    # Vectorized comparison; a missing (NaN) share compares False, exactly as
    # the per-element `True if x >= cutoff else False` did.
    PT_region[mode] = res[mode] >= min_share

### Test

In [9]:
# PUMAs ('unique_id' values) whose flag is set for each mode.
region_Bus = PT_region.loc[PT_region['Bus'] == True, 'unique_id'].to_list()
region_Subway = PT_region.loc[PT_region['Subway'] == True, 'unique_id'].to_list()
region_CommuterRail = PT_region.loc[PT_region['CommuterRail'] == True, 'unique_id'].to_list()
region_Ferry = PT_region.loc[PT_region['Ferry'] == True, 'unique_id'].to_list()

# Hours of the day accepted for each mode (end of range is exclusive).
time_Bus = [hour for hour in range(6, 22)]
time_Subway = [hour for hour in range(24)]
time_CommuterRail = [hour for hour in range(6, 22)]
time_Ferry = [hour for hour in range(7, 21)]

In [10]:
########################
#### How to Run ########
########################
# Every mode is scored with the same tunable settings; only the served
# regions and the accepted hours differ between the calls.
shared_settings = {'affordability': 20, 'fixgaps': False}

ipums['FLAG_EBUSES'] = eBuses_flag_binary(region_Bus, time_Bus, **shared_settings)

ipums['FLAG_SUBWAY'] = subway_flag_binary(region_Subway, time_Subway, **shared_settings)

ipums['FLAG_COMMUTERRAIL'] = commuterRail_flag_binary(
    region_CommuterRail, time_CommuterRail, **shared_settings
)

ipums['FLAG_FERRY'] = ferry_flag_binary(region_Ferry, time_Ferry, **shared_settings)

In [11]:
######################
###### CHECKS ########
######################
# Total person weight of records flagged (1) vs. not flagged (0) for subway.
ipums.groupby(by=['FLAG_SUBWAY'])[['PERWT']].sum()

Unnamed: 0_level_0,PERWT
FLAG_SUBWAY,Unnamed: 1_level_1
0,704376.0
1,2089879.0


In [12]:
# Weight every 0/1 flag by PERWT so the column sums give the weighted
# number of eligible commuters per mode next to the overall PERWT total.
flag_columns = ['FLAG_EBUSES', 'FLAG_SUBWAY', 'FLAG_COMMUTERRAIL', 'FLAG_FERRY']
check_df = ipums[['PERWT'] + flag_columns].copy(deep=True)
for flag_name in flag_columns:
    check_df[flag_name] = check_df[flag_name] * check_df['PERWT']
pd.DataFrame(check_df.sum())

Unnamed: 0,0
PERWT,2794255.0
FLAG_EBUSES,496876.0
FLAG_SUBWAY,2089879.0
FLAG_COMMUTERRAIL,377697.0
FLAG_FERRY,212503.0


In [13]:
# Total person weight per current commute mode, for comparison with the flag totals.
ipums.groupby(by=['MODE_TRANSP_TO_WORK_HBDMATCH'])[['PERWT']].sum()

Unnamed: 0_level_0,PERWT
MODE_TRANSP_TO_WORK_HBDMATCH,Unnamed: 1_level_1
AutoOccupants,364405.0
Bicycle,30747.0
Bus,307292.0
CommuterRail,341180.0
Ferry,27937.0
Other,12303.0
Subway,1425330.0
WFH,70151.0
Walk,214910.0
