In [None]:
'''
in this script there are four functions

bus_flag_binary
subway_flag_binary
commuterRail_flag_binary
ferry_flag_binary

Jingrong - Updated April 25, 2022
'''

Note: One thing to keep in mind when modeling "How many commuters can switch between different modes of transportation?":<br>
it is meaningful to ask questions like "How many car commuters could switch to public transportation?"<br>
For now, however, there seems to be little value in letting current public transportation commuters choose among subway/rail/ferry/bus.<br>
So, after this week, we will try a different modeling approach for public transportation:<br>
1. Treat public transportation commuters' current mode choices as the outcome of all the constraints they face, so we only need to take their current choice as given.
2. For non-public-transportation commuters, use the percentage of public transportation commuters in the same PUMA as their probability of using public transportation.

In [1]:
import pandas as pd
# Load the cleaned, disaggregated IPUMS data; the first CSV column becomes the row index.
ipums_df = pd.read_csv(
    "../../01_DataExploration_and_Engineering/disaggregated_cleaned_ipums_data.csv",
    index_col=0,
)

In [2]:
# Home-side PUMA keys whose per-mode column equals 1 in the reachable-PUMA table.
reachable_puma_home = pd.read_csv("../regional_transit_system/reachable_puma_home.csv")
puma_home_Bus = reachable_puma_home.loc[reachable_puma_home['Bus'].eq(1), 'PUMAKEY_HOME'].tolist()
puma_home_Subway = reachable_puma_home.loc[reachable_puma_home['Subway'].eq(1), 'PUMAKEY_HOME'].tolist()
puma_home_CommuterRail = reachable_puma_home.loc[reachable_puma_home['CommuterRail'].eq(1), 'PUMAKEY_HOME'].tolist()
puma_home_Ferry = reachable_puma_home.loc[reachable_puma_home['Ferry'].eq(1), 'PUMAKEY_HOME'].tolist()

In [3]:
# Work-side PUMA keys whose per-mode column equals 1 in the reachable-PUMA table.
reachable_puma_work = pd.read_csv("../regional_transit_system/reachable_puma_work.csv")
puma_work_Bus = reachable_puma_work.loc[reachable_puma_work['Bus'].eq(1), 'PUMAKEY_WORK'].tolist()
puma_work_Subway = reachable_puma_work.loc[reachable_puma_work['Subway'].eq(1), 'PUMAKEY_WORK'].tolist()
puma_work_CommuterRail = reachable_puma_work.loc[reachable_puma_work['CommuterRail'].eq(1), 'PUMAKEY_WORK'].tolist()
puma_work_Ferry = reachable_puma_work.loc[reachable_puma_work['Ferry'].eq(1), 'PUMAKEY_WORK'].tolist()

In [4]:
# Departure hours (hour of day) accepted for each mode; the upper bound of range() is exclusive,
# so e.g. the bus window covers hours 6 through 21.
time_Bus = [*range(6, 22)]
time_Subway = [*range(0, 24)]
time_CommuterRail = [*range(6, 22)]
time_Ferry = [*range(7, 21)]

In [5]:
def bus_flag_binary(home_region,work_region,schedule,affordability=20,fixgaps=False,df=None):
    '''
    Flag each commuter record as an eligible eBuses commuter (1) or not (0).

    A record is eligible when it passes all three filters:
        region: if COMMUTE_DIRECTION_MANHATTAN is 'out', PUMAKEY_WORK must be in work_region;
                for every other direction, PUMAKEY_HOME must be in home_region
        schedule: DEPARTS_FOR_WORK_HOUR must be in schedule
        affordability: TOTAL_PERSONAL_INCOME >= 100*12*100/affordability
    When fixgaps is truthy, records whose MODE_TRANSP_TO_WORK_HBDMATCH is 'Bus' are flagged as well,
    regardless of the three filters.

    inputs
    Hard Caps:
        home_region (list): Bus-friendly Residential PUMAs, calculated based on current commuting data (% or count)
        work_region (list): Bus-friendly Place of Work PUMAs, calculated based on current commuting data (% or count)
        schedule (list): Operating hours of the buses

    Changable inputs:
        affordability (0-100, default 20): Commuting costs as % of income; must be greater than 0
        fixgaps (True/False, default False): Whether to fix gaps with current data
        df (DataFrame, default None): Records to flag; None falls back to the notebook-level ipums_df

    output:
        series (0,1) indicating whether each line is an eligible eBuses commuter or not,
        in the same row order and with the same index as the input frame

    raises:
        ValueError if affordability is not a number greater than 0
    '''

    if df is None:
        df = ipums_df    ## notebook-level frame loaded in an earlier cell

    ## float() instead of int(): a fractional percentage such as 0.5 must not be truncated to 0
    affordability = float(affordability)
    if not affordability > 0:
        raise ValueError("affordability must be a percentage greater than 0, got %r" % (affordability,))

    ## Pick the work-side test for 'out' commutes and the home-side test for all others.
    ## Every mask is built on the full frame, so row order and index are preserved and no
    ## index re-alignment is needed (safe even when index labels repeat).
    is_outbound = df['COMMUTE_DIRECTION_MANHATTAN']=='out'
    home_ok = df['PUMAKEY_HOME'].isin(home_region)
    work_ok = df['PUMAKEY_WORK'].isin(work_region)
    region_hardcap = (is_outbound & work_ok) | (~is_outbound & home_ok)

    schedule_hardcap = df['DEPARTS_FOR_WORK_HOUR'].isin(schedule)    ## Using the current departure time as a reference

    ## NOTE(review): 100*12 looks like an assumed monthly cost of 100 annualised over 12 months - confirm
    affordability_changable = df['TOTAL_PERSONAL_INCOME']>=100*12*100/affordability

    final_series = region_hardcap & schedule_hardcap & affordability_changable
    if fixgaps:
        ## current bus commuters are always eligible when gaps are fixed
        final_series = final_series | (df['MODE_TRANSP_TO_WORK_HBDMATCH']=='Bus')

    return final_series.astype(int)

In [6]:
def subway_flag_binary(home_region,work_region,schedule,affordability=20,fixgaps=False,df=None):
    '''
    Flag each commuter record as an eligible subway commuter (1) or not (0).

    A record is eligible when it passes all three filters:
        region: if COMMUTE_DIRECTION_MANHATTAN is 'out', PUMAKEY_WORK must be in work_region;
                for every other direction, PUMAKEY_HOME must be in home_region
        schedule: DEPARTS_FOR_WORK_HOUR must be in schedule
        affordability: TOTAL_PERSONAL_INCOME >= 100*12*100/affordability
    When fixgaps is truthy, records whose MODE_TRANSP_TO_WORK_HBDMATCH is 'Subway' are flagged as well,
    regardless of the three filters.

    inputs
    Hard Caps:
        home_region (list): Subway-friendly Residential PUMAs, calculated based on current commuting data (% or count)
        work_region (list): Subway-friendly Place of Work PUMAs, calculated based on current commuting data (% or count)
        schedule (list): Operating hours of the subway

    Changable inputs:
        affordability (0-100, default 20): Commuting costs as % of income; must be greater than 0
        fixgaps (True/False, default False): Whether to fix gaps with current data
        df (DataFrame, default None): Records to flag; None falls back to the notebook-level ipums_df

    output:
        series (0,1) indicating whether each line is an eligible subway commuter or not,
        in the same row order and with the same index as the input frame

    raises:
        ValueError if affordability is not a number greater than 0
    '''

    if df is None:
        df = ipums_df    ## notebook-level frame loaded in an earlier cell

    ## float() instead of int(): a fractional percentage such as 0.5 must not be truncated to 0
    affordability = float(affordability)
    if not affordability > 0:
        raise ValueError("affordability must be a percentage greater than 0, got %r" % (affordability,))

    ## Pick the work-side test for 'out' commutes and the home-side test for all others.
    ## Every mask is built on the full frame, so row order and index are preserved and no
    ## index re-alignment is needed (safe even when index labels repeat).
    is_outbound = df['COMMUTE_DIRECTION_MANHATTAN']=='out'
    home_ok = df['PUMAKEY_HOME'].isin(home_region)
    work_ok = df['PUMAKEY_WORK'].isin(work_region)
    region_hardcap = (is_outbound & work_ok) | (~is_outbound & home_ok)

    schedule_hardcap = df['DEPARTS_FOR_WORK_HOUR'].isin(schedule)    ## Using the current departure time as a reference

    ## NOTE(review): 100*12 looks like an assumed monthly cost of 100 annualised over 12 months - confirm
    affordability_changable = df['TOTAL_PERSONAL_INCOME']>=100*12*100/affordability

    final_series = region_hardcap & schedule_hardcap & affordability_changable
    if fixgaps:
        ## current subway commuters are always eligible when gaps are fixed
        final_series = final_series | (df['MODE_TRANSP_TO_WORK_HBDMATCH']=='Subway')

    return final_series.astype(int)

In [7]:
def commuterRail_flag_binary(home_region,work_region,schedule,affordability=20,fixgaps=False,df=None):
    '''
    Flag each commuter record as an eligible Commuter Rail commuter (1) or not (0).

    A record is eligible when it passes all three filters:
        region: if COMMUTE_DIRECTION_MANHATTAN is 'out', PUMAKEY_WORK must be in work_region;
                for every other direction, PUMAKEY_HOME must be in home_region
        schedule: DEPARTS_FOR_WORK_HOUR must be in schedule
        affordability: TOTAL_PERSONAL_INCOME >= 100*12*100/affordability
    When fixgaps is truthy, records whose MODE_TRANSP_TO_WORK_HBDMATCH is 'CommuterRail' are flagged
    as well, regardless of the three filters.

    inputs
    Hard Caps:
        home_region (list): CommuterRail-friendly Residential PUMAs, calculated based on current commuting data (% or count)
        work_region (list): CommuterRail-friendly Place of Work PUMAs, calculated based on current commuting data (% or count)
        schedule (list): Operating hours of the commuter rail

    Changable inputs:
        affordability (0-100, default 20): Commuting costs as % of income; must be greater than 0
        fixgaps (True/False, default False): Whether to fix gaps with current data
        df (DataFrame, default None): Records to flag; None falls back to the notebook-level ipums_df

    output:
        series (0,1) indicating whether each line is an eligible Commuter Rail commuter or not,
        in the same row order and with the same index as the input frame

    raises:
        ValueError if affordability is not a number greater than 0
    '''

    if df is None:
        df = ipums_df    ## notebook-level frame loaded in an earlier cell

    ## float() instead of int(): a fractional percentage such as 0.5 must not be truncated to 0
    affordability = float(affordability)
    if not affordability > 0:
        raise ValueError("affordability must be a percentage greater than 0, got %r" % (affordability,))

    ## Pick the work-side test for 'out' commutes and the home-side test for all others.
    ## Every mask is built on the full frame, so row order and index are preserved and no
    ## index re-alignment is needed (safe even when index labels repeat).
    is_outbound = df['COMMUTE_DIRECTION_MANHATTAN']=='out'
    home_ok = df['PUMAKEY_HOME'].isin(home_region)
    work_ok = df['PUMAKEY_WORK'].isin(work_region)
    region_hardcap = (is_outbound & work_ok) | (~is_outbound & home_ok)

    schedule_hardcap = df['DEPARTS_FOR_WORK_HOUR'].isin(schedule)    ## Using the current departure time as a reference

    ## NOTE(review): 100*12 looks like an assumed monthly cost of 100 annualised over 12 months - confirm
    affordability_changable = df['TOTAL_PERSONAL_INCOME']>=100*12*100/affordability

    final_series = region_hardcap & schedule_hardcap & affordability_changable
    if fixgaps:
        ## current commuter rail commuters are always eligible when gaps are fixed
        final_series = final_series | (df['MODE_TRANSP_TO_WORK_HBDMATCH']=='CommuterRail')

    return final_series.astype(int)

In [8]:
def ferry_flag_binary(home_region,work_region,schedule,affordability=20,fixgaps=False,df=None):
    '''
    Flag each commuter record as an eligible ferry commuter (1) or not (0).

    A record is eligible when it passes all three filters:
        region: if COMMUTE_DIRECTION_MANHATTAN is 'out', PUMAKEY_WORK must be in work_region;
                for every other direction, PUMAKEY_HOME must be in home_region
        schedule: DEPARTS_FOR_WORK_HOUR must be in schedule
        affordability: TOTAL_PERSONAL_INCOME >= 100*12*100/affordability
    When fixgaps is truthy, records whose MODE_TRANSP_TO_WORK_HBDMATCH is 'Ferry' are flagged as well,
    regardless of the three filters.

    inputs
    Hard Caps:
        home_region (list): Ferry-friendly Residential PUMAs, calculated based on current commuting data (% or count)
        work_region (list): Ferry-friendly Place of Work PUMAs, calculated based on current commuting data (% or count)
        schedule (list): Operating hours of the ferry

    Changable inputs:
        affordability (0-100, default 20): Commuting costs as % of income; must be greater than 0
        fixgaps (True/False, default False): Whether to fix gaps with current data
        df (DataFrame, default None): Records to flag; None falls back to the notebook-level ipums_df

    output:
        series (0,1) indicating whether each line is an eligible ferry commuter or not,
        in the same row order and with the same index as the input frame

    raises:
        ValueError if affordability is not a number greater than 0
    '''

    if df is None:
        df = ipums_df    ## notebook-level frame loaded in an earlier cell

    ## float() instead of int(): a fractional percentage such as 0.5 must not be truncated to 0
    affordability = float(affordability)
    if not affordability > 0:
        raise ValueError("affordability must be a percentage greater than 0, got %r" % (affordability,))

    ## Pick the work-side test for 'out' commutes and the home-side test for all others.
    ## Every mask is built on the full frame, so row order and index are preserved and no
    ## index re-alignment is needed (safe even when index labels repeat).
    is_outbound = df['COMMUTE_DIRECTION_MANHATTAN']=='out'
    home_ok = df['PUMAKEY_HOME'].isin(home_region)
    work_ok = df['PUMAKEY_WORK'].isin(work_region)
    region_hardcap = (is_outbound & work_ok) | (~is_outbound & home_ok)

    schedule_hardcap = df['DEPARTS_FOR_WORK_HOUR'].isin(schedule)    ## Using the current departure time as a reference

    ## NOTE(review): 100*12 looks like an assumed monthly cost of 100 annualised over 12 months - confirm
    affordability_changable = df['TOTAL_PERSONAL_INCOME']>=100*12*100/affordability

    final_series = region_hardcap & schedule_hardcap & affordability_changable
    if fixgaps:
        ## current ferry commuters are always eligible when gaps are fixed
        final_series = final_series | (df['MODE_TRANSP_TO_WORK_HBDMATCH']=='Ferry')

    return final_series.astype(int)

In [15]:
# ---------------------------------------------------------------
# How to run
# Each call returns a 0/1 Series that is stored on ipums_df as a
# new FLAG_* column (one column per transit mode).
# ---------------------------------------------------------------

ipums_df['FLAG_EBUSES'] = bus_flag_binary(
    home_region=puma_home_Bus,
    work_region=puma_work_Bus,
    schedule=time_Bus,
    affordability=20,
    fixgaps=False,
)

ipums_df['FLAG_SUBWAY'] = subway_flag_binary(
    home_region=puma_home_Subway,
    work_region=puma_work_Subway,
    schedule=time_Subway,
    affordability=20,
    fixgaps=False,
)

ipums_df['FLAG_COMMUTERRAIL'] = commuterRail_flag_binary(
    home_region=puma_home_CommuterRail,
    work_region=puma_work_CommuterRail,
    schedule=time_CommuterRail,
    affordability=20,
    fixgaps=False,
)

ipums_df['FLAG_FERRY'] = ferry_flag_binary(
    home_region=puma_home_Ferry,
    work_region=puma_work_Ferry,
    schedule=time_Ferry,
    affordability=20,
    fixgaps=False,
)

In [16]:
# ---------------------------------------------------------------
# Checks (fixgaps=False)
# For the YEAR == 2019 rows, print the summed PERWT for each value
# (0 / 1) of every FLAG_* column.
# ---------------------------------------------------------------
check_df = ipums_df.loc[ipums_df['YEAR']==2019].copy()
for flag_col in ('FLAG_EBUSES', 'FLAG_SUBWAY', 'FLAG_COMMUTERRAIL', 'FLAG_FERRY'):
    print(check_df.groupby(by=[flag_col]).agg({"PERWT":"sum"}).T)

FLAG_EBUSES          0          1
PERWT        1098504.0  1702826.0
FLAG_SUBWAY         0          1
PERWT        743406.0  2057924.0
FLAG_COMMUTERRAIL          0         1
PERWT              2016230.0  785100.0
FLAG_FERRY          0         1
PERWT       2547081.0  254249.0


In [14]:
# ### fixgaps=True
# check_df = ipums_df[ipums_df['YEAR']==2019].copy()
# print(check_df.groupby(by=['FLAG_EBUSES']).agg({"PERWT":"sum"}).T)
# print(check_df.groupby(by=['FLAG_SUBWAY']).agg({"PERWT":"sum"}).T)
# print(check_df.groupby(by=['FLAG_COMMUTERRAIL']).agg({"PERWT":"sum"}).T)
# print(check_df.groupby(by=['FLAG_FERRY']).agg({"PERWT":"sum"}).T)

# ### increased a little, reasonable

FLAG_EBUSES          0          1
PERWT        1029473.0  1771857.0
FLAG_SUBWAY         0          1
PERWT        642545.0  2158785.0
FLAG_COMMUTERRAIL          0         1
PERWT              1934849.0  866481.0
FLAG_FERRY          0         1
PERWT       2533783.0  267547.0


In [17]:
### Summed PERWT by current commute mode, for comparison with the flag totals.
### The flagged totals are much larger than the current ones; need to check which groups drive this
### (current public transportation users or non public transportation users).
perwt_by_current_mode = check_df.groupby(by=['MODE_TRANSP_TO_WORK_HBDMATCH']).agg({"PERWT":"sum"})
perwt_by_current_mode

Unnamed: 0_level_0,PERWT
MODE_TRANSP_TO_WORK_HBDMATCH,Unnamed: 1_level_1
AutoOccupants,364715.0
Bicycle,30747.0
Bus,308243.0
CommuterRail,342902.0
Ferry,28171.0
Other,12401.0
Subway,1428234.0
WFH,70151.0
Walk,215766.0
