In [None]:
'''
Eligibility-flag functions defined in this notebook:

escooter_flag_binary - 0/1 flag per row of ipums_df for eligible e-scooter riders
walking_flag_binary - 0/1 flag per row of ipums_df for eligible walkers
'''

In [1]:
import random

import pandas as pd
# Location of the IPUMS extract in the project's GitHub repository.
IPUMS_CSV_URL = "https://github.com/BNewborn/mobility-electrification/raw/main/01_DataExploration_and_Engineering/ipums_data_drop/disaggregated_cleaned_ipums_data.csv"

# The first CSV column is used as the row index rather than as a data column.
ipums_df = pd.read_csv(IPUMS_CSV_URL, index_col=0)

# Preview the first rows as a quick sanity check of the load.
ipums_df.head()

Unnamed: 0,YEAR,PERWT,HOME_STATEFIP,HOME_PUMA,PUMA_NAME,SEX,AGE,HRS_WK_DAILY,TOTAL_PERSONAL_INCOME,MODE_TRANSP_TO_WORK,...,DEPARTS_FOR_WORK_HOUR,ARRIVES_AT_WORK_HOUR,COMMUTE_DIRECTION_MANHATTAN,COGNITIVE_DIFFICULTY,AMBULATORY_DIFFICULTY,IND_LIVING_DIFFICULTY,SELFCARE_DIFFICULTY,VISION_OR_HEARING_DIFFICULTY,VISION_DIFFICULTY,HEARING_DIFFICULTY
0,2017,42.0,9,500,Litchfield County,M,67,6.0,119400,"Auto, truck, or van",...,16,17,in,0,0,0,0,0,0,0
1,2017,49.0,9,500,Litchfield County,M,44,0.4,19100,"Auto, truck, or van",...,14,15,in,1,0,1,0,0,0,0
2,2017,171.0,9,500,Litchfield County,M,59,9.0,496700,"Auto, truck, or van",...,7,7,in,0,0,0,0,0,0,0
3,2017,312.0,9,500,Litchfield County,M,51,14.0,250000,"Auto, truck, or van",...,7,7,in,0,0,0,0,0,0,0
4,2017,99.0,9,500,Litchfield County,M,56,10.0,710000,"Auto, truck, or van",...,8,10,in,0,0,0,0,0,0,0


In [2]:
def escooter_flag_binary(max_age,max_distance,scooter_friendly_origins,amb_diff, ind_living_diff, selfcare_diff, vision_diff, male_pct,female_pct,age_dist,seed=None):
    '''
    Flag which rows of the notebook-level ``ipums_df`` are eligible e-scooter riders.

    A row is flagged 1 only if it passes every hard cap below AND is selected
    by the per-sex random sampling. The function reads the global ``ipums_df``
    (it must be loaded before calling) and does not modify it.

    inputs
    Hard caps:
    max_age - maximum age of e-scooter riders (inclusive); 60 was suggested as
        a realistic cut-off
    max_distance - maximum value of the DISTANCE_KM_TOCBD column (inclusive);
        the column name suggests kilometres - TODO confirm the column exists
        in the loaded data
    scooter_friendly_origins - collection of PUMA_NAME values considered
        e-scooter friendly (no dedicated e-scooter infrastructure yet, so the
        bike-lane origins are used for now)
    amb_diff, ind_living_diff, selfcare_diff, vision_diff - currently NOT read.
        The matching hard caps are always applied: a row is excluded whenever
        AMBULATORY_DIFFICULTY, IND_LIVING_DIFFICULTY, SELFCARE_DIFFICULTY or
        VISION_DIFFICULTY is greater than 0.

    Changeable inputs:
    male_pct & female_pct - share (0-100) of rows of each sex that is randomly
        selected as riders
    age_dist - currently NOT read; reserved for a future age-based weighting
        Ex) 35 year olds may be 2x more likely to ride than a 50 year old
    seed - optional; when given, the random sampling uses a private generator
        seeded with this value so results are reproducible. When None, the
        module-level ``random`` generator is used.

    output:
        integer series (0,1), indexed like ``ipums_df``, indicating whether
        each row is an eligible e-scooter rider or not
    '''
    # A private generator keeps seeded runs reproducible without touching the
    # global random state; the unseeded path still honours random.seed(...).
    rng = random if seed is None else random.Random(seed)
    sex = ipums_df['SEX']

    def _sample(pct):
        # One independent draw per row (for every row, whatever its sex);
        # True with probability pct/100.
        return pd.Series([rng.random() <= pct/100 for _ in range(len(sex))],
                         index=sex.index, dtype=bool)

    ### Hard caps
    age_hardcap = ipums_df['AGE'] <= max_age
    dist_hardcap = ipums_df['DISTANCE_KM_TOCBD'] <= max_distance
    scooter_infra_locs = ipums_df['PUMA_NAME'].isin(scooter_friendly_origins)
    amb_diff_hardcap = ipums_df['AMBULATORY_DIFFICULTY'] <= 0
    ind_living_diff_hardcap = ipums_df['IND_LIVING_DIFFICULTY'] <= 0
    selfcare_diff_hardcap = ipums_df['SELFCARE_DIFFICULTY'] <= 0
    vision_diff_hardcap = ipums_df['VISION_DIFFICULTY'] <= 0

    ### Gender - male draws are taken for all rows first, then female draws
    male_sex_flag = _sample(male_pct) & (sex == 'M')
    female_sex_flag = _sample(female_pct) & (sex == 'F')
    sex_flag = male_sex_flag | female_sex_flag

    ### Age - TBD if we use an age distribution or buckets

    final_series = (
        age_hardcap
        & dist_hardcap
        & scooter_infra_locs
        & amb_diff_hardcap
        & ind_living_diff_hardcap
        & selfcare_diff_hardcap
        & vision_diff_hardcap
        & sex_flag
    )

    return final_series.astype(int)

In [3]:
def walking_flag_binary(max_age,max_distance,amb_diff, ind_living_diff, selfcare_diff, vision_diff, male_pct,female_pct,age_dist,seed=None):
    '''
    Flag which rows of the notebook-level ``ipums_df`` are eligible walkers.

    A row is flagged 1 only if it passes every hard cap below AND is selected
    by the per-sex random sampling. The function reads the global ``ipums_df``
    (it must be loaded before calling) and does not modify it.

    Walking inputs
    Hard caps:
    max_age - maximum age of walkers (inclusive)
    max_distance - maximum value of the DISTANCE_KM_TOCBD column (inclusive);
        the column name suggests kilometres - TODO confirm the column exists
        in the loaded data. A walk of about 2 miles was suggested as the cap
        (up to 20 miles or 32 km at the extreme).
    amb_diff, ind_living_diff, selfcare_diff, vision_diff - currently NOT read.
        The ambulatory, self-care and vision hard caps are always applied: a
        row is excluded whenever AMBULATORY_DIFFICULTY, SELFCARE_DIFFICULTY or
        VISION_DIFFICULTY is greater than 0. IND_LIVING_DIFFICULTY is not
        checked by this function.

    Changeable inputs:
    male_pct & female_pct - share (0-100) of rows of each sex that is randomly
        selected as walkers
    age_dist - currently NOT read; reserved for a future age-based weighting
        Ex) 35 year olds may be 2x more likely to walk than a 50 year old
    seed - optional; when given, the random sampling uses a private generator
        seeded with this value so results are reproducible. When None, the
        module-level ``random`` generator is used.

    output:
        integer series (0,1), indexed like ``ipums_df``, indicating whether
        each row is an eligible walker or not
    '''
    # A private generator keeps seeded runs reproducible without touching the
    # global random state; the unseeded path still honours random.seed(...).
    rng = random if seed is None else random.Random(seed)
    sex = ipums_df['SEX']

    def _sample(pct):
        # One independent draw per row (for every row, whatever its sex);
        # True with probability pct/100.
        return pd.Series([rng.random() <= pct/100 for _ in range(len(sex))],
                         index=sex.index, dtype=bool)

    ### Hard caps
    age_hardcap = ipums_df['AGE'] <= max_age
    dist_hardcap = ipums_df['DISTANCE_KM_TOCBD'] <= max_distance
    amb_diff_hardcap = ipums_df['AMBULATORY_DIFFICULTY'] <= 0
    selfcare_diff_hardcap = ipums_df['SELFCARE_DIFFICULTY'] <= 0
    vision_diff_hardcap = ipums_df['VISION_DIFFICULTY'] <= 0

    ### Gender - male draws are taken for all rows first, then female draws
    male_sex_flag = _sample(male_pct) & (sex == 'M')
    female_sex_flag = _sample(female_pct) & (sex == 'F')
    # Fixed: the original combined the undefined names male_flag/female_flag,
    # which raised NameError on every call.
    sex_flag = male_sex_flag | female_sex_flag

    ### Age - TBD if we use an age distribution or buckets
    final_series = (
        age_hardcap
        & dist_hardcap
        & amb_diff_hardcap
        & sex_flag
        & selfcare_diff_hardcap
        & vision_diff_hardcap
    )

    return final_series.astype(int)