In [1]:
import random

import pandas as pd

# Load the disaggregated, cleaned IPUMS extract; the path is relative to the
# working directory and the first CSV column is used as the row index.
ipums_df = pd.read_csv("../ipums_data/disaggregated_cleaned_ipums_data.csv",index_col=0)

In [2]:
# Preview the first rows to sanity-check the load and the available columns.
ipums_df.head()

Unnamed: 0,YEAR,PERWT,HOME_STATEFIP,HOME_PUMA,PUMA_NAME,SEX,AGE,HRS_WK_DAILY,TOTAL_PERSONAL_INCOME,MODE_TRANSP_TO_WORK,...,ARRIVES_AT_WORK_HOUR,COMMUTE_DIRECTION_MANHATTAN,DISTANCE_KM_TOCBD,COGNITIVE_DIFFICULTY,AMBULATORY_DIFFICULTY,IND_LIVING_DIFFICULTY,SELFCARE_DIFFICULTY,VISION_OR_HEARING_DIFFICULTY,VISION_DIFFICULTY,HEARING_DIFFICULTY
0,2017,42.0,9,500,Litchfield County,M,67,6.0,119400,"Auto, truck, or van",...,17,in,177.094712,0,0,0,0,0,0,0
1,2017,49.0,9,500,Litchfield County,M,44,0.4,19100,"Auto, truck, or van",...,15,in,177.094712,1,0,1,0,0,0,0
2,2017,171.0,9,500,Litchfield County,M,59,9.0,496700,"Auto, truck, or van",...,7,in,177.094712,0,0,0,0,0,0,0
3,2017,312.0,9,500,Litchfield County,M,51,14.0,250000,"Auto, truck, or van",...,7,in,177.094712,0,0,0,0,0,0,0
4,2017,99.0,9,500,Litchfield County,M,56,10.0,710000,"Auto, truck, or van",...,10,in,177.094712,0,0,0,0,0,0,0


In [None]:
'''
    Auto inputs
    Hard Caps:
    max_age - maximum age of drivers. Research from Kaiser Permanente and retirement age indicates 75 is a realistic cut-off
    min_distance - minimum distance traveled by drivers; it does not make sense to drive less than about 1 mile (~2 km)
    min_income - minimum income of drivers. Current cutoff is set at the NY poverty line
    cognitive_diff - if the individual has cognitive difficulties, they would not have a drivers license
    ambulatory_diff - if the individual has walking difficulties, they would not have a drivers license
    ind_living_diff - if the individual has difficulties living independently, they would not have a drivers license
    selfcare_diff - if the individual has difficulties taking care of themselves, they would not have a drivers license
    vision_diff - if the individual has vision difficulties, they would not have a drivers license

    Changeable inputs:
    male_pct & female_pct - percentage (0-100) of eligible riders of each sex who will drive a car
    age_dist - to be determined how we can use age distributions to determine ridership. 
        Ex) 35 year olds may be 2x more likely to ride than a 50 year old
    home_ownership_pct - 80% of EV owners charge at home (USDOE)
    car_ownership_pct - 91% of US residents own a car
        
    output:
        series (0,1) indicating whether each line is an eligible driver or not
    '''

In [None]:
def auto_flag_binary(max_age, min_distance, min_income, cog_diff, amb_diff,
                     ind_living_diff, selfcare_diff, vision_diff, male_pct,
                     female_pct, age_dist, homeowner, car_owner, df=None, rng=None):
    """Flag each row of the survey frame as an eligible car driver (1) or not (0).

    A row is flagged 1 only when it passes every hard cap AND every random
    draw (sex participation, car ownership, home ownership).

    Parameters
    ----------
    max_age : number
        Rows with ``AGE`` above this value are excluded.
    min_distance : number
        Rows with ``DISTANCE_KM_TOCBD`` below this value are excluded.
    min_income : number or None
        Rows with ``TOTAL_PERSONAL_INCOME`` below this value are excluded.
        ``None`` falls back to 32626 (the value previously hard-coded here,
        annotated as the 2019 NY poverty line).
    cog_diff, amb_diff, ind_living_diff, selfcare_diff, vision_diff
        Accepted for interface compatibility but not consulted: rows with a
        positive value in the corresponding ``*_DIFFICULTY`` column are
        always excluded.
    male_pct, female_pct : number in 0-100
        Percentage of rows with ``SEX`` equal to 'M' / 'F' that pass the sex
        draw. Rows with any other ``SEX`` value never pass.
    age_dist
        Placeholder; not used yet.
    homeowner, car_owner
        Accepted for interface compatibility but not consulted; the home
        factor (1.25) and car-ownership rate (0.9) are fixed below.
    df : pandas.DataFrame, optional
        Frame to evaluate. Defaults to the notebook-level ``ipums_df``.
    rng : object with a ``random()`` method, optional
        Source of uniform draws in [0, 1), e.g. ``random.Random(seed)`` for a
        reproducible run. Defaults to the ``random`` module.

    Returns
    -------
    pandas.Series of int
        0/1 flags sharing the index of the evaluated frame.
    """
    if df is None:
        df = ipums_df
    if rng is None:
        rng = random
    if min_income is None:
        min_income = 32626  # poverty line in NY 2019

    def uniform_draws():
        # One independent draw in [0, 1) per row, aligned to the frame's index.
        return pd.Series([rng.random() for _ in range(len(df))],
                         index=df.index, dtype=float)

    # Hard caps: deterministic exclusions.
    hard_caps = (
        (df['AGE'] <= max_age)
        & (df['DISTANCE_KM_TOCBD'] >= min_distance)
        & (df['TOTAL_PERSONAL_INCOME'] >= min_income)
        & (df['COGNITIVE_DIFFICULTY'] <= 0)
        & (df['AMBULATORY_DIFFICULTY'] <= 0)
        & (df['IND_LIVING_DIFFICULTY'] <= 0)
        & (df['SELFCARE_DIFFICULTY'] <= 0)
        & (df['VISION_DIFFICULTY'] <= 0)
    )

    ### Gender - one draw per row against that row's sex-specific rate.
    # Unmapped SEX values become NaN, and any comparison with NaN is False.
    # Strict '<' makes a 0% rate select nobody and a 100% rate select everybody.
    sex_prob = df['SEX'].map({'M': male_pct / 100, 'F': female_pct / 100}).astype(float)
    sex_flag = uniform_draws() < sex_prob

    # home and car ownership
    home_factor = 1.25
    car_ownership = 0.9
    car_flag = uniform_draws() < car_ownership

    # NOTE(review): the factor is used directly as a probability, so rows
    # labelled "Own" (1.25 > 1) always pass and all other rows pass with
    # probability 1 / 1.25 = 0.8. Confirm this is the intended weighting.
    is_owner = df['HOMEOWNER_LABEL'] == "Own"
    home_prob = is_owner.map({True: home_factor, False: 1 / home_factor}).astype(float)
    home_flag = uniform_draws() < home_prob

    ### Age - TBD if we use an age distribution or buckets
    final_series = hard_caps & sex_flag & car_flag & home_flag

    return final_series.astype(int)

In [None]:
'''
    Motorcycle inputs
    Hard Caps:
    max_age - maximum age of drivers. Research from Kaiser Permanente and retirement age indicates 75 is a realistic cut-off
    max_distance - maximum distance traveled by riders; an average motorcycle has a range of only about 150 miles per tank
    min_income - minimum income of drivers. Current cutoff is set at the NY poverty line
    cognitive_diff - if the individual has cognitive difficulties, they would not have a drivers license
    ambulatory_diff - if the individual has walking difficulties, they would not have a drivers license
    ind_living_diff - if the individual has difficulties living independently, they would not have a drivers license
    selfcare_diff - if the individual has difficulties taking care of themselves, they would not have a drivers license
    vision_diff - if the individual has vision difficulties, they would not have a drivers license

    Changeable inputs:
    male_pct & female_pct - percentage (0-100) of eligible riders of each sex who will ride a motorcycle
    age_dist - to be determined how we can use age distributions to determine ridership. 
        Ex) 35 year olds may be 2x more likely to ride than a 50 year old
        
    output:
        series (0,1) indicating whether each line is an eligible driver or not
    '''

In [None]:
def motorcycle_flag_binary(max_age, max_distance, min_income, cog_diff, amb_diff,
                           ind_living_diff, selfcare_diff, vision_diff, male_pct,
                           female_pct, age_dist, df=None, rng=None):
    """Flag each row of the survey frame as an eligible motorcycle rider (1) or not (0).

    A row is flagged 1 only when it passes every hard cap AND the random
    sex-participation draw.

    Parameters
    ----------
    max_age : number
        Rows with ``AGE`` above this value are excluded.
    max_distance : number
        Rows with ``DISTANCE_KM_TOCBD`` above this value are excluded.
    min_income : number or None
        Rows with ``TOTAL_PERSONAL_INCOME`` below this value are excluded.
        ``None`` falls back to 32626 (the value previously hard-coded here,
        annotated as the 2019 NY poverty line).
    cog_diff, amb_diff, ind_living_diff, selfcare_diff, vision_diff
        Accepted for interface compatibility but not consulted: rows with a
        positive value in the corresponding ``*_DIFFICULTY`` column are
        always excluded.
    male_pct, female_pct : number in 0-100
        Percentage of rows with ``SEX`` equal to 'M' / 'F' that pass the sex
        draw. Rows with any other ``SEX`` value never pass.
    age_dist
        Placeholder; not used yet.
    df : pandas.DataFrame, optional
        Frame to evaluate. Defaults to the notebook-level ``ipums_df``.
    rng : object with a ``random()`` method, optional
        Source of uniform draws in [0, 1), e.g. ``random.Random(seed)`` for a
        reproducible run. Defaults to the ``random`` module.

    Returns
    -------
    pandas.Series of int
        0/1 flags sharing the index of the evaluated frame.
    """
    if df is None:
        df = ipums_df
    if rng is None:
        rng = random
    if min_income is None:
        min_income = 32626  # poverty line in NY 2019

    # Hard caps: deterministic exclusions. The distance cap is an upper bound,
    # so rows at or below max_distance are the ones kept.
    hard_caps = (
        (df['AGE'] <= max_age)
        & (df['DISTANCE_KM_TOCBD'] <= max_distance)
        & (df['TOTAL_PERSONAL_INCOME'] >= min_income)
        & (df['COGNITIVE_DIFFICULTY'] <= 0)
        & (df['AMBULATORY_DIFFICULTY'] <= 0)
        & (df['IND_LIVING_DIFFICULTY'] <= 0)
        & (df['SELFCARE_DIFFICULTY'] <= 0)
        & (df['VISION_DIFFICULTY'] <= 0)
    )

    ### Gender - one draw per row against that row's sex-specific rate.
    # Unmapped SEX values become NaN, and any comparison with NaN is False.
    # Strict '<' makes a 0% rate select nobody and a 100% rate select everybody.
    sex_prob = df['SEX'].map({'M': male_pct / 100, 'F': female_pct / 100}).astype(float)
    draws = pd.Series([rng.random() for _ in range(len(df))],
                      index=df.index, dtype=float)
    sex_flag = draws < sex_prob

    ### Age - TBD if we use an age distribution or buckets
    final_series = hard_caps & sex_flag

    return final_series.astype(int)

In [None]:
'''
    Taxicab inputs
    Hard Caps:
    max_distance - max distance of taxicab ride (~15 miles or 30 km)
    min_income - minimum income of riders. Current cutoff is set at the NY poverty line

    Changeable inputs:
    male_pct & female_pct - percentage (0-100) of eligible riders of each sex who will take a taxi
    age_dist - to be determined how we can use age distributions to determine ridership. 
        Ex) 35 year olds may be 2x more likely to ride than a 50 year old
        
    output:
        series (0,1) indicating whether each line is an eligible rider or not
    '''

In [None]:
def taxicab_flag_binary(max_distance, min_income, male_pct, female_pct, age_dist,
                        df=None, rng=None):
    """Flag each row of the survey frame as an eligible taxicab rider (1) or not (0).

    A row is flagged 1 only when it passes both hard caps AND the random
    sex-participation draw.

    Parameters
    ----------
    max_distance : number
        Rows with ``DISTANCE_KM_TOCBD`` above this value are excluded.
    min_income : number or None
        Rows with ``TOTAL_PERSONAL_INCOME`` below this value are excluded.
        ``None`` falls back to 32626 (the value previously hard-coded here,
        annotated as the 2019 NY poverty line).
    male_pct, female_pct : number in 0-100
        Percentage of rows with ``SEX`` equal to 'M' / 'F' that pass the sex
        draw. Rows with any other ``SEX`` value never pass.
    age_dist
        Placeholder; not used yet.
    df : pandas.DataFrame, optional
        Frame to evaluate. Defaults to the notebook-level ``ipums_df``.
    rng : object with a ``random()`` method, optional
        Source of uniform draws in [0, 1), e.g. ``random.Random(seed)`` for a
        reproducible run. Defaults to the ``random`` module.

    Returns
    -------
    pandas.Series of int
        0/1 flags sharing the index of the evaluated frame.
    """
    if df is None:
        df = ipums_df
    if rng is None:
        rng = random
    if min_income is None:
        min_income = 32626  # poverty line in NY 2019

    # Hard caps. The distance cap is an upper bound, so rows at or below
    # max_distance are the ones kept.
    within_range = df['DISTANCE_KM_TOCBD'] <= max_distance
    can_afford = df['TOTAL_PERSONAL_INCOME'] >= min_income

    ### Gender - one draw per row against that row's sex-specific rate.
    # Unmapped SEX values become NaN, and any comparison with NaN is False.
    # Strict '<' makes a 0% rate select nobody and a 100% rate select everybody.
    sex_prob = df['SEX'].map({'M': male_pct / 100, 'F': female_pct / 100}).astype(float)
    draws = pd.Series([rng.random() for _ in range(len(df))],
                      index=df.index, dtype=float)
    sex_flag = draws < sex_prob

    ### Age - TBD if we use an age distribution or buckets
    final_series = within_range & can_afford & sex_flag

    return final_series.astype(int)