In [1]:
import pandas as pd
import random

# Load the cleaned, disaggregated IPUMS extract; the first CSV column becomes the index.
ipums_df = pd.read_csv(
    "../ipums_data/disaggregated_cleaned_ipums_data.csv",
    index_col=0,
)

In [2]:
'''
    Motorcycle inputs
    Hard Caps:
    max_age - maximum age of drivers. Research from Kaiser Permanente and retirement age indicates 75 is a realistic cut-off
    max_distance - max distance traveled by drivers, avg motorcycle tank only holds 150 miles
    min_income - minimum income of drivers. Current cutoff is set at the NY poverty line
    cognitive_diff - if the individual has cognitive difficulties, they would not have a drivers license
    ambulatory_diff - if the individual has walking difficulties, they would not have a drivers license
    ind_living_diff - if the individual has difficulties living independently, they would not have a drivers license
    selfcare_diff - if the individual has difficulties taking care of themselves, they would not have a drivers license
    vision_diff - if the individual has vision difficulties, they would not have a drivers license

    Changeable inputs:
    male_pct & female_pct - what percentage of eligible riders of each sex will ride a motorcycle? 0-100 value
    age_dist - to be determined how we can use age distributions to determine ridership. 
    **According to a Motorcycle Industry Council survey, women now make up 19 percent of motorcycle owners.
    Since the article was published in 2018, the share of female motorcyclists may have increased since then, so 20% might be
    a reasonable estimate for all age groups: https://www.kirshhelmets.com/wp-content/uploads/2020/03/Women-Numbers-Climbs.pdf
    USA Today reports the same figure: https://www.usatoday.com/story/money/2018/11/30/number-women-motorcycle-riders-u-s-grows-nearly-20-percent/2156000002/
        Ex) 35 year olds may be 2x more likely to ride than a 50 year old
        
    output:
        series of 0/1 values indicating whether each row is an eligible driver (1) or not (0)
    '''

'\n    Motorcycle inputs\n    Hard Caps:\n    max_age - maximum age of drivers. Research from Kaiser Permanente and retirement age indicates 75 is a realistic cut-off\n    max_distance - max distance traveled by drivers, avg motorcycle tank only holds 150 miles\n    min_income - minimum income of drivers. Current cutoff is set at the NY poverty line\n    cognitive_diff - if the individual has cognitive difficulties, they would not have a drivers license\n    ambulatory_diff - if the individual has walking difficulties, they would not have a drivers license\n    ind_living_diff - if the individual has difficulties taking care of themselves, they would not have a drivers license\n    selfcare_diff - if the individual has difficulties taking care of themselves, they would not have a drivers license\n    vision_diff - if the individual has vision difficulties, they would not have a drivers license\n\n    Changable inputs:\n    male_pct & female_pct - how many, of each sex, will drive a car

In [3]:
def motorcycle_flag_binary(max_age,max_distance,min_income,male_pct,female_pct,age_dist,df=None,seed=None):
    """Flag each person record as an eligible motorcycle rider (1) or not (0).

    A record is flagged 1 only when it passes every hard cap below AND is
    selected by the sex-specific random draw.

    Parameters
    ----------
    max_age : number
        Records whose AGE exceeds this value are excluded.
    max_distance : number
        Records whose DISTANCE_KM exceeds this value are excluded.
        NOTE(review): the column name suggests kilometres while the notebook
        notes describe the limit in miles -- confirm the intended unit.
    min_income : number
        Records whose TOTAL_PERSONAL_INCOME is below this value are excluded.
        (Previously this argument was ignored and 32626 was hard-coded.)
    male_pct, female_pct : number in [0, 100]
        Percentage of records with SEX == 'M' / SEX == 'F' that are randomly
        kept. Records with any other SEX value are never kept.
    age_dist : any
        Currently unused; age-based weighting is still to be decided.
    df : pandas.DataFrame, optional
        Frame to evaluate. Defaults to the notebook-level ``ipums_df``.
    seed : optional
        When given, a private ``random.Random(seed)`` generator is used so the
        draw is reproducible. When omitted, the global ``random`` module state
        is used, as before.

    Returns
    -------
    pandas.Series
        Integer series (0 or 1) aligned with ``df``'s index.
    """
    if df is None:
        df = ipums_df
    rng = random if seed is None else random.Random(seed)

    ### Hard caps - every one of these must hold for a record to be eligible
    age_hardcap = df['AGE'] <= max_age
    dist_hardcap = df['DISTANCE_KM'] <= max_distance
    income_hardcap = df['TOTAL_PERSONAL_INCOME'] >= min_income

    # Any reported difficulty (value above 0) disqualifies the record.
    difficulty_columns = [
        'COGNITIVE_DIFFICULTY',
        'AMBULATORY_DIFFICULTY',
        'IND_LIVING_DIFFICULTY',
        'SELFCARE_DIFFICULTY',
        'VISION_DIFFICULTY',
    ]
    no_difficulty_hardcap = (df[difficulty_columns] <= 0).all(axis=1)

    ### Gender - keep each record with the probability assigned to its sex.
    # random() returns a value in [0.0, 1.0), so the strict '<' makes 0% select
    # nobody and 100% select everybody, exactly.
    keep_probability = {'M': male_pct/100, 'F': female_pct/100}
    sex_flag = df['SEX'].apply(
        lambda sex: rng.random() < keep_probability.get(sex, 0.0)
    ).astype(bool)

    ### Age - TBD if we use an age distribution or buckets (age_dist is unused)
    final_series = (
        age_hardcap
        & dist_hardcap
        & income_hardcap
        & no_difficulty_hardcap
        & sex_flag
    )

    return final_series.astype(int)

In [4]:
# Seed the stdlib RNG first: motorcycle_flag_binary samples riders by sex with
# random.random(), so without a fixed seed the flag column (and the weighted
# totals computed from it) would change on every run.
random.seed(42)

# NOTE(review): the notes above mention a 75-year age cut-off and a 150-mile
# range, while this call uses 70 and 300 -- confirm the intended values.
ipums_df['FLAG_MOTORCYCLE'] = motorcycle_flag_binary(
    max_age=70,
    max_distance=300,
    min_income=32626,
    male_pct=100,
    female_pct=20,
    age_dist=None,
)

In [5]:
# Sum the PERWT column separately for flagged (1) and unflagged (0) records.
(
    ipums_df
    .groupby(by=['FLAG_MOTORCYCLE'])
    .agg({"PERWT": "sum"})
)

Unnamed: 0_level_0,PERWT
FLAG_MOTORCYCLE,Unnamed: 1_level_1
0,3000269.0
1,2566460.0
