# Make vehicle type estimation file
use 2017 NHTS to create a choice model estimation file for ActivitySim vehicle type model

Updated vehicle age categories

In [1]:
import pandas as pd
import numpy as np

pd.options.display.max_rows = 999
pd.options.display.max_columns = 99
pd.options.display.float_format = '{:,.2f}'.format

In [2]:
# read the basic data
hh   = pd.read_csv('hhpub.csv')
pers = pd.read_csv('perpub.csv')
veh  = pd.read_csv('vehpub.csv')


### Calculate several derived fields and merge the HH and person files with the vehicles file

In [3]:
# define person type

def determine_person_type(row):
    
    # children under 5 not included in NHTS data
    if row['R_AGE_IMP']<=4: 
        return 'Child0_4'
    
    # children 5-15
    elif row['R_AGE_IMP']<=15: 
        return 'Child5_15'
    
    # children 16-17
    elif row['R_AGE_IMP']<=17:
        return 'Child16p'
    
    # school type in NHTS stops at age 17, so it's not clear how to identify college students
    elif row['SCHTYP']==1: 
        return 'CollegeStudent'
    
    # full-time workers
    elif row['WKFTPT']==1: 
        return 'FullTimeWorker'
    
    # part-time workers
    elif row['WKFTPT']==2:
        return 'PartTimeWorker'
    
    # non-working adults age <65
    elif row['R_AGE_IMP']<65:
        return 'NonWorkingAdultUnder65'
    
    # non-working adults age >=65
    else:
        return 'NonWorkingAdult65p'

pers['PERSON_TYPE'] = pers.apply(determine_person_type, axis=1)

In [4]:
# define person type count variables
pers['CHILDREN0_4']  = np.where(pers['PERSON_TYPE']=='Child0_4', 1, 0)
pers['CHILDREN5_15'] = np.where(pers['PERSON_TYPE']=='Child5_15', 1, 0)
pers['CHILDREN16P']  = np.where(pers['PERSON_TYPE']=='Child16p', 1, 0)
pers['COLLEGE_STUDENTS']  = np.where(pers['PERSON_TYPE']=='CollegeStudent', 1, 0)
pers['FULL_TIME_WORKERS']  = np.where(pers['PERSON_TYPE']=='FullTimeWorker', 1, 0)
pers['PART_TIME_WORKERS']  = np.where(pers['PERSON_TYPE']=='PartTimeWorker', 1, 0)
pers['NW_ADULTS_UNDER65']  = np.where(pers['PERSON_TYPE']=='NonWorkingAdultUnder65', 1, 0)
pers['NW_ADULTS_65P']  = np.where(pers['PERSON_TYPE']=='NonWorkingAdult65p', 1, 0)


In [5]:
# additional person count variables
pers['CHILDREN'] = np.where(pers['R_AGE_IMP']<18, 1, 0)
pers['WORKERS'] = np.where(((pers['WKFTPT']==1) | (pers['WKFTPT']==2)), 1, 0)
pers['PERSONS16P'] = np.where(pers['R_AGE_IMP']>=16, 1, 0)
pers['DRIVERS'] = np.where(pers['DRIVER']==1,1,0)
pers['MEN16P'] = np.where(((pers['R_AGE_IMP']>=16) & (pers['R_SEX_IMP']==1)), 1, 0)
pers['WOMEN16P'] = np.where(((pers['R_AGE_IMP']>=16) & (pers['R_SEX_IMP']==2)), 1, 0)
pers['BORN_IN_US16P'] = np.where(((pers['R_AGE_IMP']>=16) & (pers['BORNINUS']==1)), 1, 0)
pers['BORN_OUTSIDE_US16P'] = np.where(((pers['R_AGE_IMP']>=16) & (pers['BORNINUS']==2)), 1, 0)


In [6]:
# process distance to work to convert missing values to zeros
pers['DISTTOWK17'] = np.where(pers['DISTTOWK17']<0, 0, pers['DISTTOWK17'])

In [7]:
# aggregate person data to household level
person_specs = {'CHILDREN0_4' : sum, 
                'CHILDREN5_15': sum, 
                'CHILDREN16P': sum, 
                'COLLEGE_STUDENTS': sum, 
                'FULL_TIME_WORKERS': sum, 
                'PART_TIME_WORKERS': sum, 
                'NW_ADULTS_UNDER65': sum, 
                'NW_ADULTS_65P': sum, 
                'WORKERS': sum, 
                'CHILDREN': sum, 
                'PERSONS16P': sum, 
                'DRIVERS': sum, 
                'MEN16P': sum, 
                'WOMEN16P': sum, 
                'BORN_IN_US16P': sum, 
                'BORN_OUTSIDE_US16P': sum, 
                'DISTTOWK17': sum
               }

pershh = pers.groupby('HOUSEID').agg(person_specs)
pershh['DISTTOWK_SUM'] = pershh['DISTTOWK17']
pershh['DISTTOWK_AVG'] = np.where(pershh['WORKERS']>0, pershh['DISTTOWK17'] / pershh['WORKERS'], 0)


In [8]:
# join the household and person files to the vehicle file
estfile = veh.merge(hh, how='left', on='HOUSEID', suffixes=(None, '_x'))
estfile = estfile.merge(pershh, how='left', on='HOUSEID', suffixes=(None, '_x'))

# drop duplicate columns
duplicate_columns = []
for col in estfile.columns: 
    if '_x' in col: 
        duplicate_columns.append(col)
estfile = estfile.drop(columns=duplicate_columns)

### Group the alternatives into more aggregate categories, and drop records missing our choice alternatives

![image.png](attachment:image.png)

In [9]:
# group body types into more limited categories
def determine_body_type(row):
    
    # automobile/car/station wagon
    if row['VEHTYPE']==1: 
        return 1
    
    # van (Mini/Cargo/Passenger)
    elif row['VEHTYPE']==2: 
        return 2
    
    # SUV
    elif row['VEHTYPE']==3: 
        return 3
    
    # pickup/other truck
    elif row['VEHTYPE']==4 or row['VEHTYPE']==5: 
        return 4
    
    # motorcycle
    elif row['VEHTYPE']==7: 
        return 5
    
    # everything else
    else: 
        return 97
    
estfile['VEHTYPE2'] = estfile.apply(determine_body_type, axis=1)
estfile['VEHTYPE2_TXT'] = estfile['VEHTYPE2'].replace({1:'1-Car', 
                                                       2:'2-Van', 
                                                       3:'3-SUV', 
                                                       4:'4-Pickup/Truck', 
                                                       5:'5-Motorcycle', 
                                                       97:'97-Other'})

![image.png](attachment:image.png)
![image-2.png](attachment:image-2.png)

In [10]:
# group fuel types into a more limited number of categories
def determine_fuel_type(row):
    
    # gas
    if row['FUELTYPE']==1:
        return 1
    
    # diesel
    if row['FUELTYPE']==2: 
        return 2
    
    # hybrid
    elif row['FUELTYPE']==3 and row['HFUEL']==4: 
        return 3
    
    # plug-in hybrid
    elif row['FUELTYPE']==3 and row['HFUEL']==2: 
        return 4
    
    # battery-electric
    elif row['FUELTYPE']==3 and row['HFUEL']==3: 
        return 5
    
    # don't know/other
    else:
        return 97
    
estfile['FUELTYPE2'] = estfile.apply(determine_fuel_type, axis=1)
estfile['FUELTYPE2_TXT'] = estfile['FUELTYPE2'].replace({1:'1-Gas', 
                                                         2:'2-Diesel',
                                                         3:'3-Hybrid', 
                                                         4:'4-Plug-In Hybrid', 
                                                         5:'5-Battery Electric', 
                                                         97:'97-Other'})

In [11]:
# group vehicle ages into a more limited number of categories
def determine_vehicle_age_group(row):
    
    if row['VEHAGE']<0:
        return 97
    elif row['VEHAGE']<=5: 
        return 1
    elif row['VEHAGE']<=10:
        return 2
    else:
        return 3
    
estfile['VEHAGE2'] = estfile.apply(determine_vehicle_age_group, axis=1)
estfile['VEHAGE2_TXT'] = estfile['VEHAGE2'].replace({1:'1: 1-5 years', 
                                                     2:'2: 6-10 years', 
                                                     3:'3: 11+ years', 
                                                    97:'97: Missing/Unknown'})

In [12]:
# group vehicle ages into new vs used
# we probably won't use this, but it will be interesting for future use
estfile['NEW_VEHICLE'] = np.where(estfile['VEHOWNED']==2, 1, 0)

In [13]:
# exclude missing body types, fuel types, and age
estfile = estfile[estfile['VEHTYPE2']!=97]
estfile = estfile[estfile['FUELTYPE2']!=97]
estfile = estfile[estfile['VEHAGE2']!=97]

# exclude diesel, hybrid, and PEV motorcyles b/c a tiny share and of little interest
estfile = estfile[(estfile['VEHTYPE2']!=5) | (estfile['FUELTYPE2']!=2)]
estfile = estfile[(estfile['VEHTYPE2']!=5) | (estfile['FUELTYPE2']!=3)]
estfile = estfile[(estfile['VEHTYPE2']!=5) | (estfile['FUELTYPE2']!=3)]

### Rank vehicles by how many miles they are driven

In [14]:
# rank vehicles by miles driven
estfile["RANK"] = estfile.groupby("HOUSEID")["BESTMILE"].rank("first", ascending=False)

In [15]:
# stats by rank of vehicles
pd.pivot_table(estfile, 
               index=['RANK'], 
               values=['BESTMILE'], 
               aggfunc=['count', 'mean', 'sum'], 
               fill_value=0)

Unnamed: 0_level_0,count,mean,sum
Unnamed: 0_level_1,BESTMILE,BESTMILE,BESTMILE
RANK,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1.0,123059,14383.74,1770048674.88
2.0,80940,8064.07,652705614.37
3.0,30478,5274.67,160761301.92
4.0,10677,3721.19,39731138.59
5.0,3589,2678.66,9613721.57
6.0,1289,1985.13,2558828.89
7.0,507,1547.02,784339.38
8.0,239,1391.59,332589.11
9.0,124,1157.1,143479.93
10.0,63,1145.45,72163.51


In [16]:
# stats by rank of vehicles
pd.pivot_table(estfile, 
               index=['RANK'], 
               columns=['PERSONS16P'], 
               values=['BESTMILE'], 
               aggfunc=['count', 'mean'], 
               fill_value=0)

Unnamed: 0_level_0,count,count,count,count,count,count,count,count,count,count,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
PERSONS16P,1,2,3,4,5,6,7,8,9,10,1,2,3,4,5,6,7,8,9,10
RANK,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
1.0,39462,67925,11473,3327,679,149,33,9,1,1,10891.85,15362.13,18396.79,20151.35,21003.49,23180.0,21740.13,18733.15,13976.4,36661.69
2.0,8930,57518,10537,3141,633,141,30,8,1,1,5523.8,7810.98,10103.87,11736.08,12708.04,15017.81,16066.22,12724.64,13415.37,19899.77
3.0,2278,17560,7339,2591,546,126,29,7,1,1,3411.98,4492.61,6379.75,7987.36,8903.97,10414.02,10572.08,10265.94,12291.49,17805.46
4.0,711,5253,2549,1633,398,102,24,5,1,1,2430.41,2832.8,4150.1,5489.0,6409.65,7385.5,8657.05,8302.25,10984.36,15649.61
5.0,250,1695,808,532,213,72,14,3,1,1,1749.16,1949.79,2806.5,3846.86,4791.05,5619.76,5996.89,7961.05,10721.89,13679.38
6.0,103,620,263,179,78,33,9,2,1,1,1621.77,1470.28,2095.32,2519.19,3303.61,4063.38,5973.11,6813.11,5867.99,13179.99
7.0,58,238,107,61,28,8,4,1,1,1,1514.12,1218.14,1640.2,1579.06,2684.0,3459.32,3563.82,1500.0,5567.18,10630.68
8.0,37,113,50,21,9,6,1,1,0,1,1314.68,1145.48,1426.12,1337.94,2697.95,3210.29,5262.86,300.0,0.0,5997.62
9.0,23,62,21,7,6,3,1,0,0,1,1469.89,847.42,734.03,1766.74,2540.91,1523.91,3749.67,0.0,0.0,5783.77
10.0,16,31,9,4,0,1,1,0,0,1,1514.99,1023.39,1039.39,1002.56,0.0,1000.0,0.0,0.0,0.0,1834.0


In [17]:
# keep only the four vehicles in each household (limit of ActivitySim car ownership model)
# keep the vehicles that have the most mileage

estfile = estfile[estfile['RANK']<=4]

In [18]:
estfile.head()

Unnamed: 0,HOUSEID,VEHID,VEHYEAR,VEHAGE,MAKE,MODEL,FUELTYPE,VEHTYPE,WHOMAIN,OD_READ,HFUEL,VEHOWNED,VEHOWNMO,ANNMILES,HYBRID,PERSONID,TRAVDAY,HOMEOWN,HHSIZE,HHVEHCNT,HHFAMINC,DRVRCNT,HHSTATE,HHSTFIPS,NUMADLT,WRKCOUNT,TDAYDATE,LIF_CYC,MSACAT,MSASIZE,RAIL,URBAN,URBANSIZE,URBRUR,CENSUS_D,CENSUS_R,CDIVMSAR,HH_RACE,HH_HISP,HH_CBSA,SMPLSRCE,WTHHFIN,BESTMILE,BEST_FLG,BEST_EDT,BEST_OUT,HBHUR,HTHTNRNT,HTPPOPDN,...,PC,SPHONE,TAB,WALK,BIKE,CAR,TAXI,BUS,TRAIN,PARA,PRICE,PLACE,WALK2SAVE,BIKE2SAVE,PTRANS,HHRELATD,CNTTDHH,YOUNGCHILD,HHRESP,SCRESP,RESP_CNT,WEBUSE17,CHILDREN0_4,CHILDREN5_15,CHILDREN16P,COLLEGE_STUDENTS,FULL_TIME_WORKERS,PART_TIME_WORKERS,NW_ADULTS_UNDER65,NW_ADULTS_65P,WORKERS,CHILDREN,PERSONS16P,DRIVERS,MEN16P,WOMEN16P,BORN_IN_US16P,BORN_OUTSIDE_US16P,DISTTOWK17,DISTTOWK_SUM,DISTTOWK_AVG,VEHTYPE2,VEHTYPE2_TXT,FUELTYPE2,FUELTYPE2_TXT,VEHAGE2,VEHAGE2_TXT,NEW_VEHICLE,RANK
0,30000007,1,2007,10,49,49032,1,1,3,69000,-1,1,-1,1000,2,3,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,14611.93,1,-1,4,T,50,1500,...,2,1,2,5,5,1,5,4,5,5,1,2,4,4,4,1,6,0,1,1,3,1,0,0,0,0,1,0,1,1,1,0,3,3,1,2,3,0,82.61,82.61,82.61,1,1-Car,1,1-Gas,2,2: 6-10 years,0,1.0
1,30000007,2,2004,13,49,49442,1,2,-8,164000,-1,1,-1,2500,2,-8,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,4767.09,3,-1,-1,T,50,1500,...,2,1,2,5,5,1,5,4,5,5,1,2,4,4,4,1,6,0,1,1,3,1,0,0,0,0,1,0,1,1,1,0,3,3,1,2,3,0,82.61,82.61,82.61,2,2-Van,1,1-Gas,3,3: 11+ years,0,4.0
2,30000007,3,1998,19,19,19014,1,1,1,120000,-1,1,-1,900,2,1,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,8000.32,1,-1,-1,T,50,1500,...,2,1,2,5,5,1,5,4,5,5,1,2,4,4,4,1,6,0,1,1,3,1,0,0,0,0,1,0,1,1,1,0,3,3,1,2,3,0,82.61,82.61,82.61,1,1-Car,1,1-Gas,3,3: 11+ years,0,3.0
4,30000007,5,1993,24,20,20481,1,4,2,300000,-1,1,-1,10000,2,2,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,12437.66,1,-1,-1,T,50,1500,...,2,1,2,5,5,1,5,4,5,5,1,2,4,4,4,1,6,0,1,1,3,1,0,0,0,0,1,0,1,1,1,0,3,3,1,2,3,0,82.61,82.61,82.61,4,4-Pickup/Truck,1,1-Gas,3,3: 11+ years,0,2.0
5,30000008,1,2014,3,20,20028,1,1,97,-88,-1,1,-1,8000,2,97,5,1,2,4,8,2,WI,55,2,2,201608,2,2,5,2,4,6,2,3,2,32,1,2,33460,2,69.51,6099.62,3,-1,-1,R,5,300,...,1,1,2,4,4,1,5,5,5,5,5,2,4,4,5,1,2,0,2,1,2,1,0,0,0,0,2,0,0,0,2,0,2,2,1,1,2,0,11.18,11.18,5.59,1,1-Car,1,1-Gas,1,1: 1-5 years,0,4.0


### Based on the rank of the vehicles, determine the types of vehicles already owned

In [19]:
# first create flags based on the vehicle types

ownership_data = estfile[['HOUSEID', 'RANK', 'VEHTYPE2', 'FUELTYPE2']].copy()

# vehicle type
ownership_data['CARS'] = np.where(ownership_data['VEHTYPE2']==1, 1, 0)
ownership_data['VANS'] = np.where(ownership_data['VEHTYPE2']==2, 1, 0)
ownership_data['SUVS'] = np.where(ownership_data['VEHTYPE2']==3, 1, 0)
ownership_data['PUS']  = np.where(ownership_data['VEHTYPE2']==4, 1, 0)
ownership_data['MCS']  = np.where(ownership_data['VEHTYPE2']==5, 1, 0)

# fuel type
ownership_data['GASS'] = np.where(ownership_data['FUELTYPE2']==1, 1, 0)
ownership_data['DSLS'] = np.where(ownership_data['FUELTYPE2']==2, 1, 0)
ownership_data['HYBS'] = np.where(ownership_data['FUELTYPE2']==3, 1, 0)
ownership_data['PEVS'] = np.where(ownership_data['FUELTYPE2']==4, 1, 0)
ownership_data['BEVS'] = np.where(ownership_data['FUELTYPE2']==5, 1, 0)

ownership_data = ownership_data.drop(columns=['VEHTYPE2', 'FUELTYPE2'])

In [20]:
# create fields for count of vehicles and initialize with zeros
# for vehicle rank 1, this always stays at zero

estfile['CAR_COUNT'] = 0
estfile['VAN_COUNT'] = 0
estfile['SUV_COUNT'] = 0
estfile['PU_COUNT'] = 0
estfile['MC_COUNT'] = 0

estfile['GAS_COUNT'] = 0
estfile['DSL_COUNT'] = 0
estfile['HYB_COUNT'] = 0
estfile['PEV_COUNT'] = 0
estfile['BEV_COUNT'] = 0

In [21]:
# for rank 2 vehicles, only count the number of rank 1 vehicles

rank1_ownership = ownership_data[ownership_data['RANK']==1]
rank1_counts = rank1_ownership.groupby('HOUSEID').agg(sum).reset_index()
estfile = estfile.merge(rank1_counts, how='left', on='HOUSEID', suffixes=(None, '_x'))

estfile['CAR_COUNT'] = np.where(estfile['RANK']==2, estfile['CARS'], estfile['CAR_COUNT'])
estfile['VAN_COUNT'] = np.where(estfile['RANK']==2, estfile['VANS'], estfile['VAN_COUNT'])
estfile['SUV_COUNT'] = np.where(estfile['RANK']==2, estfile['SUVS'], estfile['SUV_COUNT'])
estfile['PU_COUNT']  = np.where(estfile['RANK']==2, estfile['PUS'],  estfile['PU_COUNT'])
estfile['MC_COUNT']  = np.where(estfile['RANK']==2, estfile['MCS'],  estfile['MC_COUNT'])

estfile['GAS_COUNT'] = np.where(estfile['RANK']==2, estfile['GASS'], estfile['GAS_COUNT'])
estfile['DSL_COUNT'] = np.where(estfile['RANK']==2, estfile['DSLS'], estfile['DSL_COUNT'])
estfile['HYB_COUNT'] = np.where(estfile['RANK']==2, estfile['HYBS'], estfile['HYB_COUNT'])
estfile['PEV_COUNT'] = np.where(estfile['RANK']==2, estfile['PEVS'], estfile['PEV_COUNT'])
estfile['BEV_COUNT'] = np.where(estfile['RANK']==2, estfile['BEVS'], estfile['BEV_COUNT'])

estfile = estfile.drop(columns=['CARS', 'VANS', 'SUVS', 'PUS', 'MCS', 'GASS', 'DSLS', 'HYBS', 'PEVS', 'BEVS'])

In [22]:
# for rank 3 vehicles, only count the number of rank 1 AND 2 vehicles

rank2_ownership = ownership_data[ownership_data['RANK']<=2]
rank2_counts = rank2_ownership.groupby('HOUSEID').agg(sum).reset_index()
estfile = estfile.merge(rank2_counts, how='left', on='HOUSEID', suffixes=(None, '_x'))

estfile['CAR_COUNT'] = np.where(estfile['RANK']==3, estfile['CARS'], estfile['CAR_COUNT'])
estfile['VAN_COUNT'] = np.where(estfile['RANK']==3, estfile['VANS'], estfile['VAN_COUNT'])
estfile['SUV_COUNT'] = np.where(estfile['RANK']==3, estfile['SUVS'], estfile['SUV_COUNT'])
estfile['PU_COUNT']  = np.where(estfile['RANK']==3, estfile['PUS'],  estfile['PU_COUNT'])
estfile['MC_COUNT']  = np.where(estfile['RANK']==3, estfile['MCS'],  estfile['MC_COUNT'])

estfile['GAS_COUNT'] = np.where(estfile['RANK']==3, estfile['GASS'], estfile['GAS_COUNT'])
estfile['DSL_COUNT'] = np.where(estfile['RANK']==3, estfile['DSLS'], estfile['DSL_COUNT'])
estfile['HYB_COUNT'] = np.where(estfile['RANK']==3, estfile['HYBS'], estfile['HYB_COUNT'])
estfile['PEV_COUNT'] = np.where(estfile['RANK']==3, estfile['PEVS'], estfile['PEV_COUNT'])
estfile['BEV_COUNT'] = np.where(estfile['RANK']==3, estfile['BEVS'], estfile['BEV_COUNT'])

estfile = estfile.drop(columns=['CARS', 'VANS', 'SUVS', 'PUS', 'MCS', 'GASS', 'DSLS', 'HYBS', 'PEVS', 'BEVS'])

In [23]:
# for rank 4 vehicles, count the number of rank 1, 2 AND 2 vehicles

rank3_ownership = ownership_data[ownership_data['RANK']<=3]
rank3_counts = rank3_ownership.groupby('HOUSEID').agg(sum).reset_index()
estfile = estfile.merge(rank3_counts, how='left', on='HOUSEID', suffixes=(None, '_x'))

estfile['CAR_COUNT'] = np.where(estfile['RANK']==4, estfile['CARS'], estfile['CAR_COUNT'])
estfile['VAN_COUNT'] = np.where(estfile['RANK']==4, estfile['VANS'], estfile['VAN_COUNT'])
estfile['SUV_COUNT'] = np.where(estfile['RANK']==4, estfile['SUVS'], estfile['SUV_COUNT'])
estfile['PU_COUNT']  = np.where(estfile['RANK']==4, estfile['PUS'],  estfile['PU_COUNT'])
estfile['MC_COUNT']  = np.where(estfile['RANK']==4, estfile['MCS'],  estfile['MC_COUNT'])

estfile['GAS_COUNT'] = np.where(estfile['RANK']==4, estfile['GASS'], estfile['GAS_COUNT'])
estfile['DSL_COUNT'] = np.where(estfile['RANK']==4, estfile['DSLS'], estfile['DSL_COUNT'])
estfile['HYB_COUNT'] = np.where(estfile['RANK']==4, estfile['HYBS'], estfile['HYB_COUNT'])
estfile['PEV_COUNT'] = np.where(estfile['RANK']==4, estfile['PEVS'], estfile['PEV_COUNT'])
estfile['BEV_COUNT'] = np.where(estfile['RANK']==4, estfile['BEVS'], estfile['BEV_COUNT'])

estfile = estfile.drop(columns=['CARS', 'VANS', 'SUVS', 'PUS', 'MCS', 'GASS', 'DSLS', 'HYBS', 'PEVS', 'BEVS'])

### look at options for how we might structure the alternatives for each, consider the count of observations and the average miles driven per vehicle

In [24]:
# vehicle type by fuel type
pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT'], 
               columns=['FUELTYPE2_TXT'], 
               values=['BESTMILE'], 
               aggfunc=['count', 'mean'], 
               fill_value=0)

Unnamed: 0_level_0,count,count,count,count,count,mean,mean,mean,mean,mean
Unnamed: 0_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
FUELTYPE2_TXT,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric
VEHTYPE2_TXT,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
1-Car,113366,1158,4416,434,450,10565.16,11984.98,12752.93,12073.42,10337.94
2-Van,12836,94,14,3,1,11751.09,14545.47,20616.3,7074.17,10168.93
3-SUV,58623,375,520,21,12,11878.62,12407.97,11806.29,12770.84,9808.94
4-Pickup/Truck,40907,5044,10,0,1,10077.71,12186.25,14223.92,0.0,-9.0
5-Motorcycle,6850,0,0,0,19,1676.68,0.0,0.0,0.0,1175.26


In [25]:
# segmenting by vehicle age, like we initially proposed

pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT','FUELTYPE2_TXT'], 
               columns=['VEHAGE2_TXT'],
               values=['BESTMILE'], 
               aggfunc=['count', 'mean'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,VEHAGE2_TXT,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years
VEHTYPE2_TXT,FUELTYPE2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
1-Car,1-Gas,36224,30160,46982,12516.13,11110.57,8710.81
1-Car,2-Diesel,518,242,398,13924.23,13171.44,8739.61
1-Car,3-Hybrid,2382,1537,497,13460.58,12162.66,11186.8
1-Car,4-Plug-In Hybrid,406,24,4,12102.02,12642.2,5757.4
1-Car,5-Battery Electric,416,28,6,10549.84,8849.96,2590.63
2-Van,1-Gas,3460,3177,6199,14646.53,12563.5,9718.63
2-Van,2-Diesel,29,14,51,22296.71,14611.2,10119.87
2-Van,3-Hybrid,11,1,2,12689.97,18248.07,65395.2
2-Van,4-Plug-In Hybrid,3,0,0,7074.17,0.0,0.0
2-Van,5-Battery Electric,1,0,0,10168.93,0.0,0.0


In [26]:
# body type by age, with more detailed age groups

pd.pivot_table(estfile, 
               index=['VEHAGE'], 
               columns=['VEHTYPE2_TXT'], 
               values=['BESTMILE'], 
               aggfunc=['count', 'mean'], 
               fill_value=0)

Unnamed: 0_level_0,count,count,count,count,count,mean,mean,mean,mean,mean
Unnamed: 0_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
VEHTYPE2_TXT,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle
VEHAGE,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3
1,6775,569,6204,2119,220,12938.23,15039.98,13637.16,15263.09,4644.9
2,8012,697,6130,2365,273,13822.24,17956.61,13647.49,15464.36,3241.04
3,8385,812,5143,2168,312,11684.34,13188.39,12517.85,13299.68,2665.48
4,8876,659,4713,1912,283,12266.46,13871.97,13081.53,12956.72,2589.41
5,7898,767,3946,1728,322,12245.1,13781.58,12962.07,13205.06,1996.51
6,5979,595,3594,1711,191,11512.4,13147.58,12464.79,12197.94,1717.02
7,6488,632,2916,1549,239,11276.62,12760.13,12269.32,12723.5,1769.18
8,5753,443,2139,1284,384,11046.84,12863.11,11797.46,10998.62,1675.7
9,6900,772,3227,1938,408,11194.09,12586.43,12082.28,11311.82,2188.0
10,6871,750,3030,2142,455,10877.66,11779.67,11930.08,11157.17,1550.19


From the above tables, we can make three observations:

1. There are no very many plug-in hybrids or battery electric vehicles.  Consider merging these groups. 
2. Motorcycles are not driven very many miles.  I would consider dropping them, but that might be problematic if they are already counted in ActivitySim's car ownership model.  
3. New cars are driven more miles per year than old cars.  Is it worth keeping vehicle age? 
4. There are some old cars on the road!

Based on this, keep motorcycles for now, and carry forward either the option with age groups, or with no age consideration.

In [27]:
# code simple choice option
estfile['CHOICE1'] = 10*estfile['VEHTYPE2'] + estfile['FUELTYPE2']

estfile['CHOICE1_TXT'] = estfile['CHOICE1'].replace({11:'Car-Gas', 
                                                     12:'Car-Diesel',
                                                     13:'Car-Hybrid',
                                                     14:'Car-PEV',
                                                     15:'Car-BEV',
                                                     21:'Van-Gas', 
                                                     22:'Van-Diesel',
                                                     23:'Van-Hybrid', 
                                                     24:'Van-PEV',    
                                                     25:'Van-BEV',
                                                     31:'SUV-Gas',   
                                                     32:'SUV-Diesel',
                                                     33:'SUV-Hybrid',
                                                     34:'SUV-PEV', 
                                                     35:'SUV-BEV',
                                                     41:'Pickup-Gas', 
                                                     42:'Pickup-Diesel',
                                                     43:'Pickup-Hybrid', 
                                                     44:'Pickup-PEV', 
                                                     45:'Pickup-BEV',
                                                     51:'Motorcycle-Gas', 
                                                     52:'Motorcycle-Diesel',
                                                     53:'Motorcycle-Hybrid', 
                                                     54:'Motorcycle-PEV',
                                                     55:'Motorcycle-BEV'
                                                     })

In [28]:
# code choice option with age
estfile['CHOICE2'] = 100*estfile['VEHTYPE2'] + 10*estfile['FUELTYPE2'] + estfile['VEHAGE2'] 

estfile['CHOICE2_TXT'] = estfile['CHOICE2'].replace({111:'Car-Gas-Age15', 
                                                     112:'Car-Gas-Age610', 
                                                     113:'Car-Gas-Age11P', 
                                                     121:'Car-Diesel-Age15',
                                                     122:'Car-Diesel-Age610',
                                                     123:'Car-Diesel-Age11P',                                                                                                 
                                                     131:'Car-Hybrid-Age15',                                                                                                      
                                                     132:'Car-Hybrid-Age610',                                                                                                     
                                                     133:'Car-Hybrid-Age11P',                                                   
                                                     141:'Car-PEV-Age15',                                               
                                                     142:'Car-PEV-Age610',                                               
                                                     143:'Car-PEV-Age11P',                                                    
                                                     151:'Car-BEV-Age15',                                               
                                                     152:'Car-BEV-Age610',                                               
                                                     153:'Car-BEV-Age11P',     

                                                     211:'Van-Gas-Age15', 
                                                     212:'Van-Gas-Age610', 
                                                     213:'Van-Gas-Age11P', 
                                                     221:'Van-Diesel-Age15',
                                                     222:'Van-Diesel-Age610',
                                                     223:'Van-Diesel-Age11P',                                                                                                     
                                                     231:'Van-Hybrid-Age15',                                                                                                      
                                                     232:'Van-Hybrid-Age610',                                                                                                     
                                                     233:'Van-Hybrid-Age11P',                                                     
                                                     241:'Van-PEV-Age15',                                               
                                                     242:'Van-PEV-Age610',                                               
                                                     243:'Van-PEV-Age11P',                                                     
                                                     251:'Van-BEV-Age15',                                               
                                                     252:'Van-BEV-Age610',                                               
                                                     253:'Van-BEV-Age11P',   

                                                     311:'SUV-Gas-Age15', 
                                                     312:'SUV-Gas-Age610', 
                                                     313:'SUV-Gas-Age11P', 
                                                     321:'SUV-Diesel-Age15',
                                                     322:'SUV-Diesel-Age610',
                                                     323:'SUV-Diesel-Age11P',                                                                                                     
                                                     331:'SUV-Hybrid-Age15',                                                                                                      
                                                     332:'SUV-Hybrid-Age610',                                                                                                     
                                                     333:'SUV-Hybrid-Age11P',                                                     
                                                     341:'SUV-PEV-Age15',                                               
                                                     342:'SUV-PEV-Age610',                                               
                                                     343:'SUV-PEV-Age11P',                                                
                                                     351:'SUV-BEV-Age15',                                               
                                                     352:'SUV-BEV-Age610',                                               
                                                     353:'SUV-BEV-Age11P',       
                                                     
                                                     411:'Pickup-Gas-Age15', 
                                                     412:'Pickup-Gas-Age610', 
                                                     413:'Pickup-Gas-Age11P', 
                                                     421:'Pickup-Diesel-Age15',
                                                     422:'Pickup-Diesel-Age610',
                                                     423:'Pickup-Diesel-Age11P',                                                                                                  
                                                     431:'Pickup-Hybrid-Age15',                                                                                                   
                                                     432:'Pickup-Hybrid-Age610',                                                                                                      
                                                     433:'Pickup-Hybrid-Age11P',                                                   
                                                     441:'Pickup-PEV-Age15',                                               
                                                     442:'Pickup-PEV-Age610',                                               
                                                     443:'Pickup-PEV-Age11P',                                                
                                                     451:'Pickup-BEV-Age15',                                               
                                                     452:'Pickup-BEV-Age610',                                               
                                                     453:'Pickup-BEV-Age11P',     

                                                     511:'Motorcycle-Gas-Age15', 
                                                     512:'Motorcycle-Gas-Age610', 
                                                     513:'Motorcycle-Gas-Age11P', 
                                                     521:'Motorcycle-Diesel-Age15',
                                                     522:'Motorcycle-Diesel-Age610',
                                                     523:'Motorcycle-Diesel-Age11P',                                                                                                  
                                                     531:'Motorcycle-Hybrid-Age15',                                                                                                   
                                                     532:'Motorcycle-Hybrid-Age610',                                                                                                      
                                                     533:'Motorcycle-Hybrid-Age11P',                                                  
                                                     541:'Motorcycle-PEV-Age15',                                               
                                                     542:'Motorcycle-PEV-Age610',                                               
                                                     543:'Motorcycle-PEV-Age11P',                                                      
                                                     551:'Motorcycle-BEV-Age15',                                               
                                                     552:'Motorcycle-BEV-Age610',                                               
                                                     553:'Motorcycle-BEV-Age11P',     
                                                     })

In [29]:
# code choice option of body type by detailed age

# cap vehicle age at 20+
estfile['VEHAGE_C20'] = estfile['VEHAGE'].apply(lambda x : min(x,20))

# choice3 is vehicle type by age
estfile['CHOICE3'] = 100*estfile['VEHTYPE2'] + estfile['VEHAGE_C20']


In [30]:
# body type and fuel type by detailed age

pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT','FUELTYPE2_TXT'], 
               columns=['VEHAGE_C20'],
               values=['BESTMILE'], 
               aggfunc=['count', 'mean'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_1,Unnamed: 1_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,VEHAGE_C20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
VEHTYPE2_TXT,FUELTYPE2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3
1-Car,1-Gas,6248,7287,7576,7941,7172,5604,5924,5493,6589,6550,6160,5680,4915,4481,3961,3621,3239,2422,1990,10513,12965.06,13819.99,11604.16,12135.95,12184.54,11456.85,11140.53,11026.02,11133.85,10834.68,9955.48,9594.24,9759.26,9286.55,9215.83,8334.99,8085.74,8141.21,8003.13,7165.56
1-Car,2-Diesel,13,96,138,148,123,79,75,54,25,9,61,36,30,47,37,21,10,15,10,131,11932.65,13810.41,14001.93,13767.25,14325.25,12765.0,12642.96,11515.81,14578.05,27169.72,12356.76,10910.46,7502.61,9957.21,10410.33,6787.83,9413.9,9054.89,8528.65,6074.69
1-Car,3-Hybrid,313,440,504,615,510,259,485,202,282,309,144,177,82,38,27,19,7,0,0,3,13474.28,14576.61,12709.37,13753.56,12878.4,12579.26,12705.66,11556.71,12155.73,11363.61,11622.84,11850.63,10737.39,10124.18,7599.2,9958.26,11287.9,0.0,0.0,16667.1
1-Car,4-Plug-In Hybrid,101,66,93,95,51,11,4,4,3,2,2,1,0,1,0,0,0,0,0,0,12623.19,13375.22,10768.15,12491.82,11128.52,10609.43,13934.23,7568.83,28584.93,7470.96,6558.16,5230.81,0.0,4682.48,0.0,0.0,0.0,0.0,0.0,0.0
1-Car,5-Battery Electric,100,123,74,77,42,26,0,0,1,1,0,1,1,0,1,0,0,0,1,2,10033.09,11505.87,9740.27,10684.86,10159.22,9433.7,0.0,0.0,108.13,2414.41,0.0,2.72,1818.92,0.0,1592.19,0.0,0.0,0.0,7427.33,2351.32
2-Van,1-Gas,554,692,802,652,760,593,630,442,765,747,927,966,618,559,557,486,529,337,244,976,15042.82,17707.96,13154.39,13895.1,13789.38,13129.55,12770.45,12850.93,12531.07,11802.75,10648.7,10981.1,11149.54,9509.77,10701.79,8883.54,8961.08,8565.95,7563.55,8001.44
2-Van,2-Diesel,9,5,7,4,4,2,2,0,7,3,13,5,8,1,1,1,2,1,1,18,18577.43,52368.62,15965.06,13484.15,12968.11,18494.43,9507.52,0.0,18636.47,6032.55,12765.82,14387.67,7472.09,19574.09,11514.99,23410.31,11985.22,135.79,6659.79,7398.73
2-Van,3-Hybrid,4,0,2,3,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,11285.13,0.0,18615.36,9361.3,14567.28,0.0,0.0,18248.07,0.0,0.0,0.0,5967.52,0.0,0.0,124822.88,0.0,0.0,0.0,0.0,0.0
2-Van,4-Plug-In Hybrid,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5845.84,0.0,0.0,0.0,9530.84,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2-Van,5-Battery Electric,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,10168.93,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [31]:
# fourth choice option: vehicle type by fuel type by detailed age
estfile['CHOICE4'] = 1000*estfile['VEHTYPE2'] + 100*estfile['FUELTYPE2'] + estfile['VEHAGE_C20'] 

# also set up as an array of 1 thorugh 500 to meet ALOGIT constraints
i = 1
choice4_to_index = {}
for v in range(1,6):
    for f in range(1,6):
        for a in range(1,21):
            choice4 = 1000*v + 100*f + a            
            choice4_to_index[choice4] = i
            i+=1
            
estfile['CHOICE4B'] = estfile['CHOICE4'].apply(lambda x : choice4_to_index[x]) 

### For each option, figure out how we would sample any missing dimensions

In [32]:
# convert to a more limited set of incomes from codes in NHTS (2017 dollars)
# $0-24,999
# $25,000-$49,999      
# $50,000-$99,999  (reference case)
# $100,000-$149,990
# $150,000+

def determine_income_group(row):
    
    if row['HHFAMINC']<0:
        return 97
    elif row['HHFAMINC']<=3: 
        return 1
    elif row['HHFAMINC']<=5: 
        return 2
    elif row['HHFAMINC']<=7: 
        return 3
    elif row['HHFAMINC']<=9: 
        return 4
    else:
        return 5
    
estfile['HHFAMINC2'] = estfile.apply(determine_income_group, axis=1)
estfile['HHFAMINC2_TXT'] = estfile['HHFAMINC2'].replace({1:'1: $0-24,999', 
                                                       2:'2: $25,000-$49,999', 
                                                       3:'3: $50,000-$99,999', 
                                                       4:'4: $100,000-$149,990', 
                                                       5:'5: $150,000+', 
                                                      97:'97: Missing/Unknown'})
    

In [33]:
# if we model body type by fuel type, we would sample the age distibution for each, 
# be sure to account for income, because higher income HHs own newer cars

pd.pivot_table(estfile[estfile['HHFAMINC2']<97], 
               index=['VEHTYPE2_TXT', 'FUELTYPE2_TXT', 'HHFAMINC2_TXT'], 
               columns=['VEHAGE_C20'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,Unnamed: 1_level_2,VEHAGE_C20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
VEHTYPE2_TXT,FUELTYPE2_TXT,HHFAMINC2_TXT,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3
1-Car,1-Gas,"1: $0-24,999",474,552,659,671,671,545,605,645,772,883,903,936,791,826,797,752,704,576,461,2206
1-Car,1-Gas,"2: $25,000-$49,999",1025,1219,1400,1354,1327,1076,1133,1113,1223,1331,1292,1232,1101,1004,940,819,756,593,452,2292
1-Car,1-Gas,"3: $50,000-$99,999",2011,2436,2453,2710,2468,1861,1907,1741,2192,2139,1957,1805,1542,1376,1160,1068,948,686,595,3111
1-Car,1-Gas,"4: $100,000-$149,990",1327,1547,1517,1668,1423,1081,1160,1008,1228,1143,1024,902,790,649,545,506,405,302,265,1502
1-Car,1-Gas,"5: $150,000+",1205,1312,1307,1297,1073,874,923,837,969,854,784,625,541,481,402,345,301,185,166,1056
1-Car,2-Diesel,"1: $0-24,999",1,3,3,8,1,2,0,1,4,0,7,4,5,5,1,2,1,2,1,18
1-Car,2-Diesel,"2: $25,000-$49,999",3,10,17,12,13,7,8,5,3,2,10,3,3,10,10,7,2,1,0,23
1-Car,2-Diesel,"3: $50,000-$99,999",0,23,39,38,33,33,18,17,1,1,17,11,7,12,10,7,2,7,4,38
1-Car,2-Diesel,"4: $100,000-$149,990",3,21,26,42,43,13,20,18,9,3,18,11,11,14,9,4,2,3,3,25
1-Car,2-Diesel,"5: $150,000+",5,35,48,44,30,24,28,13,8,3,7,5,3,3,5,0,3,2,2,24


In [34]:
# or perhaps we would simply sample by vehicle type and income, leaving fuel type out. 

pd.pivot_table(estfile[estfile['HHFAMINC2']<97], 
               index=['VEHTYPE2_TXT', 'HHFAMINC2_TXT'], 
               columns=['VEHAGE_C20'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
Unnamed: 0_level_1,Unnamed: 1_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,VEHAGE_C20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
VEHTYPE2_TXT,HHFAMINC2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
1-Car,"1: $0-24,999",494,579,682,698,695,558,614,656,789,904,916,952,803,833,799,759,705,578,462,2226
1-Car,"2: $25,000-$49,999",1064,1281,1472,1435,1397,1130,1196,1140,1258,1376,1322,1252,1119,1021,959,828,758,594,452,2315
1-Car,"3: $50,000-$99,999",2142,2633,2691,2973,2685,1989,2073,1816,2297,2247,2016,1879,1573,1401,1180,1080,953,693,599,3149
1-Car,"4: $100,000-$149,990",1466,1733,1719,1941,1643,1169,1306,1086,1307,1212,1083,949,816,674,556,515,409,305,268,1528
1-Car,"5: $150,000+",1394,1542,1554,1559,1249,960,1085,902,1037,925,823,678,565,489,412,346,306,187,169,1081
2-Van,"1: $0-24,999",37,42,43,47,52,36,60,42,90,102,138,170,107,118,123,125,120,97,65,284
2-Van,"2: $25,000-$49,999",80,114,145,105,119,82,110,75,163,145,214,223,124,141,131,131,146,87,58,266
2-Van,"3: $50,000-$99,999",210,236,278,226,264,204,223,169,257,259,311,317,197,154,157,129,145,84,69,264
2-Van,"4: $100,000-$149,990",142,178,178,154,178,144,139,82,133,136,147,155,109,86,80,60,73,47,24,102
2-Van,"5: $150,000+",81,108,143,96,131,109,80,64,105,81,110,80,69,43,38,26,31,11,19,49


In [35]:
# if we model body type by age, we would then sample the fuel types

pd.pivot_table(estfile[estfile['HHFAMINC']<97], 
               index=['VEHTYPE2_TXT', 'VEHAGE_C20'], 
               columns=['FUELTYPE2_TXT'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,count,count
Unnamed: 0_level_1,Unnamed: 1_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,FUELTYPE2_TXT,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric
VEHTYPE2_TXT,VEHAGE_C20,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3
1-Car,1,6248,13,313,101,100
1-Car,2,7287,96,440,66,123
1-Car,3,7576,138,504,93,74
1-Car,4,7941,148,615,95,77
1-Car,5,7172,123,510,51,42
1-Car,6,5604,79,259,11,26
1-Car,7,5924,75,485,4,0
1-Car,8,5493,54,202,4,0
1-Car,9,6589,25,282,3,1
1-Car,10,6550,9,309,2,1


In [36]:
# Or perhaps we would simply input the shares of fuel type by body type for a long-range forecast

pd.pivot_table(estfile[estfile['HHFAMINC2']<97], 
               index=['VEHTYPE2_TXT'], 
               columns=['FUELTYPE2_TXT'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0)

Unnamed: 0_level_0,count,count,count,count,count
Unnamed: 0_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
FUELTYPE2_TXT,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric
VEHTYPE2_TXT,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3
1-Car,109806,1126,4287,426,443
2-Van,12422,91,14,3,1
3-SUV,56821,361,505,20,11
4-Pickup/Truck,39601,4902,10,0,1
5-Motorcycle,6710,0,0,0,19


In [37]:
# if we model all three dimensions, we would sample the ages in more detail within each age group
# we've already accounted for income, so it doesn't need to show up again
# we probably want this to be body type specific to avoid baking in further assumptions about fuel type by age

pd.pivot_table(estfile[estfile['HHFAMINC2']<97], 
               index=['VEHTYPE2_TXT', 'VEHAGE2_TXT'], 
               columns=['VEHAGE_C20'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count,count
Unnamed: 0_level_1,Unnamed: 1_level_1,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE,BESTMILE
Unnamed: 0_level_2,VEHAGE_C20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
VEHTYPE2_TXT,VEHAGE2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
1-Car,1: 1-5 years,6560,7768,8118,8606,7669,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1-Car,2: 6-10 years,0,0,0,0,0,5806,6274,5600,6688,6664,0,0,0,0,0,0,0,0,0,0
1-Car,3: 11+ years,0,0,0,0,0,0,0,0,0,0,6160,5710,4876,4418,3906,3528,3131,2357,1950,10299
2-Van,1: 1-5 years,550,678,787,628,744,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2-Van,2: 6-10 years,0,0,0,0,0,575,612,432,748,723,0,0,0,0,0,0,0,0,0,0
2-Van,3: 11+ years,0,0,0,0,0,0,0,0,0,0,920,945,606,542,529,471,515,326,235,965
3-SUV,1: 1-5 years,5988,5929,4952,4571,3812,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3-SUV,2: 6-10 years,0,0,0,0,0,3477,2819,2070,3137,2943,0,0,0,0,0,0,0,0,0,0
3-SUV,3: 11+ years,0,0,0,0,0,0,0,0,0,0,2617,2502,2587,2103,1703,1373,1121,1003,673,2338
4-Pickup/Truck,1: 1-5 years,2049,2288,2087,1846,1675,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [38]:
# if we model all three dimensions, we would sample the ages in more detail within each age group
# we've already accounted for income, so it doesn't need to show up again
# we probably want this to be body type specific to avoid baking in further assumptions about fuel type by age

pd.pivot_table(estfile[estfile['HHFAMINC2']<97], 
               index=['VEHTYPE2_TXT', 'FUELTYPE2_TXT', 'VEHAGE2_TXT'], 
               columns=['VEHAGE_C20'], 
               values=['BESTMILE'], 
               aggfunc=['count'], 
               fill_value=0).to_clipboard()

### For each alternative, calculate the average fuel economy and the number of makes & models available.

In [39]:
# convert make/model convert to numbers and drop missing records in temp file
estfile['MAKE'] = estfile['MAKE'].replace('XX','-99').astype(int)
estfile['MAKE'] = np.where(estfile['MAKE']<0, 98, estfile['MAKE'])

estfile['MODEL'] = estfile['MODEL'].replace('XXXXX','-99').astype(int)
estfile['MODEL'] = np.where(estfile['MODEL']<0, 98998, estfile['MODEL'])


In [40]:
# vehicle type by fuel type
pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT'], 
               columns=['FUELTYPE2_TXT'], 
               values=['FEGEMPG', 'MAKE', 'MODEL'], 
               aggfunc=['mean', pd.Series.nunique], 
               fill_value=0)

Unnamed: 0_level_0,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL
FUELTYPE2_TXT,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric,1-Gas,2-Diesel,3-Hybrid,4-Plug-In Hybrid,5-Battery Electric
VEHTYPE2_TXT,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3
1-Car,23.86,31.5,43.69,41.93,102.06,34.61,31.83,45.0,25.59,35.92,38120.23,34210.68,45375.22,28174.58,42450.99,33,21,29,18,28,51,28,21,14,18,419,74,53,20,21
2-Van,19.02,16.14,37.29,42.0,113.0,25.29,28.79,35.07,24.33,35.0,29414.54,52354.64,35326.0,24366.67,35055.0,26,7,8,2,1,43,12,5,2,1,208,19,7,2,1
3-SUV,19.83,20.5,28.68,26.19,85.67,29.99,25.74,42.02,35.86,41.0,31927.49,28143.19,43223.7,39268.9,44472.75,27,19,19,4,4,44,19,21,7,4,286,41,38,9,4
4-Pickup/Truck,16.19,13.94,19.4,0.0,62.0,21.68,14.46,17.8,0.0,98.0,24747.81,19049.96,26125.7,0.0,98998.0,25,18,2,0,1,41,21,4,0,1,188,45,6,0,1
5-Motorcycle,50.0,0.0,0.0,0.0,50.0,64.82,0.0,0.0,0.0,98.26,69991.37,0.0,0.0,0.0,98876.32,1,0,0,0,1,23,0,0,0,2,58,0,0,0,2


In [41]:
# vehicle type by age
pd.pivot_table(estfile, 
               index=['VEHAGE_C20'], 
               columns=['VEHTYPE2_TXT'], 
               values=['FEGEMPG', 'MAKE', 'MODEL'], 
               aggfunc=['mean', pd.Series.nunique], 
               fill_value=0)

Unnamed: 0_level_0,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL
VEHTYPE2_TXT,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle,1-Car,2-Van,3-SUV,4-Pickup/Truck,5-Motorcycle
VEHAGE_C20,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3
1,29.32,21.23,23.01,18.07,50,37.79,27.5,33.78,22.2,72.4,40892.82,31762.37,36143.86,24865.25,77006.5,51,21,27,19,1,37,18,35,17,11,202,47,138,46,23
2,28.97,21.06,22.61,17.59,50,37.52,29.71,31.51,21.22,71.75,40006.06,33273.81,33170.47,23895.01,76721.68,44,22,26,18,1,38,20,30,16,13,220,60,135,58,28
3,28.23,20.61,22.07,17.35,50,35.44,25.53,30.04,20.96,69.79,37707.02,29152.94,31467.53,23031.27,74097.31,44,21,25,19,1,36,24,32,17,11,224,59,133,50,30
4,28.39,20.68,21.61,16.82,50,36.96,25.85,32.16,21.02,68.53,38685.35,27765.19,33571.62,23119.06,72716.34,41,19,25,18,1,37,20,33,16,11,229,61,138,50,30
5,27.39,20.6,20.87,16.34,50,36.45,29.73,30.52,21.48,68.58,38086.37,32444.34,31870.24,23693.95,74070.6,45,19,23,17,1,39,24,31,13,12,235,55,123,51,30
6,25.56,20.42,20.46,16.28,50,36.79,32.05,32.07,20.9,68.85,38299.26,34024.6,33118.61,22994.29,74997.98,38,17,20,16,1,39,20,36,14,12,235,52,129,54,25
7,26.08,19.22,20.49,16.33,50,36.72,23.46,32.59,23.11,69.38,38354.24,25762.24,33762.81,25320.6,76716.62,29,19,20,14,1,39,25,35,20,12,244,59,138,61,26
8,24.67,19.23,20.04,16.23,50,37.19,27.13,32.69,21.45,67.85,39334.87,29146.94,34154.88,24650.27,72988.0,31,16,18,18,1,41,22,34,17,11,237,53,126,51,29
9,24.15,18.87,18.89,15.74,50,35.39,28.11,31.22,22.94,67.78,38118.2,31375.6,33259.37,26102.32,72436.83,29,13,17,16,1,39,23,37,21,12,246,57,139,55,30
10,23.91,18.88,18.48,15.74,50,35.62,26.56,30.8,22.99,65.71,37320.99,30636.02,33041.09,25637.98,71033.55,31,14,19,15,1,38,23,36,17,11,250,60,138,53,28


In [42]:
# vehicle type by fuel type by age
pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT','FUELTYPE2_TXT'], 
               columns=['VEHAGE2_TXT'],
               values=['FEGEMPG', 'MAKE', 'MODEL'], 
               aggfunc=['mean', pd.Series.nunique], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,mean,mean,mean,mean,mean,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,Unnamed: 1_level_1,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL
Unnamed: 0_level_2,VEHAGE2_TXT,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years,1: 1-5 years,2: 6-10 years,3: 11+ years
VEHTYPE2_TXT,FUELTYPE2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3
1-Car,1-Gas,26.41,23.72,21.99,36.56,35.83,32.32,38843.65,37866.25,37725.49,30,30,31,41,45,51,296,337,364
1-Car,2-Diesel,32.12,31.02,31.0,30.28,31.71,33.91,31920.25,32042.09,38510.28,16,11,16,17,13,18,32,28,40
1-Car,3-Hybrid,43.21,44.28,44.14,43.99,46.23,46.05,44306.84,46714.9,46352.69,23,24,13,19,14,4,45,27,11
1-Car,4-Plug-In Hybrid,41.9,41.46,47.0,25.03,31.21,49.0,27592.18,34533.54,49135.0,18,3,2,14,5,1,16,9,2
1-Car,5-Battery Electric,102.65,98.71,76.17,35.49,38.61,53.33,42258.72,40901.14,63014.5,25,6,5,16,6,5,17,5,4
2-Van,1-Gas,20.76,19.3,17.9,27.52,27.39,22.97,30705.91,30099.25,28342.84,23,20,20,31,33,38,115,123,130
2-Van,2-Diesel,15.0,13.57,17.49,40.0,35.79,20.49,46138.31,71762.79,50561.69,6,4,6,7,8,9,7,5,13
2-Van,3-Hybrid,38.18,46.0,28.0,38.91,7.0,28.0,39116.09,7442.0,28422.5,7,1,2,5,1,2,7,1,2
2-Van,4-Plug-In Hybrid,42.0,0.0,0.0,24.33,0.0,0.0,24366.67,0.0,0.0,2,0,0,2,0,0,2,0,0
2-Van,5-Battery Electric,113.0,0.0,0.0,35.0,0.0,0.0,35055.0,0.0,0.0,1,0,0,1,0,0,1,0,0


In [43]:
# vehicle type by fuel type by detailed age
pd.pivot_table(estfile, 
               index=['VEHTYPE2_TXT','FUELTYPE2_TXT'], 
               columns=['VEHAGE_C20'],
               values=['FEGEMPG', 'MAKE', 'MODEL'], 
               aggfunc=['mean', pd.Series.nunique], 
               fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,...,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique,nunique
Unnamed: 0_level_1,Unnamed: 1_level_1,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,...,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,FEGEMPG,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MAKE,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL,MODEL
Unnamed: 0_level_2,VEHAGE_C20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,...,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
VEHTYPE2_TXT,FUELTYPE2_TXT,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3,Unnamed: 25_level_3,Unnamed: 26_level_3,Unnamed: 27_level_3,Unnamed: 28_level_3,Unnamed: 29_level_3,Unnamed: 30_level_3,Unnamed: 31_level_3,Unnamed: 32_level_3,Unnamed: 33_level_3,Unnamed: 34_level_3,Unnamed: 35_level_3,Unnamed: 36_level_3,Unnamed: 37_level_3,Unnamed: 38_level_3,Unnamed: 39_level_3,Unnamed: 40_level_3,Unnamed: 41_level_3,Unnamed: 42_level_3,Unnamed: 43_level_3,Unnamed: 44_level_3,Unnamed: 45_level_3,Unnamed: 46_level_3,Unnamed: 47_level_3,Unnamed: 48_level_3,Unnamed: 49_level_3,Unnamed: 50_level_3,Unnamed: 51_level_3,Unnamed: 52_level_3,Unnamed: 53_level_3,Unnamed: 54_level_3,Unnamed: 55_level_3,Unnamed: 56_level_3,Unnamed: 57_level_3,Unnamed: 58_level_3,Unnamed: 59_level_3,Unnamed: 60_level_3,Unnamed: 61_level_3,Unnamed: 62_level_3,Unnamed: 63_level_3,Unnamed: 64_level_3,Unnamed: 65_level_3,Unnamed: 66_level_3,Unnamed: 67_level_3,Unnamed: 68_level_3,Unnamed: 69_level_3,Unnamed: 70_level_3,Unnamed: 71_level_3,Unnamed: 72_level_3,Unnamed: 73_level_3,Unnamed: 74_level_3,Unnamed: 75_level_3,Unnamed: 76_level_3,Unnamed: 77_level_3,Unnamed: 78_level_3,Unnamed: 79_level_3,Unnamed: 80_level_3,Unnamed: 81_level_3,Unnamed: 82_level_3,Unnamed: 83_level_3,Unnamed: 84_level_3,Unnamed: 85_level_3,Unnamed: 86_level_3,Unnamed: 87_level_3,Unnamed: 88_level_3,Unnamed: 89_level_3,Unnamed: 90_level_3,Unnamed: 91_level_3,Unnamed: 92_level_3,Unnamed: 93_level_3,Unnamed: 94_level_3,Unnamed: 95_level_3,Unnamed: 96_level_3,Unnamed: 97_level_3,Unnamed: 98_level_3,Unnamed: 99_level_3,Unnamed: 100_level_3
1-Car,1-Gas,27.15,26.64,26.35,26.33,25.67,24.25,24.33,23.93,23.25,23.02,22.33,22.72,22.23,22.2,21.94,21.99,21.62,21.66,21.93,21.41,37.85,37.37,35.19,36.73,35.88,36.5,36.14,36.87,34.86,35.07,34.23,33.45,33.92,33.66,32.79,31.99,32.22,31.94,32.1,29.35,40892.47,39958.46,37488.51,38628.06,37596.3,38071.92,37840.04,39118.75,37706.42,...,23,22,22,23,24,22,21,20,31,36,36,35,36,38,38,39,41,39,38,40,39,42,36,39,38,39,39,37,46,199,216,218,226,231,229,241,236,245,250,263,241,226,211,201,197,186,172,158,234
1-Car,2-Diesel,29.77,33.44,31.08,32.5,32.03,30.91,31.65,31.41,29.12,29.56,32.93,30.92,32.0,34.81,33.11,34.0,35.0,29.73,33.9,27.08,36.08,30.62,29.53,30.53,29.92,30.49,30.91,31.22,36.68,38.22,31.46,28.17,29.57,28.98,29.57,30.86,31.2,40.27,32.3,40.73,52490.85,31280.82,31524.33,31011.29,31783.1,31412.15,30951.73,31261.76,36735.16,...,5,5,5,9,5,2,5,4,14,6,7,10,9,8,7,6,6,9,4,4,6,4,3,5,2,2,5,4,12,7,15,19,14,12,10,10,7,13,6,9,10,7,9,9,6,5,10,6,17
1-Car,3-Hybrid,44.67,43.46,42.79,42.58,43.3,44.26,46.37,42.67,44.26,42.07,43.74,44.89,45.04,40.79,42.52,42.74,50.57,0.0,0.0,45.33,42.43,44.0,42.86,43.42,46.75,45.75,44.59,47.44,47.6,47.14,46.38,46.63,45.63,42.5,47.67,47.74,40.43,0.0,0.0,41.0,43207.05,44193.85,43169.62,43609.02,47044.63,45946.83,45751.25,47489.81,47650.43,...,6,2,6,3,2,2,0,0,3,10,15,14,14,12,10,8,7,7,6,3,2,2,3,2,2,2,0,0,2,21,31,28,26,25,16,13,11,10,13,7,6,3,6,3,2,2,0,0,3
1-Car,4-Plug-In Hybrid,42.64,41.91,41.25,42.2,41.08,38.27,46.75,40.5,46.0,43.5,50.0,50.0,0.0,38.0,0.0,0.0,0.0,0.0,0.0,0.0,23.9,25.2,23.87,25.07,29.1,21.18,41.75,33.25,49.0,34.5,49.0,49.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,27992.85,25228.14,30532.62,25104.31,29130.31,21210.73,41791.0,33286.75,49044.0,...,1,0,1,0,0,0,0,0,0,12,5,8,6,2,3,2,4,1,2,1,1,0,1,0,0,0,0,0,0,13,6,9,7,2,4,2,4,2,2,1,1,0,1,0,0,0,0,0,0
1-Car,5-Battery Electric,103.47,105.06,100.05,102.83,97.93,98.38,0.0,0.0,87.0,119.0,0.0,106.0,70.0,0.0,70.0,0.0,0.0,0.0,113.0,49.0,34.19,35.26,35.55,36.68,36.98,36.81,0.0,0.0,65.0,59.0,0.0,49.0,98.0,0.0,98.0,0.0,0.0,0.0,12.0,31.5,45192.15,42584.95,43420.05,36773.51,42329.0,39275.62,0.0,0.0,65031.0,...,1,1,0,1,0,0,0,1,2,12,12,10,9,8,4,0,0,1,1,0,1,1,0,1,0,0,0,1,2,11,9,9,10,6,3,0,0,1,1,0,1,1,0,1,0,0,0,1,2
2-Van,1-Gas,21.13,21.1,20.51,20.62,20.56,20.45,19.24,19.17,18.91,18.9,18.71,18.82,18.33,18.14,17.82,17.85,17.51,16.96,17.39,16.53,27.28,29.71,25.34,25.67,29.59,31.96,23.34,27.17,28.2,26.49,26.51,22.72,30.68,23.22,22.39,22.37,21.78,18.84,19.94,18.3,31484.08,33294.22,29011.08,27603.95,32231.58,33909.27,25619.49,29196.05,31036.1,...,14,10,9,11,13,13,11,9,17,18,20,24,20,23,19,25,22,23,21,24,22,22,18,21,19,21,16,16,24,43,60,54,59,54,52,58,53,56,60,58,61,50,43,47,42,44,44,34,52
2-Van,2-Diesel,16.78,14.8,13.71,14.25,14.25,11.5,11.5,0.0,14.57,14.0,17.0,17.0,15.25,6.0,17.0,29.0,23.0,17.0,19.0,18.33,34.22,30.0,38.57,56.0,52.0,59.5,62.0,0.0,18.0,44.0,15.92,28.2,27.0,12.0,12.0,12.0,7.0,12.0,12.0,22.61,44851.89,30449.4,38971.14,56595.25,70729.5,68219.5,70729.5,0.0,68477.57,...,1,4,1,1,1,2,1,1,5,4,2,3,2,2,2,2,0,4,3,6,3,4,1,1,1,1,1,1,5,4,3,3,2,2,2,2,0,3,2,7,2,3,1,1,1,2,1,1,8
2-Van,3-Hybrid,37.5,0.0,38.0,42.0,34.0,0.0,0.0,46.0,0.0,0.0,0.0,26.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,50.5,0.0,48.5,24.33,28.0,0.0,0.0,7.0,0.0,0.0,0.0,49.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0,50720.75,0.0,48724.5,24366.67,28422.5,0.0,0.0,7442.0,0.0,...,1,0,0,1,0,0,0,0,0,2,0,2,2,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,3,0,2,2,2,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0
2-Van,4-Plug-In Hybrid,38.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12027.0,0.0,0.0,0.0,49046.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2-Van,5-Battery Electric,0.0,0.0,113.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,35055.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [44]:
# vehicle type by fuel type by detailed age -- output to CSV

vehtype_names = ['Car', 'Van', 'SUV', 'Pickup', 'Motorcycle']
fueltype_names = ['Gas', 'Diesel', 'Hybrid', 'Plug In Electric', 'Battery Electric']

df = pd.DataFrame(columns=['CHOICE4', 'VEHTYPE2_TXT', 'FUELTYPE2_TXT', 'VEHAGE_C20'])

for v in range(1,6):
    for f in range(1,6):
        for a in range(1,21):
            df = df.append({'CHOICE4' : v*1000+f*100+a, 
                            'VEHTYPE2_TXT' : vehtype_names[v-1], 
                            'FUELTYPE2_TXT' : fueltype_names[f-1],
                            'VEHAGE_C20' : a}, 
                            ignore_index=True)

    
pivot = pd.pivot_table(estfile, 
               index=['CHOICE4'], 
               values=['VEHID', 'FEGEMPG', 'MAKE', 'MODEL'], 
               aggfunc={'VEHID' : 'count', 
                        'FEGEMPG' : 'mean', 
                        'MAKE' : pd.Series.nunique, 
                        'MODEL': pd.Series.nunique}, 
               fill_value=0)   

df = df.merge(pivot, on='CHOICE4', how='left')
df = df.fillna(0)
df.to_csv('vehtype_attributes_raw.csv')

### Wrap in the number of chargers by CBSA

In [45]:
# merge CBSA data
cbsa_data = pd.read_csv('cbsa_chargers.csv')
cbsa_data['HH_CBSA'] = cbsa_data['HH_CBSA'].astype(str)
estfile = estfile.merge(cbsa_data, how='left', on='HH_CBSA', suffixes=(None, '_x'))


In [46]:
# merge state data
state_data = pd.read_csv('state_chargers.csv')
estfile = estfile.merge(state_data, how='left', on='HHSTFIPS', suffixes=(None, '_x'))


In [47]:
estfile.head()

Unnamed: 0,HOUSEID,VEHID,VEHYEAR,VEHAGE,MAKE,MODEL,FUELTYPE,VEHTYPE,WHOMAIN,OD_READ,HFUEL,VEHOWNED,VEHOWNMO,ANNMILES,HYBRID,PERSONID,TRAVDAY,HOMEOWN,HHSIZE,HHVEHCNT,HHFAMINC,DRVRCNT,HHSTATE,HHSTFIPS,NUMADLT,WRKCOUNT,TDAYDATE,LIF_CYC,MSACAT,MSASIZE,RAIL,URBAN,URBANSIZE,URBRUR,CENSUS_D,CENSUS_R,CDIVMSAR,HH_RACE,HH_HISP,HH_CBSA,SMPLSRCE,WTHHFIN,BESTMILE,BEST_FLG,BEST_EDT,BEST_OUT,HBHUR,HTHTNRNT,HTPPOPDN,...,WOMEN16P,BORN_IN_US16P,BORN_OUTSIDE_US16P,DISTTOWK17,DISTTOWK_SUM,DISTTOWK_AVG,VEHTYPE2,VEHTYPE2_TXT,FUELTYPE2,FUELTYPE2_TXT,VEHAGE2,VEHAGE2_TXT,NEW_VEHICLE,RANK,CAR_COUNT,VAN_COUNT,SUV_COUNT,PU_COUNT,MC_COUNT,GAS_COUNT,DSL_COUNT,HYB_COUNT,PEV_COUNT,BEV_COUNT,RANK_x,RANK_x.1,RANK_x.2,CHOICE1,CHOICE1_TXT,CHOICE2,CHOICE2_TXT,VEHAGE_C20,CHOICE3,CHOICE4,CHOICE4B,HHFAMINC2,HHFAMINC2_TXT,CBSA_NAME,CBSA_CHARGERS,CBSA_POP,CBSA_SQMI,CBSA_CHARGERS_PERCAP,CBSA_CHARGERS_PERMI,State,ST_CHARGERS,ST_POP,ST_SQMI,ST_CHARGERS_PERCAP,ST_CHARGERS_PERMI
0,30000007,1,2007,10,49,49032,1,1,3,69000,-1,1,-1,1000,2,3,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,14611.93,1,-1,4,T,50,1500,...,2,3,0,82.61,82.61,82.61,1,1-Car,1,1-Gas,2,2: 6-10 years,0,1.0,0,0,0,0,0,0,0,0,0,0,1.0,3.0,6.0,11,Car-Gas,112,Car-Gas-Age610,10,110,1110,10,3,"3: $50,000-$99,999",,,,,,,North Carolina,2584.0,9448939.0,53819.0,0.0,0.05
1,30000007,2,2004,13,49,49442,1,2,-8,164000,-1,1,-1,2500,2,-8,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,4767.09,3,-1,-1,T,50,1500,...,2,3,0,82.61,82.61,82.61,2,2-Van,1,1-Gas,3,3: 11+ years,0,4.0,2,0,0,1,0,3,0,0,0,0,1.0,3.0,6.0,21,Van-Gas,213,Van-Gas-Age11P,13,213,2113,113,3,"3: $50,000-$99,999",,,,,,,North Carolina,2584.0,9448939.0,53819.0,0.0,0.05
2,30000007,3,1998,19,19,19014,1,1,1,120000,-1,1,-1,900,2,1,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,8000.32,1,-1,-1,T,50,1500,...,2,3,0,82.61,82.61,82.61,1,1-Car,1,1-Gas,3,3: 11+ years,0,3.0,1,0,0,1,0,2,0,0,0,0,1.0,3.0,6.0,11,Car-Gas,113,Car-Gas-Age11P,19,119,1119,19,3,"3: $50,000-$99,999",,,,,,,North Carolina,2584.0,9448939.0,53819.0,0.0,0.05
3,30000007,5,1993,24,20,20481,1,4,2,300000,-1,1,-1,10000,2,2,2,1,3,5,7,3,NC,37,3,1,201608,10,3,1,2,1,1,1,5,3,53,2,2,XXXXX,2,187.31,12437.66,1,-1,-1,T,50,1500,...,2,3,0,82.61,82.61,82.61,4,4-Pickup/Truck,1,1-Gas,3,3: 11+ years,0,2.0,1,0,0,0,0,1,0,0,0,0,1.0,3.0,6.0,41,Pickup-Gas,413,Pickup-Gas-Age11P,20,420,4120,320,3,"3: $50,000-$99,999",,,,,,,North Carolina,2584.0,9448939.0,53819.0,0.0,0.05
4,30000008,1,2014,3,20,20028,1,1,97,-88,-1,1,-1,8000,2,97,5,1,2,4,8,2,WI,55,2,2,201608,2,2,5,2,4,6,2,3,2,32,1,2,33460,2,69.51,6099.62,3,-1,-1,R,5,300,...,1,2,0,11.18,11.18,5.59,1,1-Car,1,1-Gas,1,1: 1-5 years,0,4.0,1,0,1,1,0,3,0,0,0,0,1.0,3.0,6.0,11,Car-Gas,111,Car-Gas-Age15,3,103,1103,3,4,"4: $100,000-$149,990","Minneapolis-St. Paul-Bloomington, MN-WI",1071.0,3904297.0,6027.17,0.0,0.18,Wisconsin,1565.0,5424853.0,65496.0,0.0,0.02


In [48]:
# if the CBSA is coded, keep that.  Otherwise use the state level values

estfile['CHARGERS_PERCAP'] = np.where(estfile['CBSA_CHARGERS_PERCAP']>0, estfile['CBSA_CHARGERS_PERCAP'], estfile['ST_CHARGERS_PERCAP'])
estfile['CHARGERS_PERMI']  = np.where(estfile['CBSA_CHARGERS_PERMI']>0,  estfile['CBSA_CHARGERS_PERMI'],  estfile['ST_CHARGERS_PERMI'])

In [49]:
# code the CBSA as a numerical field
estfile['HH_CBSA'] = estfile['HH_CBSA'].replace('XXXXX', '0')
estfile['HH_CBSA'] = estfile['HH_CBSA'].astype(int).fillna(0)

In [50]:
# code the state FIPS as a numerical field
estfile['HHSTFIPS'] = estfile['HHSTFIPS'].replace('XXXXX', '0')
estfile['HHSTFIPS'] = estfile['HHSTFIPS'].astype(int).fillna(0)

### Now assemble the estimation file

In [51]:
# keep only the potentially relevant fields
fields = ['HOUSEID',
'VEHID',
'RANK',
'VEHYEAR',
'VEHAGE',
'MAKE',
'MODEL',
'FUELTYPE',
'VEHTYPE',
'WHOMAIN',
'HFUEL',
'CHOICE1', 
'CHOICE1_TXT', 
'CHOICE2',
'CHOICE2_TXT',
'CHOICE3',
'CHOICE4',
'CHOICE4B', 
'VEHTYPE2',
'VEHTYPE2_TXT',
'FUELTYPE2',
'FUELTYPE2_TXT',
'VEHAGE2',
'VEHAGE2_TXT',
'VEHAGE_C20',
'NEW_VEHICLE',
'HOMEOWN',
'HHSIZE',
'HHVEHCNT',
'HHFAMINC',
'DRVRCNT',
'HHSTATE',
'HHSTFIPS',
'NUMADLT',
'WRKCOUNT',
'LIF_CYC',
'MSACAT',
'MSASIZE',
'RAIL',
'URBAN',
'URBANSIZE',
'URBRUR',
'CENSUS_D',
'CENSUS_R',
'CDIVMSAR',
'HH_RACE',
'HH_HISP',
'HH_CBSA',
'SMPLSRCE',
'WTHHFIN',
'BESTMILE',
'HBHUR',
'HTHTNRNT',
'HTPPOPDN',
'HTRESDN',
'HTEEMPDN',
'HBHTNRNT',
'HBPPOPDN',
'HBRESDN',
'GSYRGAL',
'GSTOTCST',
'FEGEMPG',
'GSCOST',
'PRICE',
'PLACE',
'HHRELATD',
'YOUNGCHILD',
'CHILDREN0_4',
'CHILDREN5_15',
'CHILDREN16P',
'COLLEGE_STUDENTS',
'FULL_TIME_WORKERS',
'PART_TIME_WORKERS',
'NW_ADULTS_UNDER65',
'NW_ADULTS_65P',
'WORKERS',
'CHILDREN',
'PERSONS16P',
'DRIVERS',
'MEN16P',
'WOMEN16P',
'BORN_IN_US16P',
'BORN_OUTSIDE_US16P',
'DISTTOWK_SUM',
'DISTTOWK_AVG', 
'CAR_COUNT',
'VAN_COUNT',
'SUV_COUNT',
'PU_COUNT',
'MC_COUNT',
'GAS_COUNT',
'DSL_COUNT',
'HYB_COUNT',
'PEV_COUNT',
'BEV_COUNT', 
'CBSA_CHARGERS',
'CBSA_POP',
'CBSA_SQMI',
'CBSA_CHARGERS_PERCAP',
'CBSA_CHARGERS_PERMI',
'ST_CHARGERS',
'ST_POP',
'ST_SQMI',
'ST_CHARGERS_PERCAP',
'ST_CHARGERS_PERMI',
'CHARGERS_PERCAP',
'CHARGERS_PERMI']

estfile = estfile[fields]

In [52]:
# merge with the alternative specific data for choice set 1
estfile = estfile.select_dtypes(include=[np.number])
estfile.to_csv('estfile9.csv')