# Estimating Auto Ownership Model

This notebook re-estimates the ActivitySim auto ownership model using Larch.

# Load libraries

In [1]:
import os
import larch  # !conda install larch -c conda-forge # for estimation
import pandas as pd
import numpy as np
from larch import P, X
import matplotlib.pyplot as plt

Change to the working directory that contains the estimation data bundles (EDBs) for auto ownership (in `output/estimation_data_bundle`).

In [2]:
# Working directory that holds the estimation data bundles (EDBs).
# Set the ASIM_ESTIMATION_DIR environment variable to run this notebook from
# another location; when it is unset or empty, the original project path is used.
ESTIMATION_DIR = (
    os.environ.get("ASIM_ESTIMATION_DIR")
    or '/projects/SANDAG/2017 On-Call Modeling Services/Area B/TO 05 - ABM3/estimation'
)
os.chdir(ESTIMATION_DIR)

# Load data and prep model for estimation

In [3]:
from activitysim.estimation.larch import component_model

# Name of the component to estimate; also reused later to name the output files.
modelname = "auto_ownership"

# With return_data=True the call returns both the model and its data container.
model, data = component_model(
    modelname,
    return_data=True,
)

# Review data loaded from the EDB

The next step is to review the contents of the EDB loaded above, including the coefficients, model settings, utilities specification, and chooser and alternative data.

### Coefficients

In [4]:
# Coefficient table from the EDB: indexed by coefficient_name, with `value` and `constrain` columns.
data.coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coef_alternative_specific_constant_for_1_driver_household_0_CARS,0,F
coef_alternative_specific_constant_for_1_driver_household_2_CARS,0,F
coef_alternative_specific_constant_for_1_driver_household_3_CARS,0,F
coef_alternative_specific_constant_for_1_driver_household_4_CARS,0,F
coef_alternative_specific_constant_for_2_driver_household_0_CARS,0,F
...,...,...
coef_NEST_0_1CARS,1,T
cars0_2016,0,F
cars2_2016,0,F
cars3_2016,0,F


#### Utility specification

In [5]:
# Utility specification from the EDB: one row per utility term (Label, Description, Expression)
# with the coefficient name applied to each alternative column (cars0 ... cars4).
data.spec

Unnamed: 0,Label,Description,Expression,cars0,cars1,cars2,cars3,cars4,Unnamed: 8
0,util_drivers_1,alternative specific constant for 1 driver hou...,@df.num_drivers==1,coef_alternative_specific_constant_for_1_drive...,,coef_alternative_specific_constant_for_1_drive...,coef_alternative_specific_constant_for_1_drive...,coef_alternative_specific_constant_for_1_drive...,
1,util_drivers_2,alternative specific constant for 2 driver hou...,@df.num_drivers==2,coef_alternative_specific_constant_for_2_drive...,coef_alternative_specific_constant_for_2_drive...,,coef_alternative_specific_constant_for_2_drive...,coef_alternative_specific_constant_for_2_drive...,
2,util_drivers_3,alternative specific constant for 3 driver hou...,@df.num_drivers==3,coef_alternative_specific_constant_for_3_drive...,coef_alternative_specific_constant_for_3_drive...,coef_alternative_specific_constant_for_3_drive...,,coef_alternative_specific_constant_for_3_drive...,
3,util_drivers_4p,alternative specific constant for 4+ driver ho...,@df.num_drivers>=4,coef_alternative_specific_constant_for_4_drive...,coef_alternative_specific_constant_for_4_drive...,coef_alternative_specific_constant_for_4_drive...,coef_alternative_specific_constant_for_4_drive...,,
4,util_has_18_24,Presence of persons age 18-24,(num_college_age > 0),coef_cars0_has_18_24,,coef_cars2_has_18_24,coef_cars34_has_18_24,coef_cars34_has_18_24,
5,util_has_25_34,Presence of persons age 35-34,(num_young_adults > 0),coef_cars0_has_25_34,,coef_cars2_has_25_34,coef_cars34_has_25_34,coef_cars34_has_25_34,
6,util_has_65_79,Presence of persons age 65-79,num_young_retirees > 0,coef_cars0_has_65_79,,coef_cars2_has_65_79,coef_cars34_has_65_79,coef_cars34_has_65_79,
7,util_has_80plus,Presence of persons age 80+,num_old_retirees > 0,coef_cars0_has_80plus,,coef_cars2_has_80plus,coef_cars34_has_80plus,coef_cars34_has_80plus,
8,util_has_0_4,Presence of children age 0-4,(num_young_children>0),coef_cars0_has_0_4,,coef_cars2_has_0_4,coef_cars34_has_0_4,coef_cars34_has_0_4,
9,util_has_5_17,Presence of children age 5-17,(num_children_5_to_15+num_children_16_to_17)>0,coef_cars0_has_5_17,,coef_cars2_has_5_17,coef_cars34_has_5_17,coef_cars34_has_5_17,


## Explore data

In [6]:
# Chooser table from the EDB, indexed by household_id; it carries model_choice and
# override_choice alongside the utility-term and household attribute columns.
data.chooser_data

Unnamed: 0_level_0,model_choice,override_choice,util_drivers_1,util_drivers_2,util_drivers_3,util_drivers_4p,util_persons_16_17,util_persons_18_24,util_persons_25_34,util_has_18_24,...,preschool_target,is_parking_zone,shopping_accessibility_0,shopping_accessibility_1,shopping_accessibility_2,othdiscr_accessibility_0,othdiscr_accessibility_1,othdiscr_accessibility_2,res_type,override_choice_code
household_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,3,2,0,1,0,0,0,0,0,0,...,315,True,11.538816,11.086219,11.217657,15.552833,15.204355,14.884606,1,3
2,0,2,0,1,0,0,0,0,0,0,...,156,True,11.807039,11.389310,11.339877,14.357026,13.978252,13.464349,1,3
3,0,1,1,0,0,0,0,1,0,1,...,240,True,12.744699,12.191229,12.060988,14.775725,14.323247,13.877434,4,2
4,0,3,0,1,0,0,0,0,0,0,...,239,True,12.973414,12.131250,11.800279,16.350536,15.744719,15.663887,1,4
5,3,1,0,1,0,0,0,0,0,0,...,97,True,11.958179,11.186803,11.269519,14.740860,14.043881,13.959714,1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44589,1,1,1,0,0,0,0,0,1,0,...,286,True,14.403127,13.669578,13.162124,17.174279,16.511511,15.836580,3,2
48063,1,1,0,1,0,0,0,0,0,0,...,0,True,11.943152,11.871376,10.778622,15.158994,14.774424,14.634361,6,2
48064,1,1,1,0,0,0,0,1,0,1,...,1028,True,12.337126,11.879260,11.438389,15.100142,14.147283,13.978258,4,2
49762,2,1,1,0,0,0,0,0,0,0,...,858,True,11.526566,10.899991,10.909738,14.080078,13.737474,13.444756,1,2


In [7]:
# Counts of override_choice by HHT, with "All" totals on both margins.
pd.crosstab(
    index=data.chooser_data["HHT"],
    columns=data.chooser_data["override_choice"],
    margins=True,
)

override_choice,0,1,2,3,4,All
HHT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,20,8,1,0,0,29
1,42,331,847,237,77,1534
4,660,3450,2365,658,284,7417
All,722,3789,3213,895,361,8980


In [8]:
# Counts of override_choice by num_workers, with "All" totals on both margins.
pd.crosstab(
    index=data.chooser_data["num_workers"],
    columns=data.chooser_data["override_choice"],
    margins=True,
)

override_choice,0,1,2,3,4,All
num_workers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,459,1478,730,170,53,2890
1,234,2042,1073,296,110,3755
2,19,259,1383,371,144,2176
3,2,9,26,54,35,126
4,0,1,0,3,12,16
5,8,0,1,1,4,14
6,0,0,0,0,1,1
7,0,0,0,0,2,2
All,722,3789,3213,895,361,8980


In [9]:
# NOTE(review): the disabled plot below uses the `distance_int` and `external_worker` columns
# and is titled "Share of External Workers by Distance to Closest External Station". It looks
# like a leftover from a different estimation notebook rather than part of the auto ownership
# exploration — TODO confirm, and delete this cell if it is not needed.
#plt.hist(data.chooser_data['distance_int'],range=(0, data.chooser_data['distance_int'].max()), bins=data.chooser_data['distance_int'].max() + 1)
#plot_df = data.chooser_data.groupby('distance_int')['external_worker'].mean().mul(100).reindex(range(data.chooser_data.distance_int.min()-1,data.chooser_data.distance_int.max()+1), fill_value=0)

#ax = plot_df.plot(kind='bar',rot = 0,title='Share of External Workers by Distance to Closest External Station',ylim=[0, 30], xlabel="Distance (mi)", ylabel="Percent",  figsize=(20, 5))

#plt.show()

# Set Coefficients

In [10]:
# Print the utility functions currently attached to the model: a mapping from
# alternative code to a linear combination of parameters (P.*) and data columns (X.*).
# (The earlier bare `dir(model)` call was dropped: its result was neither displayed nor used.)
print(model.utility_co)

DictOfLinearFunction_C({1:   P.coef_alternative_specific_constant_for_1_driver_household_0_CARS * X.util_drivers_1
+ P.coef_alternative_specific_constant_for_2_driver_household_0_CARS * X.util_drivers_2
+ P.coef_alternative_specific_constant_for_3_driver_household_0_CARS * X.util_drivers_3
+ P.coef_alternative_specific_constant_for_4_driver_household_0_CARS * X.util_drivers_4p
+ P.coef_cars0_has_18_24 * X.util_has_18_24
+ P.coef_cars0_has_25_34 * X.util_has_25_34
+ P.coef_cars0_has_65_79 * X.util_has_65_79
+ P.coef_cars0_has_80plus * X.util_has_80plus
+ P.coef_cars0_has_0_4 * X.util_has_0_4
+ P.coef_cars0_has_5_17 * X.util_has_5_17
+ P.coef_cars0_workers_1 * X.util_has_1_worker
+ P.coef_cars0_workers_2 * X.util_has_2_workers
+ P.coef_cars0_workers_3plus * X.util_has_3plus_workers
+ P.coef_household_income_15k_0_CARS * X.util_hh_income_verylow
+ P.coef_household_income_1530k_0_CARS * X.util_hh_income_low
+ P.coef_household_income_3060k_0_CARS * X.util_hh_income_mid
+ P.coef_household_in

In [11]:
# NOTE(review): this disabled override defines utilities for alternatives 0 and 1 only and uses
# coefficients such as `coef_dist_to_nearest_ext_station` and `asc_external_worker`, whereas the
# auto ownership spec above has alternatives cars0 ... cars4. It looks like a leftover from a
# different (external worker) model — TODO confirm, and delete this cell if it is not needed.
#model.utility_co = {0: P.coef_dist_to_nearest_ext_station * X.util_dist_to_nearest_ext_station
#+ P.coef_size_of_nearest_ext_station * X.util_size_of_nearest_ext_station
#+ P.coef_part_time * X.parttime
#+ P.coef_agriculture * X.agriculture
#+ P.coef_business_srv * X.business_srv
#+ P.coef_construction * X.construction
#+ P.coef_education * X.education
#+ P.coef_entertainment * X.entertainment
#+ P.coef_food_srv * X.food_srv                   
#+ P.coef_government * X.government
#+ P.coef_healthcare * X.healthcare                   
#+ P.coef_manufacturing * X.manufacturing
#+ P.coef_mgmt_srv * X.mgmt_srv
#+ P.coef_military * X.military
#+ P.coef_retail * X.retail    
#+ P.coef_inc_lt15 * X.income_less15K
#+ P.coef_inc_15_25 * X.income_15_25 
#+ P.coef_inc_25_50 * X.income_25_50 
#+ P.coef_inc_100_150 * X.income_100_150 
#+ P.coef_inc_150_250 * X.income_150_250 
#+ P.coef_inc_250plus * X.income_250plus
#+ P.asc_external_2016 * X.year_2016         
#+ P.coef_dist_lt_2p5 * X.distance_lt_2p5                    
#+ P.asc_external_worker * X.util_asc_placeholder, 1: 0}

# Estimate

With the model set up for estimation, the next step is to estimate the model coefficients.  Make sure to use a sufficiently large household sample and set of zones to avoid an over-specified model, which does not have a numerically stable likelihood-maximizing solution.  Larch has built-in estimation methods including BHHH, and also offers access to more advanced general-purpose non-linear optimizers in the `scipy` package, including SLSQP, which allows for bounds and constraints on parameters.  BHHH is the default and typically runs faster, but does not follow constraints on parameters.  This notebook uses SLSQP below.

In [12]:
# Load the estimation data into the model before maximizing the likelihood.
model.load_data()
# Presumably a data check/repair step (see the `repair_ch_av` option); left disabled here.
#model.doctor(repair_ch_av="-")

req_data does not request avail_ca or avail_co but it is set and being provided
converting data_co to <class 'numpy.float64'>


In [13]:
# Maximize the log likelihood with the SLSQP optimizer, allowing up to 1000 iterations.
model.maximize_loglike(
    method="SLSQP",
    options=dict(maxiter=1000),
)


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
asc_GQ_0_autos,2.468182,0.0,0.0,,,0,,2.468182
asc_allhhs_0_autos,0.000000,0.0,0.0,0.0,0.0,1,,0.000000
asc_allhhs_2_autos,0.000000,0.0,0.0,0.0,0.0,1,,0.000000
asc_allhhs_4plus_autos,0.000000,0.0,0.0,0.0,0.0,1,,0.000000
asc_allhs_3_autos,0.000000,0.0,0.0,0.0,0.0,1,,0.000000
...,...,...,...,...,...,...,...,...
coef_retail_density_0_CARS,0.014187,0.0,0.0,,,0,,0.014187
coef_retail_density_2_CARS,0.000000,0.0,0.0,0.0,0.0,1,,0.000000
coef_retail_density_3_CARS,-0.010902,0.0,0.0,,,0,,-0.010902
coef_retail_density_4_CARS,-0.016816,0.0,0.0,,,0,,-0.016816


if you get poor results, consider setting global bounds with model.set_cap()


Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
asc_GQ_0_autos,2.468182
asc_allhhs_0_autos,0.000000
asc_allhhs_2_autos,0.000000
asc_allhhs_4plus_autos,0.000000
asc_allhs_3_autos,0.000000
cars0_2016,-0.817507
cars2_2016,0.255950
cars3_2016,0.433449
cars4_2016,0.528642
coef_NEST_0_1CARS,1.000000

Unnamed: 0,0
asc_GQ_0_autos,2.468182
asc_allhhs_0_autos,0.0
asc_allhhs_2_autos,0.0
asc_allhhs_4plus_autos,0.0
asc_allhs_3_autos,0.0
cars0_2016,-0.817507
cars2_2016,0.25595
cars3_2016,0.433449
cars4_2016,0.528642
coef_NEST_0_1CARS,1.0

Unnamed: 0,0
asc_GQ_0_autos,6e-06
asc_allhhs_0_autos,0.0
asc_allhhs_2_autos,0.0
asc_allhhs_4plus_autos,0.0
asc_allhs_3_autos,0.0
cars0_2016,-0.000512
cars2_2016,0.001949
cars3_2016,0.000209
cars4_2016,-4.1e-05
coef_NEST_0_1CARS,0.0


### Estimated coefficients

In [14]:
# Compute the parameter covariance, then write the estimation report workbook
# (with data statistics) into the estimation directory.
model.calculate_parameter_covariance()
result_dir='/projects/SANDAG/2017 On-Call Modeling Services/Area B/TO 05 - ABM3/estimation/'
model.to_xlsx(os.path.join(result_dir, "auto_ownership_007.xlsx"), data_statistics=True)

  model.calculate_parameter_covariance()
  xl = ExcelWriter(filename, engine='xlsxwriter_larch', model=model, **kwargs)


<larch.util.excel.ExcelWriter at 0x2b27143e3a0>

# Output Estimation Results

In [15]:
from activitysim.estimation.larch import update_coefficients

# Write the estimated coefficients to a CSV in the EDB's "estimated" sub-directory.
# The trailing semicolon suppresses the cell output.
result_dir = data.edb_directory / "estimated"
update_coefficients(model, data, result_dir, output_file=f"{modelname}_coefficients_007.csv");

In [16]:
#larch.__version__

In [17]:
#result_dir

### Write the model estimation report, including coefficient t-statistic and log likelihood

# Next Steps

The final step is to either manually or automatically copy the `*_coefficients_007.csv` file written above to the configs folder, rename it to `*_coefficients.csv`, and run ActivitySim in simulation mode.

In [18]:
# Optional check (disabled): read back the coefficients file written by update_coefficients.
#pd.read_csv(result_dir/f"{modelname}_coefficients_007.csv")