# Estimating At-Work Subtour Frequency

This notebook illustrates how to re-estimate a single model component for ActivitySim.  This process 
includes running ActivitySim in estimation mode to read household travel survey files and write out
the estimation data bundles used in this notebook.  To review how to do so, please visit the other
notebooks in this directory.

# Load libraries

In [1]:
import os
import larch  # !conda install larch -c conda-forge # for estimation
import pandas as pd

We'll work in our `test` directory, where ActivitySim has saved the estimation data bundles.

In [2]:
# Enter the `test` directory that holds the estimation data bundles.  The
# guard keeps this cell safe to re-run without restarting the kernel: a second
# bare `os.chdir('test')` would look for `test/test` and raise
# FileNotFoundError.
if os.path.basename(os.getcwd()) != 'test':
    os.chdir('test')

# Load data and prep model for estimation

In [3]:
from activitysim.estimation.larch import component_model

# Component being re-estimated; this name is reused further down to build the
# output file names.
modelname = "atwork_subtour_frequency"

# `return_data=True` makes the helper return the loaded data object alongside
# the model, so it can be inspected in the cells that follow.
model, data = component_model(modelname, return_data=True)

# Review data loaded from the EDB

The next step is to review the contents of the EDB that were just loaded, including the coefficients, model settings, utilities specification, and chooser and alternative data.

## Coefficients

In [4]:
# Display the coefficients table loaded with the model data; these are the
# values before re-estimation.
data.coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coefficient_dummy_for_full_time_worker_business1,-7.3750,F
coefficient_dummy_for_full_time_worker_business2,-14.2800,F
coefficient_dummy_for_full_time_worker_eat,-7.2800,F
coefficient_dummy_for_full_time_worker_eat_business,-14.7900,F
coefficient_dummy_for_full_time_worker_maint,-8.0930,F
...,...,...
coefficient_at_work_sub_tour_asc_business2,-2.1337,F
coefficient_at_work_sub_tour_asc_eat,0.8576,F
coefficient_at_work_sub_tour_asc_eat_business,-0.9721,F
coefficient_at_work_sub_tour_asc_maint,-0.6198,F


## Utility specification

In [5]:
# Display the utility specification: one row per utility term (label and
# expression) and one column per alternative naming the coefficient applied.
data.spec

Unnamed: 0,Label,Expression,no_subtours,eat,business1,maint,business2,eat_business
0,util_dummy_for_full_time_worker,pemploy==1,coefficient_dummy_for_full_time_worker_no_subt...,coefficient_dummy_for_full_time_worker_eat,coefficient_dummy_for_full_time_worker_business1,coefficient_dummy_for_full_time_worker_maint,coefficient_dummy_for_full_time_worker_business2,coefficient_dummy_for_full_time_worker_eat_bus...
1,util_dummy_for_non_full_time_worker,pemploy!=1,coefficient_dummy_for_non_full_time_worker_no_...,coefficient_dummy_for_non_full_time_worker_eat,coefficient_dummy_for_non_full_time_worker_bus...,coefficient_dummy_for_non_full_time_worker_maint,coefficient_dummy_for_non_full_time_worker_bus...,coefficient_dummy_for_non_full_time_worker_eat...
2,util_dummy_for_non_workers,"ptype in [4, 5]",coefficient_dummy_for_non_workers_no_subtours,coefficient_dummy_for_non_workers_eat,coefficient_dummy_for_non_workers_business1,coefficient_dummy_for_non_workers_maint,coefficient_dummy_for_non_workers_business2,coefficient_dummy_for_non_workers_eat_business
3,util_medium_hh_income_dummy,income_segment == 2,coefficient_medium_hh_income_dummy_no_subtours,coefficient_medium_hh_income_dummy_eat,coefficient_medium_hh_income_dummy_business1,coefficient_medium_hh_income_dummy_maint,coefficient_medium_hh_income_dummy_business2,coefficient_medium_hh_income_dummy_eat_business
4,util_high_hh_income_dummy,(income_segment > 2) & (income_segment < 5),coefficient_high_hh_income_dummy_no_subtours,coefficient_high_hh_income_dummy_eat,coefficient_high_hh_income_dummy_business1,coefficient_high_hh_income_dummy_maint,coefficient_high_hh_income_dummy_business2,coefficient_high_hh_income_dummy_eat_business
5,util_zero_cars_owned_by_hh_dummy,auto_ownership == 0,coefficient_zero_cars_owned_by_hh_dummy_no_sub...,coefficient_zero_cars_owned_by_hh_dummy_eat,coefficient_zero_cars_owned_by_hh_dummy_business1,coefficient_zero_cars_owned_by_hh_dummy_maint,coefficient_zero_cars_owned_by_hh_dummy_business2,coefficient_zero_cars_owned_by_hh_dummy_eat_bu...
6,util_individual_discretionary_tours_made_by_fu...,@(df.pemploy==1)*df.num_discr_tours,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...
7,util_individual_discretionary_tours_made_by_pa...,@(df.pemploy==2)*df.num_discr_tours,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...,coefficient_individual_discretionary_tours_mad...
8,util_individual_eating_out_tours_made_by_person,num_eatout_tours,coefficient_individual_eating_out_tours_made_b...,coefficient_individual_eating_out_tours_made_b...,coefficient_individual_eating_out_tours_made_b...,coefficient_individual_eating_out_tours_made_b...,coefficient_individual_eating_out_tours_made_b...,coefficient_individual_eating_out_tours_made_b...
9,util_main_shop_escort_tours_allocated_to_full_...,@(df.pemploy==1)*df.num_maint_shop_escort,coefficient_main_shop_escort_tours_allocated_t...,coefficient_main_shop_escort_tours_allocated_t...,coefficient_main_shop_escort_tours_allocated_t...,coefficient_main_shop_escort_tours_allocated_t...,coefficient_main_shop_escort_tours_allocated_t...,coefficient_main_shop_escort_tours_allocated_t...


## Chooser data

In [6]:
# Display the chooser data, indexed by tour_id, including the modeled choice
# and the override (observed) choice columns.
data.chooser_data

Unnamed: 0_level_0,model_choice,override_choice,util_dummy_for_full_time_worker,util_dummy_for_non_full_time_worker,util_dummy_for_non_workers,util_medium_hh_income_dummy,util_high_hh_income_dummy,util_zero_cars_owned_by_hh_dummy,util_individual_discretionary_tours_made_by_full_time_worker,util_individual_discretionary_tours_made_by_part_time_worker,...,TERMINAL,household_density,employment_density,density_index,is_cbd,num_maint_shop_escort,num_joint_discr,num_joint_maint_shop_eat,work_tour_is_SOV,override_choice_code
tour_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2998943,maint,maint,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,2.48345,26.073171,8.048780,6.150212,False,0,0,0,False,3
3060361,eat,eat,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2.09035,20.666667,4.107527,3.426505,False,0,0,0,True,1
4422914,eat,eat,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,5.35435,139.333333,418.518519,104.532377,False,0,0,0,False,1
4440298,maint,maint,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5.22542,97.634722,550.205552,82.920387,False,0,0,0,False,3
4496796,maint,maint,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,...,4.73802,117.769796,246.205869,79.663609,False,0,0,0,False,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
302923742,maint,maint,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.37546,19.153846,5.907692,4.515087,False,0,0,0,True,3
302942602,eat,eat,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.81406,16.068376,21.136752,9.128669,False,0,0,0,True,1
302942643,maint,maint,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,2.81406,16.068376,21.136752,9.128669,False,0,0,0,False,3
305120481,maint,maint,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,8.54946,55.606634,142.984438,40.036459,False,0,0,0,False,3


# Estimate

With the model set up for estimation, the next step is to estimate the model coefficients.  Make sure to use a sufficiently large household sample and set of zones to avoid an over-specified model, which does not have a numerically stable likelihood maximizing solution.  Larch has built-in estimation methods including BHHH, and also offers access to more advanced general purpose non-linear optimizers in the `scipy` package, including SLSQP, which allows for bounds and constraints on parameters.  BHHH is the default and typically runs faster, but does not follow constraints on parameters.

In [7]:
# Estimate with SLSQP rather than the default BHHH: SLSQP allows bounds and
# constraints on parameters, which BHHH does not follow.
model.estimate(method='SLSQP')

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
coefficient_at_work_sub_tour_asc_business1,1.421407,-0.5372,0.0,,,0,,1.421407
coefficient_at_work_sub_tour_asc_business2,-0.130778,-2.1337,0.0,,,0,,-0.130778
coefficient_at_work_sub_tour_asc_eat,5.809056,0.8576,0.0,,,0,,5.809056
coefficient_at_work_sub_tour_asc_eat_business,-23.415363,-0.9721,0.0,,,0,,-23.415363
coefficient_at_work_sub_tour_asc_maint,12.910479,-0.6198,0.0,,,0,,12.910479
...,...,...,...,...,...,...,...,...
coefficient_zero_cars_owned_by_hh_dummy_business1,-0.526876,-0.3391,0.0,,,0,,-0.526876
coefficient_zero_cars_owned_by_hh_dummy_business2,0.000000,0.0000,0.0,,,1,,0.000000
coefficient_zero_cars_owned_by_hh_dummy_eat,0.000000,0.0000,0.0,,,1,,0.000000
coefficient_zero_cars_owned_by_hh_dummy_eat_business,-19.116288,-0.3391,0.0,,,0,,-19.116288


  model.estimate(method='SLSQP')
  model.estimate(method='SLSQP')


Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
coefficient_at_work_sub_tour_asc_business1,1.421407e+00
coefficient_at_work_sub_tour_asc_business2,-1.307783e-01
coefficient_at_work_sub_tour_asc_eat,5.809056e+00
coefficient_at_work_sub_tour_asc_eat_business,-2.341536e+01
coefficient_at_work_sub_tour_asc_maint,1.291048e+01
coefficient_auto_accessibility_to_retail_for_work_taz_business1,-2.350832e-01
coefficient_auto_accessibility_to_retail_for_work_taz_business2,2.951134e+00
coefficient_auto_accessibility_to_retail_for_work_taz_eat,-7.610481e-01
coefficient_auto_accessibility_to_retail_for_work_taz_eat_business,1.515548e+00
coefficient_auto_accessibility_to_retail_for_work_taz_maint,-3.200651e+00

Unnamed: 0,0
coefficient_at_work_sub_tour_asc_business1,1.421407
coefficient_at_work_sub_tour_asc_business2,-0.1307783
coefficient_at_work_sub_tour_asc_eat,5.809056
coefficient_at_work_sub_tour_asc_eat_business,-23.41536
coefficient_at_work_sub_tour_asc_maint,12.91048
coefficient_auto_accessibility_to_retail_for_work_taz_business1,-0.2350832
coefficient_auto_accessibility_to_retail_for_work_taz_business2,2.951134
coefficient_auto_accessibility_to_retail_for_work_taz_eat,-0.7610481
coefficient_auto_accessibility_to_retail_for_work_taz_eat_business,1.515548
coefficient_auto_accessibility_to_retail_for_work_taz_maint,-3.200651

Unnamed: 0,0
coefficient_at_work_sub_tour_asc_business1,-0.0001481164
coefficient_at_work_sub_tour_asc_business2,-2.230083e-05
coefficient_at_work_sub_tour_asc_eat,0.0001390294
coefficient_at_work_sub_tour_asc_eat_business,0.0001539374
coefficient_at_work_sub_tour_asc_maint,-0.0001225496
coefficient_auto_accessibility_to_retail_for_work_taz_business1,-0.001603725
coefficient_auto_accessibility_to_retail_for_work_taz_business2,-0.0002219798
coefficient_auto_accessibility_to_retail_for_work_taz_eat,0.001752293
coefficient_auto_accessibility_to_retail_for_work_taz_eat_business,0.001320801
coefficient_auto_accessibility_to_retail_for_work_taz_maint,-0.001247389


### Estimated coefficients

In [8]:
# Display the estimated coefficients with their standard errors, t statistics
# and significance flags.
model.parameter_summary()

Unnamed: 0,Value,Std Err,t Stat,Signif,Like Ratio,Null Value,Constrained
coefficient_at_work_sub_tour_asc_business1,1.42,,,[***],29.59,0.0,
coefficient_at_work_sub_tour_asc_business2,-0.131,15100.0,-0.00,,,0.0,
coefficient_at_work_sub_tour_asc_eat,5.81,484.0,0.01,,,0.0,
coefficient_at_work_sub_tour_asc_eat_business,-23.4,,,[***],BIG,0.0,
coefficient_at_work_sub_tour_asc_maint,12.9,523.0,0.02,,,0.0,
coefficient_auto_accessibility_to_retail_for_work_taz_business1,-0.235,,,[***],181.30,0.0,
coefficient_auto_accessibility_to_retail_for_work_taz_business2,2.95,2550.0,0.00,,,0.0,
coefficient_auto_accessibility_to_retail_for_work_taz_eat,-0.761,,,[***],714.04,0.0,
coefficient_auto_accessibility_to_retail_for_work_taz_eat_business,1.52,,,[***],41.92,0.0,
coefficient_auto_accessibility_to_retail_for_work_taz_maint,-3.2,,,[***],BIG,0.0,


# Output Estimation Results

In [9]:
from activitysim.estimation.larch import update_coefficients
# Results go in an "estimated" subdirectory of the EDB directory; `result_dir`
# is reused by the cells below.
result_dir = data.edb_directory/"estimated"
# Save the re-estimated coefficients as `<modelname>_coefficients_revised.csv`
# (read back from `result_dir` at the end of the notebook).  The trailing `;`
# suppresses the cell's output.
update_coefficients(
    model, data, result_dir,
    output_file=f"{modelname}_coefficients_revised.csv",
);

### Write the model estimation report, including coefficient t-statistic and log likelihood

In [10]:
# Write the estimation report workbook next to the revised coefficients file.
# NOTE(review): `data_statistics=False` presumably omits the data statistics
# section from the workbook -- confirm against the larch documentation.
model.to_xlsx(
    result_dir/f"{modelname}_model_estimation.xlsx", 
    data_statistics=False,
)

<larch.util.excel.ExcelWriter at 0x7fc251cd8550>

# Next Steps

The final step is to either manually or automatically copy the `*_coefficients_revised.csv` file to the configs folder, rename it to `*_coefficients.csv`, and run ActivitySim in simulation mode.

In [11]:
# Read the revised coefficients file back in to check what was written.
pd.read_csv(result_dir/f"{modelname}_coefficients_revised.csv")

Unnamed: 0,coefficient_name,value,constrain
0,coefficient_dummy_for_full_time_worker_business1,-11.365293,F
1,coefficient_dummy_for_full_time_worker_business2,-13.833915,F
2,coefficient_dummy_for_full_time_worker_eat,-9.545916,F
3,coefficient_dummy_for_full_time_worker_eat_bus...,-11.192172,F
4,coefficient_dummy_for_full_time_worker_maint,-5.880704,F
...,...,...,...
127,coefficient_at_work_sub_tour_asc_business2,-0.130778,F
128,coefficient_at_work_sub_tour_asc_eat,5.809056,F
129,coefficient_at_work_sub_tour_asc_eat_business,-23.415363,F
130,coefficient_at_work_sub_tour_asc_maint,12.910479,F
