# Estimating Trip Destination Choice

This notebook illustrates how to re-estimate a single model component for ActivitySim.  This process 
includes running ActivitySim in estimation mode to read household travel survey files and write out
the estimation data bundles used in this notebook.  To review how to do so, please visit the other
notebooks in this directory.

# Load libraries

In [1]:
import larch  # !conda install larch #for estimation
import pandas as pd
import numpy as np
import yaml 
import larch.util.excel
import os

We'll work in our `test` directory, where ActivitySim has saved the estimation data bundles.

In [2]:
# Enter the `test` directory that holds the estimation data bundles.
# The path is relative, so guard against re-running this cell in the same
# kernel: a second bare os.chdir('test') would look for test/test and fail.
if os.path.basename(os.getcwd()) != 'test':
    os.chdir('test')

# Load data and prep model for estimation

In [3]:
# Name of the model component to estimate; it is passed to component_model()
# and used as the prefix of the output file names written further down.
modelname = "trip_destination"

In [4]:
from activitysim.estimation.larch import component_model
# Build the model for this component; with return_data=True the call also
# returns the data bundle that is reviewed in the following cells.
model, data = component_model(modelname, return_data=True)

# Review data loaded from EDB

Next we can review what was read from the EDB, including the coefficients, model settings, utilities specification, and chooser and alternative data.

## coefficients

In [5]:
# Coefficient table: one row per coefficient_name with its value and constrain flag.
data.coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coef_UNAVAILABLE,-999.0,T
coef_one,1.0,T
coef_mode_choice_logsum,1.821,F
coef_distance_joint,-0.1238,F
coef_util_distance_work_outbound,-0.049726,F
coef_util_distance_work_inbound,0.147813,F
coef_util_distance_univ,-0.0613,F
coef_util_distance_school,-0.1056,F
coef_util_distance_escort,-0.1491,F
coef_util_distance_shopping,-0.1192,F


## alt_values

In [6]:
# Alternative values in wide form: one row per (trip_id, variable) pair;
# the numbered columns presumably correspond to the candidate zones -- TODO confirm.
data.alt_values

Unnamed: 0,trip_id,variable,1,2,3,4,5,6,7,8,...,181,182,183,184,185,186,187,188,189,190
0,123229,util_dest_not_accessible_by_this_tour_mode,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,123229,util_distance_inbound,6.390000343322754,6.170000076293945,5.920000076293945,5.810000419616699,5.299999713897705,5.170000076293945,4.840000152587891,4.630000114440918,...,9.819999694824219,11.25,10.449999809265137,8.970000267028809,7.920000076293945,9.600000381469727,9.889999389648438,10.079999923706055,11.899999618530273,12.170000076293945
2,123229,util_distance_joint,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,123229,util_distance_outbound,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,123229,util_mode_choice_logsum_os,-1.1826465123669618,-1.1342039462845261,-1.0045888362609303,-0.9695035841251944,-0.9086842324484546,-0.7911335856893744,-1.0394341719374236,-0.735128183005223,...,-0.5685844758479193,-0.7106540688185471,-0.6324772374186627,-0.5127860821542347,-0.427736842041275,-0.5989989879176718,-0.6352947097849363,-0.6732994275559964,-0.8319610659002946,-0.8302145411677482
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39139,2478375757,util_prox_home_inbound,5.659999847412109,5.570000171661377,5.5,5.329999923706055,5.119999885559082,5.190000057220459,5.019999980926514,4.809999942779541,...,6.260000228881836,5.440000057220459,5.110000133514404,4.889999866485596,4.329999923706055,4.5,3.990000009536743,3.4200000762939453,4.159999847412109,4.650000095367432
39140,2478375757,util_prox_home_outbound,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39141,2478375757,util_sample_of_alternatives_correction_factor,5.969689780743902,5.229744858144034,6.651885938411212,5.457984599784746,4.557500241901908,5.683456777589848,4.880351408439651,5.056172176409885,...,6.167159960103807,5.635222918217218,4.9055149705471015,5.197733187456251,4.545857557157909,5.146011961990624,5.677734711636542,4.0921031600858315,5.525577058677637,6.765177014979627
39142,2478375757,util_size_term,5.8928092787280315,6.61382841501164,5.174023179275996,6.331334576117736,7.181350090272405,6.062963941307298,6.824432363972737,6.596881001099327,...,5.988058753973149,6.313073295919817,6.959507591274848,6.611773525937498,7.122336273469753,6.565080445969589,5.957654370567811,7.481461558469711,6.2347224418357365,5.096060413644514


## chooser_data

In [7]:
# Chooser table: one row per trip_id, with model_choice and override_choice
# alongside the trip and tour attributes.
data.chooser_data

Unnamed: 0,trip_id,model_choice,override_choice,person_id,household_id,tour_id,primary_purpose,trip_num,outbound,trip_count,purpose,next_trip_id,destination,origin,failed,tour_mode,trip_period,is_joint,destination_logsum
0,123229,7,69,375,375,15403,othmaint,1,False,3,shopping,123230,105,67,False,DRIVEALONEFREE,MD,False,
1,123230,9,106,375,375,15403,othmaint,2,False,3,eatout,123231,105,69,False,DRIVEALONEFREE,MD,False,
2,206561,7,9,629,629,25820,univ,1,True,2,escort,206562,12,131,False,WALK_HVY,MD,False,
3,265617,141,141,809,809,33202,shopping,1,True,2,shopping,265618,117,138,False,DRIVEALONEFREE,MD,False,
4,265621,127,127,809,809,33202,shopping,1,False,2,othmaint,265622,138,117,False,DRIVEALONEFREE,MD,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2791,2473349145,7,121,7540698,2849363,309168643,othdiscr,1,True,2,social,2473349146,56,92,False,SHARED3FREE,PM,False,
2792,2473471869,25,134,7541072,2849737,309183983,univ,1,False,2,escort,2473471870,117,107,False,TAXI,AM,False,
2793,2477980533,7,117,7554818,2863483,309747566,othmaint,1,False,2,shopping,2477980534,20,177,False,DRIVEALONEFREE,PM,False,
2794,2478086525,7,48,7555141,2863806,309760815,shopping,1,False,2,shopping,2478086526,85,41,False,SHARED2FREE,PM,False,


## landuse

In [8]:
# Land use table indexed by zone_id.
data.landuse

Unnamed: 0_level_0,DISTRICT,SD,county_id,TOTHH,TOTPOP,TOTACRE,RESACRE,CIACRE,TOTEMP,AGE0519,...,area_type,HSENROLL,COLLFTE,COLLPTE,TOPOLOGY,TERMINAL,household_density,employment_density,density_index,is_cbd
zone_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,1,1,46,82,20.3,1.0,15.00000,27318,7,...,0,0.00000,0.00000,0.00000,3,5.89564,2.875000,1707.375000,2.870167,False
2,1,1,1,134,240,31.1,1.0,24.79297,42078,19,...,0,0.00000,0.00000,0.00000,1,5.84871,5.195214,1631.374751,5.178722,False
3,1,1,1,267,476,14.7,1.0,2.31799,2445,38,...,0,0.00000,0.00000,0.00000,1,5.53231,80.470405,736.891913,72.547987,False
4,1,1,1,151,253,19.3,1.0,18.00000,22434,20,...,0,0.00000,0.00000,0.00000,2,5.64330,7.947368,1180.736842,7.894233,False
5,1,1,1,611,1069,52.7,1.0,15.00000,15662,86,...,0,0.00000,72.14684,0.00000,1,5.52555,38.187500,978.875000,36.753679,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,4,4,1,2779,8062,376.0,172.0,15.00000,1760,1178,...,3,0.00000,0.00000,0.00000,1,2.04173,14.860963,9.411765,5.762347,False
187,4,4,1,1492,4139,214.0,116.0,10.00000,808,603,...,3,0.00000,0.00000,0.00000,2,1.73676,11.841270,6.412698,4.159890,False
188,4,4,1,753,4072,232.0,11.0,178.00000,4502,1117,...,2,3961.04761,17397.79102,11152.93652,1,2.28992,3.984127,23.820106,3.413233,False
189,4,4,1,3546,8476,201.0,72.0,6.00000,226,1057,...,2,0.00000,0.00000,0.00000,1,2.88773,45.461538,2.897436,2.723836,False


## spec

In [9]:
# Utility specification: one row per expression, with one coefficient-name
# column per trip purpose.
data.spec

Unnamed: 0,Label,Description,Expression,work,univ,school,escort,shopping,eatout,othmaint,social,othdiscr,atwork
0,util_size_term,size term,"@np.log1p(size_terms.get(df.dest_taz, df.purpo...",coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one
1,util_no_attractions,no attractions,"@size_terms.get(df.dest_taz, df.purpose) == 0",coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE,coef_UNAVAILABLE
2,util_distance_inbound,distance (calibration adjustment individual - ...,@(~df.is_joint & ~df.outbound) * (_od_DIST + _...,coef_util_distance_work_outbound,coef_util_distance_univ,coef_util_distance_school,coef_util_distance_escort,coef_util_distance_shopping,coef_util_distance_eatout,coef_util_distance_othmaint,coef_util_distance_social,coef_util_distance_othdiscr,coef_util_distance_atwork
3,util_distance_outbound,distance (calibration adjustment individual - ...,@(~df.is_joint & df.outbound) * (_od_DIST + _d...,coef_util_distance_work_inbound,coef_util_distance_univ,coef_util_distance_school,coef_util_distance_escort,coef_util_distance_shopping,coef_util_distance_eatout,coef_util_distance_othmaint,coef_util_distance_social,coef_util_distance_othdiscr,coef_util_distance_atwork
4,util_distance_joint,distance (calibration adjustment joint),@df.is_joint * (_od_DIST + _dp_DIST),,,,coef_distance_joint,coef_distance_joint,coef_distance_joint,coef_distance_joint,coef_distance_joint,coef_distance_joint,
5,util_prox_home_outbound,stop proximity to home (outbound),@df.outbound * _od_DIST,coef_prox_home_outbound_work,,,,,,,,,
6,util_prox_home_inbound,stop proximity to home (inbound),@~df.outbound * _dp_DIST,coef_prox_home_inbound_work,,,,,,,,,
7,util_prox_dest_outbound,stop proximity to main destination (outbound),@df.outbound * _dp_DIST,coef_prox_dest_outbound_work,,,,,,,,,
8,util_prox_dest_inbound,stop proximity to main destination (inbound),@~df.outbound * _od_DIST,,,,,,,,,,
9,util_sample_of_alternatives_correction_factor,Sample of alternatives correction factor,"@np.minimum(np.log(df.pick_count/df.prob), 60)",coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one,coef_one


## size_spec

In [10]:
# Size-term specification: one row per segment, one weight column per size variable.
data.size_spec

Unnamed: 0_level_0,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
work,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
escort,0.001,0.225,0.0,0.144,0.0,0.0,0.0,0.464,0.166,0.0,0.0
shopping,0.001,0.999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
eatout,0.0,0.742,0.0,0.258,0.0,0.0,0.0,0.0,0.0,0.0,0.0
othmaint,0.001,0.481,0.0,0.518,0.0,0.0,0.0,0.0,0.0,0.0,0.0
social,0.001,0.521,0.0,0.478,0.0,0.0,0.0,0.0,0.0,0.0,0.0
othdiscr,0.252,0.212,0.0,0.272,0.165,0.0,0.0,0.0,0.098,0.0,0.0
univ,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.592,0.408


# Estimate

With the model set up for estimation, the next step is to estimate the model coefficients.  Make sure to use a sufficiently large household sample and set of zones to avoid an over-specified model, which does not have a numerically stable likelihood-maximizing solution.  Larch has built-in estimation methods including BHHH, and also offers access to more advanced general purpose non-linear optimizers in the `scipy` package, including SLSQP, which allows for bounds and constraints on parameters.  BHHH is the default and typically runs faster, but does not follow constraints on parameters.

In [11]:
# Use SLSQP rather than the default BHHH because it honors bounds and
# constraints on parameters; the optimizer is capped at 1000 iterations.
model.estimate(
    method='SLSQP',
    options={'maxiter': 1000},
)

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
coef_UNAVAILABLE,-999.0,-999.0,0.0,,,1,,-999.0
coef_distance_joint,-0.376392,-0.1238,0.0,,,0,,-0.376392
coef_mode_choice_logsum,0.163028,1.821,0.0,,,0,,0.163028
coef_one,1.0,1.0,0.0,,,1,,1.0
coef_prox_dest_outbound_work,-0.359504,-0.26,0.0,,,0,,-0.359504
coef_prox_home_inbound_work,-0.174104,-0.15,0.0,,,0,,-0.174104
coef_prox_home_outbound_work,-0.522711,-0.38,0.0,,,0,,-0.522711
coef_util_distance_atwork,-0.122335,-0.122335,0.0,,,0,,-0.122335
coef_util_distance_eatout,-0.366193,-0.1029,0.0,,,0,,-0.366193
coef_util_distance_escort,-0.337785,-0.1491,0.0,,,0,,-0.337785


  model.estimate(method='SLSQP', options={'maxiter':1000})
  model.estimate(method='SLSQP', options={'maxiter':1000})


Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
coef_UNAVAILABLE,-999.000000
coef_distance_joint,-0.376392
coef_mode_choice_logsum,0.163028
coef_one,1.000000
coef_prox_dest_outbound_work,-0.359504
coef_prox_home_inbound_work,-0.174104
coef_prox_home_outbound_work,-0.522711
coef_util_distance_atwork,-0.122335
coef_util_distance_eatout,-0.366193
coef_util_distance_escort,-0.337785

Unnamed: 0,0
coef_UNAVAILABLE,-999.0
coef_distance_joint,-0.376392
coef_mode_choice_logsum,0.163028
coef_one,1.0
coef_prox_dest_outbound_work,-0.359504
coef_prox_home_inbound_work,-0.174104
coef_prox_home_outbound_work,-0.522711
coef_util_distance_atwork,-0.122335
coef_util_distance_eatout,-0.366193
coef_util_distance_escort,-0.337785

Unnamed: 0,0
coef_UNAVAILABLE,0.0
coef_distance_joint,-0.001545
coef_mode_choice_logsum,-0.037169
coef_one,0.0
coef_prox_dest_outbound_work,-0.004856
coef_prox_home_inbound_work,-0.00069
coef_prox_home_outbound_work,0.006258
coef_util_distance_atwork,0.0
coef_util_distance_eatout,0.000901
coef_util_distance_escort,0.0016


### Estimated coefficients

In [12]:
# Tabulate the estimated values with their standard errors and t statistics.
model.parameter_summary()

Unnamed: 0,Value,Std Err,t Stat,Signif,Like Ratio,Null Value,Constrained
coef_UNAVAILABLE,-999.0,,,,,0.0,fixed value
coef_distance_joint,-0.376,0.0762,-4.94,***,,0.0,
coef_mode_choice_logsum,0.163,0.00797,20.46,***,,0.0,
coef_one,1.0,,,,,0.0,fixed value
coef_prox_dest_outbound_work,-0.36,441.0,-0.00,,,0.0,
coef_prox_home_inbound_work,-0.174,0.0368,-4.73,***,,0.0,
coef_prox_home_outbound_work,-0.523,441.0,-0.00,,,0.0,
coef_util_distance_atwork,-0.122,2.43e-06,-BIG,***,,0.0,
coef_util_distance_eatout,-0.366,0.063,-5.81,***,,0.0,
coef_util_distance_escort,-0.338,0.0329,-10.28,***,,0.0,


# Output Estimation Results

In [13]:
from activitysim.estimation.larch import update_coefficients, update_size_spec
# All estimation outputs are written to an "estimated" folder inside the EDB directory.
result_dir = data.edb_directory / "estimated"

## Write updated utility coefficients

In [14]:
# Write the revised coefficients file; the trailing `;` suppresses the cell output.
update_coefficients(
    model,
    data,
    result_dir,
    output_file=f"{modelname}_coefficients_revised.csv",
);

## Write updated size coefficients

In [15]:
# Write the size terms file; the returned table is displayed as the cell output.
update_size_spec(
    model,
    data,
    result_dir,
    output_file=f"{modelname}_size_terms.csv",
)

Unnamed: 0,segment,model_selector,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE
0,work_low,workplace,0.0,0.129129,0.193193,0.383383,0.12012,0.01001,0.164164,0.0,0.0,0.0,0.0
1,work_med,workplace,0.0,0.12012,0.197197,0.325325,0.139139,0.008008,0.21021,0.0,0.0,0.0,0.0
2,work_high,workplace,0.0,0.11,0.207,0.284,0.154,0.006,0.239,0.0,0.0,0.0,0.0
3,work_veryhigh,workplace,0.0,0.093,0.27,0.241,0.146,0.004,0.246,0.0,0.0,0.0,0.0
4,university,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.592,0.408
5,gradeschool,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,highschool,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,escort,non_mandatory,0.0,0.225,0.0,0.144,0.0,0.0,0.0,0.465,0.166,0.0,0.0
8,shopping,non_mandatory,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,eatout,non_mandatory,0.0,0.742,0.0,0.258,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Write the model estimation report, including coefficient t-statistics and log likelihood

In [16]:
# Save the estimation report as an Excel workbook next to the other outputs,
# with data statistics switched off.
model.to_xlsx(
    result_dir / f"{modelname}_model_estimation.xlsx",
    data_statistics=False,
);

# Next Steps

The final step is to either manually or automatically copy the `*_coefficients_revised.csv` file and `*_size_terms.csv` file to the configs folder, rename them to `*_coefficients.csv` and `destination_choice_size_terms.csv`, and run ActivitySim in simulation mode.  Note that all the location
and destination choice models share the same `destination_choice_size_terms.csv` input file, so if you
are updating all these models, you'll need to ensure that updated sections of this file for each model
are joined together correctly.

In [17]:
# Read back the revised coefficients file written above to check its contents.
pd.read_csv(result_dir / f"{modelname}_coefficients_revised.csv")

Unnamed: 0,coefficient_name,value,constrain
0,coef_UNAVAILABLE,-999.0,T
1,coef_one,1.0,T
2,coef_mode_choice_logsum,0.163028,F
3,coef_distance_joint,-0.376392,F
4,coef_util_distance_work_outbound,-0.300511,F
5,coef_util_distance_work_inbound,-0.094761,F
6,coef_util_distance_univ,-0.18638,F
7,coef_util_distance_school,-0.1056,F
8,coef_util_distance_escort,-0.337785,F
9,coef_util_distance_shopping,-0.390483,F


In [18]:
# Read back the size terms file written above to check its contents.
pd.read_csv(result_dir / f"{modelname}_size_terms.csv")

Unnamed: 0,index,segment,model_selector,TOTHH,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN,AGE0519,HSENROLL,COLLFTE,COLLPTE
0,0,work_low,workplace,0.0,0.129129,0.193193,0.383383,0.12012,0.01001,0.164164,0.0,0.0,0.0,0.0
1,1,work_med,workplace,0.0,0.12012,0.197197,0.325325,0.139139,0.008008,0.21021,0.0,0.0,0.0,0.0
2,2,work_high,workplace,0.0,0.11,0.207,0.284,0.154,0.006,0.239,0.0,0.0,0.0,0.0
3,3,work_veryhigh,workplace,0.0,0.093,0.27,0.241,0.146,0.004,0.246,0.0,0.0,0.0,0.0
4,4,university,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.592,0.408
5,5,gradeschool,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,6,highschool,school,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
7,7,escort,non_mandatory,0.0,0.225,0.0,0.144,0.0,0.0,0.0,0.465,0.166,0.0,0.0
8,8,shopping,non_mandatory,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,9,eatout,non_mandatory,0.0,0.742,0.0,0.258,0.0,0.0,0.0,0.0,0.0,0.0,0.0
