# Estimating Non-Mandatory Tour Scheduling

This notebook illustrates how to re-estimate the non-mandatory tour scheduling component for ActivitySim.  This process 
includes running ActivitySim in estimation mode to read household travel survey files and write out
the estimation data bundles used in this notebook.  To review how to do so, please visit the other
notebooks in this directory.

# Load libraries

In [1]:
import os
import larch  # !conda install larch -c conda-forge # for estimation
import pandas as pd

We'll work in our `test` directory, where ActivitySim has saved the estimation data bundles.

In [2]:
# Move into the `test` directory that holds the estimation data bundles.
# Guard on the current directory name so that re-running this cell without
# restarting the kernel does not try to enter a non-existent `test/test`.
if os.path.basename(os.getcwd()) != 'test':
    os.chdir('test')

# Load data and prep model for estimation

In [3]:
from activitysim.estimation.larch import component_model

# Name of the ActivitySim component being re-estimated; it is reused below
# to build the output file names.
modelname = "non_mandatory_tour_scheduling"

# Build the Larch model for this component and also return the data it was
# built from (return_data=True), for review in the following cells.
model, data = component_model(modelname, return_data=True)

# Review data loaded from the EDB

The next (optional) step is to review the EDB, including the coefficients, utilities specification, and chooser and alternative data.

## Coefficients

In [4]:
# Display the coefficients table loaded from the estimation data bundle (EDB).
data.coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coef_dummy,1.000000,T
coef_subsequent_tour_must_start_after_previous_tour_for_this_purpose_ends,-999.000000,T
coef_free_flow_round_trip_auto_time_shift_effects_duration,0.004741,F
coef_shopping_tour_departure_shift_effects,-0.060150,F
coef_shopping_tour_duration_shift_effects,-0.120800,F
...,...,...
coef_escort_tour_duration_constants_4_to_5_hours,-2.880294,F
coef_escort_tour_duration_constants_6_to_7_hours,-2.973534,F
coef_escort_tour_duration_constants_8_to_10_hours,-3.020214,F
coef_escort_tour_duration_constants_11_to_13_hours,-2.974365,F


## Utility specification

In [5]:
# Display the utility specification loaded from the EDB.
data.spec

Unnamed: 0,Label,Description,Expression,Coefficient
0,util_subsequent_tour_must_start_after_previous...,Subsequent tour must start after previous tour...,(start < end_previous) & (tour_type_num > 1),coef_subsequent_tour_must_start_after_previous...
1,util_free_flow_round_trip_auto_time_shift_effe...,Free-flow round trip auto time shift effects -...,roundtrip_auto_time_to_work * duration,coef_free_flow_round_trip_auto_time_shift_effe...
2,util_shopping_tour_departure_shift_effects,Shopping tour - departure shift effects,(tour_type == 'shopping') * start,coef_shopping_tour_departure_shift_effects
3,util_shopping_tour_duration_shift_effects,Shopping tour - duration shift effects,(tour_type == 'shopping') * duration,coef_shopping_tour_duration_shift_effects
4,util_maintenance_tour_departure_shift_effects,Maintenance tour - departure shift effects,(tour_type == 'othmaint') * start,coef_maintenance_tour_departure_shift_effects
...,...,...,...,...
86,util_escort_tour_duration_constants_4_to_5_hours,Escort Tour Duration Constants -- 4 to 5 hours,(tour_type == 'escort') & (duration > 3) & (du...,coef_escort_tour_duration_constants_4_to_5_hours
87,util_escort_tour_duration_constants_6_to_7_hours,Escort Tour Duration Constants -- 6 to 7 hours,(tour_type == 'escort') & (duration > 5) & (du...,coef_escort_tour_duration_constants_6_to_7_hours
88,util_escort_tour_duration_constants_8_to_10_hours,Escort Tour Duration Constants -- 8 to 10 hours,(tour_type == 'escort') & (duration > 7) & (du...,coef_escort_tour_duration_constants_8_to_10_hours
89,util_escort_tour_duration_constants_11_to_13_h...,Escort Tour Duration Constants -- 11 to 13 hours,(tour_type == 'escort') & (duration > 10) & (d...,coef_escort_tour_duration_constants_11_to_13_h...


## Chooser data

In [6]:
# Display the chooser data loaded from the EDB.
data.chooser_data

Unnamed: 0,tour_id,model_choice,override_choice,person_id,tour_type,tour_type_count,tour_type_num,tour_num,tour_count,tour_category,...,num_person_joint_tours,ptype,num_children,roundtrip_auto_time_to_work,num_mand,num_escort_tours,num_non_escort_tours,adult,start_previous,end_previous
0,6812,118,118,166,eatout,1,1,1,1,non_mandatory,...,0,4,0,0.0,0,0,1,True,5,5
1,8110,112,112,197,shopping,1,1,1,1,non_mandatory,...,0,4,0,0.0,0,0,1,True,5,5
2,11013,169,169,268,othdiscr,1,1,2,2,non_mandatory,...,0,4,0,0.0,0,0,2,True,12,15
3,11016,115,115,268,othmaint,1,1,1,2,non_mandatory,...,0,4,0,0.0,0,0,2,True,5,5
4,15403,99,99,375,othmaint,1,1,1,1,non_mandatory,...,0,4,0,0.0,0,0,1,True,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2485,309760814,71,71,7555141,shopping,2,1,1,3,non_mandatory,...,0,5,0,0.0,0,0,3,True,5,5
2486,309760815,137,137,7555141,shopping,2,2,2,3,non_mandatory,...,0,5,0,0.0,0,0,3,True,9,10
2487,309790009,109,109,7555853,social,1,1,1,1,non_mandatory,...,0,5,0,0.0,0,0,1,True,5,5
2488,309796968,146,146,7556023,othdiscr,2,1,1,2,non_mandatory,...,0,5,0,0.0,0,0,2,True,5,5


## Alternatives data

In [7]:
# Display the alternatives data loaded from the EDB.
data.alt_values

Unnamed: 0,tour_id,variable,0,1,2,3,4,5,6,7,...,180,181,182,183,184,185,186,187,188,189
0,6812,duration,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,...,0.0,1.0,2.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0
1,6812,end,5.0,6.0,7.0,8.0,9.0,10.0,11.0,12.0,...,20.0,21.0,22.0,23.0,21.0,22.0,23.0,22.0,23.0,23.0
2,6812,mode_choice_logsum,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,6812,start,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,20.0,20.0,20.0,20.0,21.0,21.0,21.0,22.0,22.0,23.0
4,6812,util_adjacent_window_exists_after_this_arrival...,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
231565,309796969,util_subsequent_of_2_plus_tours_for_same_purpo...,0,1,2,3,4,5,6,7,...,0,1,2,3,0,1,2,0,1,0
231566,309796969,util_subsequent_tour_must_start_after_previous...,True,True,True,True,True,True,True,True,...,False,False,False,False,False,False,False,False,False,False
231567,309796969,util_university_student_arrive_after_22,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
231568,309796969,util_visit_tour_departure_shift_effects_start,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# Estimate

With the model set up for estimation, the next step is to estimate the model coefficients.  Make sure to use a sufficiently large household sample and set of zones to avoid an over-specified model, which does not have a numerically stable likelihood-maximizing solution.  Larch has built-in estimation methods including BHHH, and also offers access to more advanced general-purpose non-linear optimizers in the `scipy` package, including SLSQP, which allows for bounds and constraints on parameters.  BHHH is the default and typically runs faster, but does not follow constraints on parameters.

In [8]:
# Estimate the model coefficients using the default settings.
# NOTE(review): per the accompanying text the default method is BHHH, which
# does not follow constraints on parameters -- confirm against the Larch docs.
model.estimate()

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
coef_adjacent_window_exists_after_this_arrival_hour_first_tour_interaction,-0.127954,-0.025700,0.0,-25.0,25.0,0,,-0.127954
coef_adjacent_window_exists_after_this_arrival_hour_second_plus_tour_interaction,0.093954,-0.027340,0.0,-25.0,25.0,0,,0.093954
coef_adjacent_window_exists_before_this_departure_hour_first_tour_interaction,0.253749,0.008442,0.0,-25.0,25.0,0,,0.253749
coef_adjacent_window_exists_before_this_departure_hour_second_plus_tour_interaction,-0.198343,-0.059300,0.0,-25.0,25.0,0,,-0.198343
coef_adult_with_children_in_hh_arrive_19_21,0.309424,0.336000,0.0,-25.0,25.0,0,,0.309424
...,...,...,...,...,...,...,...,...
coef_subsequent_of_2_plus_tours_for_same_purpose_duration_shift_effect,-0.115107,-0.173100,0.0,-25.0,25.0,0,,-0.115107
coef_subsequent_tour_must_start_after_previous_tour_for_this_purpose_ends,-999.000000,-999.000000,0.0,-25.0,25.0,1,,-999.000000
coef_university_student_arrive_after_22,-0.537299,0.546600,0.0,-25.0,25.0,0,,-0.537299
coef_visit_tour_departure_shift_effects,0.169598,0.096880,0.0,-25.0,25.0,0,,0.169598




Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
coef_adjacent_window_exists_after_this_arrival_hour_first_tour_interaction,-0.127954
coef_adjacent_window_exists_after_this_arrival_hour_second_plus_tour_interaction,0.093954
coef_adjacent_window_exists_before_this_departure_hour_first_tour_interaction,0.253749
coef_adjacent_window_exists_before_this_departure_hour_second_plus_tour_interaction,-0.198343
coef_adult_with_children_in_hh_arrive_19_21,0.309424
coef_arrival_constants_am_peak,0.423382
coef_arrival_constants_early,2.912542
coef_arrival_constants_evening,0.000000
coef_arrival_constants_late,-1.191037
coef_arrival_constants_midday_1,1.483808

Unnamed: 0,0
coef_adjacent_window_exists_after_this_arrival_hour_first_tour_interaction,-0.127954
coef_adjacent_window_exists_after_this_arrival_hour_second_plus_tour_interaction,0.093954
coef_adjacent_window_exists_before_this_departure_hour_first_tour_interaction,0.253749
coef_adjacent_window_exists_before_this_departure_hour_second_plus_tour_interaction,-0.198343
coef_adult_with_children_in_hh_arrive_19_21,0.309424
coef_arrival_constants_am_peak,0.423382
coef_arrival_constants_early,2.912542
coef_arrival_constants_evening,0.0
coef_arrival_constants_late,-1.191037
coef_arrival_constants_midday_1,1.483808

Unnamed: 0,0
coef_adjacent_window_exists_after_this_arrival_hour_first_tour_interaction,0.000114
coef_adjacent_window_exists_after_this_arrival_hour_second_plus_tour_interaction,-0.000385
coef_adjacent_window_exists_before_this_departure_hour_first_tour_interaction,9.7e-05
coef_adjacent_window_exists_before_this_departure_hour_second_plus_tour_interaction,-0.000205
coef_adult_with_children_in_hh_arrive_19_21,0.000294
coef_arrival_constants_am_peak,3.7e-05
coef_arrival_constants_early,0.000111
coef_arrival_constants_evening,0.0
coef_arrival_constants_late,0.000194
coef_arrival_constants_midday_1,-0.001155


### Estimated coefficients

In [9]:
# Display the summary table of the estimated parameters.
model.parameter_summary()

Unnamed: 0,Value,Std Err,t Stat,Signif,Null Value,Constrained
coef_adjacent_window_exists_after_this_arrival_hour_first_tour_interaction,-0.128,0.141,-0.91,,0.0,
coef_adjacent_window_exists_after_this_arrival_hour_second_plus_tour_interaction,0.094,0.137,0.69,,0.0,
coef_adjacent_window_exists_before_this_departure_hour_first_tour_interaction,0.254,0.148,1.72,,0.0,
coef_adjacent_window_exists_before_this_departure_hour_second_plus_tour_interaction,-0.198,0.14,-1.42,,0.0,
coef_adult_with_children_in_hh_arrive_19_21,0.309,0.136,2.27,*,0.0,
coef_arrival_constants_am_peak,0.423,0.303,1.4,,0.0,
coef_arrival_constants_early,2.91,0.429,6.78,***,0.0,
coef_arrival_constants_evening,0.0,,,,0.0,fixed value
coef_arrival_constants_late,-1.19,0.133,-8.96,***,0.0,
coef_arrival_constants_midday_1,1.48,0.22,6.75,***,0.0,


# Output Estimation Results

In [10]:
from activitysim.estimation.larch import update_coefficients

# Results are written to an `estimated` folder under the EDB directory;
# `result_dir` is reused by the following cells.
result_dir = data.edb_directory / "estimated"
revised_coefficients_file = f"{modelname}_coefficients_revised.csv"

# The trailing semicolon suppresses display of the call's return value.
update_coefficients(model, data, result_dir, output_file=revised_coefficients_file);

### Write the model estimation report, including coefficient t-statistics and log likelihood

In [11]:
# Path of the Excel estimation report, placed next to the revised coefficients.
report_path = result_dir / f"{modelname}_model_estimation.xlsx"

# Write the report without the data statistics section.
model.to_xlsx(report_path, data_statistics=False)

<larch.util.excel.ExcelWriter at 0x7fc610c40760>

# Next Steps

The final step is to either manually or automatically copy the `*_coefficients_revised.csv` file to the configs folder, rename it to `*_coefficients.csv`, and run ActivitySim in simulation mode.

In [12]:
# Read back the revised coefficients file from `result_dir` to inspect it.
# Relies on `result_dir` and `modelname` defined in the earlier cells.
pd.read_csv(result_dir/f"{modelname}_coefficients_revised.csv")

Unnamed: 0,coefficient_name,value,constrain
0,coef_dummy,1.000000,T
1,coef_subsequent_tour_must_start_after_previous...,-999.000000,T
2,coef_free_flow_round_trip_auto_time_shift_effe...,0.004992,F
3,coef_shopping_tour_departure_shift_effects,0.015067,F
4,coef_shopping_tour_duration_shift_effects,-0.075331,F
...,...,...,...
85,coef_escort_tour_duration_constants_4_to_5_hours,-3.522889,F
86,coef_escort_tour_duration_constants_6_to_7_hours,-3.979277,F
87,coef_escort_tour_duration_constants_8_to_10_hours,-5.158316,F
88,coef_escort_tour_duration_constants_11_to_13_h...,-6.393148,F
