# Estimating Auto Ownership

Integration with [larch](https://larch.newman.me) for model estimation. See [estimation tools review](https://github.com/ActivitySim/activitysim/wiki/Estimation-Tools-Review) for more information about larch.

# Run the Example

Output an estimation data bundle (EDB), which contains:
  - model settings - auto_ownership_model_settings.yaml
  - coefficients - auto_ownership_coefficients.csv
  - utilities specification - auto_ownership_SPEC.csv
  - chooser and alternative data combined into one file - auto_ownership_values_combined.csv
  - choices made - auto_ownership_choices.csv

# Read EDB 

In [1]:
import os
import larch  # !conda install larch #for estimation
import pandas as pd
import larch_asim  # utility functions in a local module

In [2]:
# Folder holding the estimation data bundle (EDB) files for this model.
edb_directory = "estimation_data_bundle/auto_ownership/"


def read_csv(filename, **kwargs):
    """Load one CSV file from the EDB directory as a DataFrame.

    `filename` is resolved relative to `edb_directory`; any extra keyword
    arguments are forwarded unchanged to `pandas.read_csv`.
    """
    csv_path = os.path.join(edb_directory, filename)
    return pd.read_csv(csv_path, **kwargs)

In [3]:
# Coefficients are indexed by name so they can be looked up and updated by label.
coefficients = read_csv(
    "auto_ownership_coefficients.csv",
    index_col="coefficient_name",
)
# The remaining EDB tables are read with pandas' default positional index.
spec = read_csv("auto_ownership_SPEC.csv")
chooser_data = read_csv("auto_ownership_values_combined.csv")
choices = read_csv("auto_ownership_choices.csv")

In [4]:
coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coef_cars1_drivers_2,0.0000,T
coef_cars1_drivers_3,0.0000,T
coef_cars1_persons_16_17,0.0000,T
coef_cars234_asc_marin,0.0000,T
coef_cars1_persons_25_34,0.0000,T
...,...,...
coef_cars4_drivers_3,5.2080,F
coef_cars3_drivers_3,5.5131,F
coef_cars2_drivers_4_up,6.3662,F
coef_cars3_drivers_4_up,8.5148,F


In [5]:
spec

Unnamed: 0,Label,Description,Expression,cars0,cars1,cars2,cars3,cars4
0,util_drivers_2,2 Adults (age 16+),num_drivers==2,,coef_cars1_drivers_2,coef_cars2_drivers_2,coef_cars3_drivers_2,coef_cars4_drivers_2
1,util_drivers_3,3 Adults (age 16+),num_drivers==3,,coef_cars1_drivers_3,coef_cars2_drivers_3,coef_cars3_drivers_3,coef_cars4_drivers_3
2,util_drivers_4_up,4+ Adults (age 16+),num_drivers>3,,coef_cars1_drivers_4_up,coef_cars2_drivers_4_up,coef_cars3_drivers_4_up,coef_cars4_drivers_4_up
3,util_persons_16_17,Persons age 16-17,num_children_16_to_17,,coef_cars1_persons_16_17,coef_cars2_persons_16_17,coef_cars34_persons_16_17,coef_cars34_persons_16_17
4,util_persons_18_24,Persons age 18-24,num_college_age,,coef_cars1_persons_18_24,coef_cars2_persons_18_24,coef_cars34_persons_18_24,coef_cars34_persons_18_24
5,util_persons_25_34,Persons age 35-34,num_young_adults,,coef_cars1_persons_25_34,coef_cars2_persons_25_34,coef_cars34_persons_25_34,coef_cars34_persons_25_34
6,util_presence_children_0_4,Presence of children age 0-4,num_young_children>0,,coef_cars1_presence_children_0_4,coef_cars234_presence_children_0_4,coef_cars234_presence_children_0_4,coef_cars234_presence_children_0_4
7,util_presence_children_5_17,Presence of children age 5-17,(num_children_5_to_15+num_children_16_to_17)>0,,coef_cars1_presence_children_5_17,coef_cars2_presence_children_5_17,coef_cars34_presence_children_5_17,coef_cars34_presence_children_5_17
8,util_num_workers_clip_3,"Number of workers, capped at 3",@df.num_workers.clip(upper=3),,coef_cars1_num_workers_clip_3,coef_cars2_num_workers_clip_3,coef_cars3_num_workers_clip_3,coef_cars4_num_workers_clip_3
9,util_hh_income_0_30k,"Piecewise Linear household income, $0-30k","@df.income_in_thousands.clip(0, 30)",,coef_cars1_hh_income_0_30k,coef_cars2_hh_income_0_30k,coef_cars3_hh_income_0_30k,coef_cars4_hh_income_0_30k


In [6]:
chooser_data

Unnamed: 0,household_id,model_choice,override_choice,util_drivers_2,util_drivers_3,util_drivers_4_up,util_persons_16_17,util_persons_18_24,util_persons_25_34,util_presence_children_0_4,...,OPRKCST,area_type,HSENROLL,COLLFTE,COLLPTE,TOPOLOGY,TERMINAL,household_density,employment_density,density_index
0,33,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,102.0,1,0.00000,0.00000,0.0,1,4.37996,86.151401,29.646921,22.056656
1,247,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,2,340.63205,0.00000,0.0,2,3.88504,51.196078,27.392157,17.844541
2,649,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3,947.84723,0.00000,0.0,1,2.17494,12.539483,15.866114,7.004002
3,734,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3,0.00000,0.00000,0.0,1,2.36725,18.076301,7.955839,5.524407
4,1216,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3,0.00000,0.00000,0.0,1,2.45427,17.990741,7.481481,5.284085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2864113,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1,0.00000,3598.08521,0.0,1,3.29100,11.947644,45.167539,9.448375
1996,2864518,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1,0.00000,0.00000,0.0,1,25.52083,15.938148,551.353820,15.490363
1997,2864640,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1,0.00000,0.00000,0.0,1,4.57400,33.716981,52.037736,20.460161
1998,2864781,0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3,0.00000,0.00000,0.0,1,3.29134,16.000000,31.644068,10.626823


In [7]:
choices

Unnamed: 0,household_id,model_choice
0,485346,1
1,121103,1
2,933459,1
3,504600,1
4,1235090,0
...,...,...
1995,2629695,2
1996,949800,0
1997,2109796,1
1998,113141,0


# Data Setup

In [8]:
from larch import P, X

# Every spec column after the first three (Label, Description, Expression)
# names an alternative; alternatives are coded 0..n-1 in column order.
altnames = spec.columns[3:].tolist()
altcodes = range(len(altnames))

In [9]:
m = larch.Model()

One of the alternatives is coded as 0, so
we need to explicitly initialize the MNL nesting graph
and set the root_id to a value other than zero.

In [10]:
m.initialize_graph(alternative_codes=altcodes, root_id=99)

In [11]:
# Build the per-alternative utility functions from the spec table, keyed by
# alternative code. Judging by the displayed result, each 'Label' value becomes
# the X data term and each alternative column supplies the P coefficient name.
m.utility_co = larch_asim.dict_of_linear_utility_from_spec(
    spec,
    'Label',
    {name: code for name, code in zip(altnames, altcodes)},
)
m.utility_co

alt,formula
0,<Empty LinearFunction_C>
1,P.coef_cars1_drivers_2 * X.util_drivers_2 + P.coef_cars1_drivers_3 * X.util_drivers_3 + P.coef_cars1_drivers_4_up * X.util_drivers_4_up + P.coef_cars1_persons_16_17 * X.util_persons_16_17 + P.coef_cars1_persons_18_24 * X.util_persons_18_24 + P.coef_cars1_persons_25_34 * X.util_persons_25_34 + P.coef_cars1_presence_children_0_4 * X.util_presence_children_0_4 + P.coef_cars1_presence_children_5_17 * X.util_presence_children_5_17 + P.coef_cars1_num_workers_clip_3 * X.util_num_workers_clip_3 + P.coef_cars1_hh_income_0_30k * X.util_hh_income_0_30k + P.coef_cars1_hh_income_30_up * X.util_hh_income_30_75k + P.coef_cars1_hh_income_30_up * X.util_hh_income_75k_up + P.coef_cars1_density_0_10_no_workers * X.util_density_0_10_no_workers + P.coef_cars1_density_10_up_no_workers * X.util_density_10_up_no_workers + P.coef_cars1_density_0_10_no_workers * X.util_density_0_10_workers + P.coef_cars1_density_10_up_workers * X.util_density_10_up_workers + P.coef_cars1_asc * X.util_asc + P.coef_cars1_asc_san_francisco * X.util_asc_san_francisco + P.coef_cars1_asc_county * X.util_asc_solano + P.coef_cars1_asc_county * X.util_asc_napa + P.coef_cars1_asc_county * X.util_asc_sonoma + P.coef_cars1_asc_marin * X.util_asc_marin + P.coef_retail_auto_no_workers * X.util_retail_auto_no_workers + P.coef_retail_auto_workers * X.util_retail_auto_workers + P.coef_retail_transit_no_workers * X.util_retail_transit_no_workers + P.coef_retail_transit_workers * X.util_retail_transit_workers + P.coef_retail_non_motor * X.util_retail_non_motor_no_workers + P.coef_retail_non_motor * X.util_retail_non_motor_workers + P.coef_cars1_auto_time_saving_per_worker * X.util_auto_time_saving_per_worker
2,P.coef_cars2_drivers_2 * X.util_drivers_2 + P.coef_cars2_drivers_3 * X.util_drivers_3 + P.coef_cars2_drivers_4_up * X.util_drivers_4_up + P.coef_cars2_persons_16_17 * X.util_persons_16_17 + P.coef_cars2_persons_18_24 * X.util_persons_18_24 + P.coef_cars2_persons_25_34 * X.util_persons_25_34 + P.coef_cars234_presence_children_0_4 * X.util_presence_children_0_4 + P.coef_cars2_presence_children_5_17 * X.util_presence_children_5_17 + P.coef_cars2_num_workers_clip_3 * X.util_num_workers_clip_3 + P.coef_cars2_hh_income_0_30k * X.util_hh_income_0_30k + P.coef_cars2_hh_income_30_up * X.util_hh_income_30_75k + P.coef_cars2_hh_income_30_up * X.util_hh_income_75k_up + P.coef_cars2_density_0_10_no_workers * X.util_density_0_10_no_workers + P.coef_cars2_density_10_up_no_workers * X.util_density_10_up_no_workers + P.coef_cars2_density_0_10_no_workers * X.util_density_0_10_workers + P.coef_cars2_density_10_up_no_workers * X.util_density_10_up_workers + P.coef_cars2_asc * X.util_asc + P.coef_cars2_asc_san_francisco * X.util_asc_san_francisco + P.coef_cars2_asc_county * X.util_asc_solano + P.coef_cars2_asc_county * X.util_asc_napa + P.coef_cars2_asc_county * X.util_asc_sonoma + P.coef_cars234_asc_marin * X.util_asc_marin + P.coef_retail_auto_no_workers * X.util_retail_auto_no_workers + P.coef_retail_auto_workers * X.util_retail_auto_workers + P.coef_retail_transit_no_workers * X.util_retail_transit_no_workers + P.coef_retail_transit_workers * X.util_retail_transit_workers + P.coef_retail_non_motor * X.util_retail_non_motor_no_workers + P.coef_retail_non_motor * X.util_retail_non_motor_workers + P.coef_cars2_auto_time_saving_per_worker * X.util_auto_time_saving_per_worker
3,P.coef_cars3_drivers_2 * X.util_drivers_2 + P.coef_cars3_drivers_3 * X.util_drivers_3 + P.coef_cars3_drivers_4_up * X.util_drivers_4_up + P.coef_cars34_persons_16_17 * X.util_persons_16_17 + P.coef_cars34_persons_18_24 * X.util_persons_18_24 + P.coef_cars34_persons_25_34 * X.util_persons_25_34 + P.coef_cars234_presence_children_0_4 * X.util_presence_children_0_4 + P.coef_cars34_presence_children_5_17 * X.util_presence_children_5_17 + P.coef_cars3_num_workers_clip_3 * X.util_num_workers_clip_3 + P.coef_cars3_hh_income_0_30k * X.util_hh_income_0_30k + P.coef_cars3_hh_income_30_up * X.util_hh_income_30_75k + P.coef_cars3_hh_income_30_up * X.util_hh_income_75k_up + P.coef_cars34_density_0_10_no_workers * X.util_density_0_10_no_workers + P.coef_cars34_density_10_up_no_workers * X.util_density_10_up_no_workers + P.coef_cars34_density_0_10_no_workers * X.util_density_0_10_workers + P.coef_cars34_density_10_up_no_workers * X.util_density_10_up_workers + P.coef_cars3_asc * X.util_asc + P.coef_cars34_asc_san_francisco * X.util_asc_san_francisco + P.coef_cars34_asc_county * X.util_asc_solano + P.coef_cars34_asc_county * X.util_asc_napa + P.coef_cars34_asc_county * X.util_asc_sonoma + P.coef_cars234_asc_marin * X.util_asc_marin + P.coef_retail_auto_no_workers * X.util_retail_auto_no_workers + P.coef_retail_auto_workers * X.util_retail_auto_workers + P.coef_retail_transit_no_workers * X.util_retail_transit_no_workers + P.coef_retail_transit_workers * X.util_retail_transit_workers + P.coef_retail_non_motor * X.util_retail_non_motor_no_workers + P.coef_retail_non_motor * X.util_retail_non_motor_workers + P.coef_cars3_auto_time_saving_per_worker * X.util_auto_time_saving_per_worker
4,P.coef_cars4_drivers_2 * X.util_drivers_2 + P.coef_cars4_drivers_3 * X.util_drivers_3 + P.coef_cars4_drivers_4_up * X.util_drivers_4_up + P.coef_cars34_persons_16_17 * X.util_persons_16_17 + P.coef_cars34_persons_18_24 * X.util_persons_18_24 + P.coef_cars34_persons_25_34 * X.util_persons_25_34 + P.coef_cars234_presence_children_0_4 * X.util_presence_children_0_4 + P.coef_cars34_presence_children_5_17 * X.util_presence_children_5_17 + P.coef_cars4_num_workers_clip_3 * X.util_num_workers_clip_3 + P.coef_cars4_hh_income_0_30k * X.util_hh_income_0_30k + P.coef_cars4_hh_income_30_up * X.util_hh_income_30_75k + P.coef_cars4_hh_income_30_up * X.util_hh_income_75k_up + P.coef_cars34_density_0_10_no_workers * X.util_density_0_10_no_workers + P.coef_cars34_density_10_up_no_workers * X.util_density_10_up_no_workers + P.coef_cars34_density_0_10_no_workers * X.util_density_0_10_workers + P.coef_cars34_density_10_up_no_workers * X.util_density_10_up_workers + P.coef_cars4_asc * X.util_asc + P.coef_cars34_asc_san_francisco * X.util_asc_san_francisco + P.coef_cars34_asc_county * X.util_asc_solano + P.coef_cars34_asc_county * X.util_asc_napa + P.coef_cars34_asc_county * X.util_asc_sonoma + P.coef_cars234_asc_marin * X.util_asc_marin + P.coef_retail_auto_no_workers * X.util_retail_auto_no_workers + P.coef_retail_auto_workers * X.util_retail_auto_workers + P.coef_retail_transit_no_workers * X.util_retail_transit_no_workers + P.coef_retail_transit_workers * X.util_retail_transit_workers + P.coef_retail_non_motor * X.util_retail_non_motor_no_workers + P.coef_retail_non_motor * X.util_retail_non_motor_workers + P.coef_cars4_auto_time_saving_per_worker * X.util_auto_time_saving_per_worker


In [12]:
larch_asim.apply_coefficients(coefficients, m)

In [13]:
m.pf

Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note
coef_cars1_asc,1.1865,0.0,0.0,,,0,
coef_cars1_asc_county,-0.5660,0.0,0.0,,,0,
coef_cars1_asc_marin,-0.2434,0.0,0.0,,,0,
coef_cars1_asc_san_francisco,0.4259,0.0,0.0,,,0,
coef_cars1_auto_time_saving_per_worker,0.4707,0.0,0.0,,,0,
...,...,...,...,...,...,...,...
coef_retail_auto_no_workers,0.0626,0.0,0.0,,,0,
coef_retail_auto_workers,0.1646,0.0,0.0,,,0,
coef_retail_non_motor,-0.0300,0.0,0.0,,,1,
coef_retail_transit_no_workers,-0.3053,0.0,0.0,,,0,


In [14]:
# Bundle the chooser table with the alternative codes and names for larch.
# NOTE(review): `co=` presumably marks chooser_data as one-row-per-chooser
# data -- confirm against the larch DataFrames documentation.
# NOTE(review): `av=True` presumably makes every alternative available to every
# chooser; m.estimate() later reports that availability data "is set and being
# provided" even though it was not requested -- confirm this is intended.
d = larch.DataFrames(
    co=chooser_data,
    alt_codes=altcodes,
    alt_names=altnames,
    av=True,
)

In [15]:
m.dataservice = d

In [16]:
# chooser_data carries both `model_choice` and `override_choice`; the
# `override_choice` column is the one named here as the model's choice column.
# NOTE(review): presumably override_choice holds the observed (survey) choice
# to estimate against -- confirm with the EDB documentation.
m.choice_co_code = 'override_choice'

# Estimate

Note: The demo test data here is 100 households and the model has 
57 estimated parameters -- the result is a very over-specified
model which does not have a numerically stable likelihood maximizing
solution.

In [17]:
m.estimate()

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
coef_cars1_asc,3.396437,0.0,0.0,,,0,,3.396437
coef_cars1_asc_county,-0.566001,0.0,0.0,,,0,,-0.566001
coef_cars1_asc_marin,-0.243401,0.0,0.0,,,0,,-0.243401
coef_cars1_asc_san_francisco,2.635835,0.0,0.0,,,0,,2.635835
coef_cars1_auto_time_saving_per_worker,0.176972,0.0,0.0,,,0,,0.176972
...,...,...,...,...,...,...,...,...
coef_retail_auto_no_workers,-0.365833,0.0,0.0,,,0,,-0.365833
coef_retail_auto_workers,-0.299251,0.0,0.0,,,0,,-0.299251
coef_retail_non_motor,-0.030000,0.0,0.0,,,1,,-0.030000
coef_retail_transit_no_workers,-0.331782,0.0,0.0,,,0,,-0.331782






  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


Unnamed: 0,0
coef_cars1_asc,3.396437
coef_cars1_asc_county,-0.566001
coef_cars1_asc_marin,-0.243401
coef_cars1_asc_san_francisco,2.635835
coef_cars1_auto_time_saving_per_worker,0.176972
coef_cars1_density_0_10_no_workers,0.000000
coef_cars1_density_10_up_no_workers,-0.016903
coef_cars1_density_10_up_workers,-0.013531
coef_cars1_drivers_2,0.000000
coef_cars1_drivers_3,0.000000

Unnamed: 0,0
coef_cars1_asc,3.396437
coef_cars1_asc_county,-0.566001
coef_cars1_asc_marin,-0.243401
coef_cars1_asc_san_francisco,2.635835
coef_cars1_auto_time_saving_per_worker,0.176972
coef_cars1_density_0_10_no_workers,0.0
coef_cars1_density_10_up_no_workers,-0.016903
coef_cars1_density_10_up_workers,-0.013531
coef_cars1_drivers_2,0.0
coef_cars1_drivers_3,0.0


In [18]:
# m.possible_overspecification

# Outputs

In [19]:
# Only coefficients that also appear in the model's parameter frame are updated;
# names absent from m.pf keep the value read from the original coefficients file.
est_names = coefficients.index[coefficients.index.isin(m.pf.index)].tolist()
coefficients.loc[est_names, 'value'] = m.pf.loc[est_names, 'value']

In [20]:
# Write out replacement coefficients file and model summaries
os.makedirs(os.path.join(edb_directory,'estimated'), exist_ok=True)

In [21]:
# coefficient_name was made the index when the coefficients file was read, so
# reset_index() turns it back into an ordinary column before writing;
# index=False then keeps the new positional index out of the CSV.
coefficients.reset_index().to_csv(
    os.path.join(edb_directory,'estimated',"auto_ownership_coefficients_revised.csv"), 
    index=False,
)

In [22]:
# Save the model to an Excel workbook in the 'estimated' subfolder of the
# EDB directory. The call returns larch's ExcelWriter object, which
# is what the notebook shows as this cell's output.
m.to_xlsx(
    os.path.join(edb_directory,'estimated',"auto_ownership_model_estimation.xlsx"), 
)

<larch.util.excel.ExcelWriter at 0x1bc28a7df08>