# Estimating Workplace Location

Integration with [larch](https://larch.newman.me) for model estimation. See [estimation tools review](https://github.com/ActivitySim/activitysim/wiki/Estimation-Tools-Review) for more information about larch.

# Run the Example

Output an estimation data bundle (EDB), which contains:
  - model settings - workplace_location_model_settings.yaml
  - coefficients - workplace_location_coefficients.csv
  - utilities specification - workplace_location_SPEC.csv
  - land use data - workplace_location_landuse.csv
  - size terms - workplace_location_size_terms.csv
  - alternatives values - workplace_location_alternatives_combined.csv
  - chooser data - workplace_location_choosers_combined.csv
  - choices made - workplace_location_choices.csv

# Read EDB 

In [1]:
import larch  # !conda install larch #for estimation
import os
import pandas as pd
import numpy as np
import yaml 
import larch.util.excel
import larch_asim  # utility functions in a local module

In [2]:
# Directory holding the estimation data bundle (EDB) files for this model.
edb_directory = "estimation_data_bundle/workplace_location/"


def read_csv(filename, **kwargs):
    """Read a CSV file located under ``edb_directory``.

    Parameters
    ----------
    filename : str
        File name (or path) joined onto ``edb_directory``.
    **kwargs
        Passed through unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
    """
    csv_path = os.path.join(edb_directory, filename)
    return pd.read_csv(csv_path, **kwargs)

In [3]:
# Load every table of the estimation data bundle with read_csv, which
# joins each file name onto edb_directory.

# Tables read with a named index column.
coefficients = read_csv(
    "workplace_location_coefficients.csv",
    index_col="coefficient_name",
)
landuse = read_csv(
    "workplace_location_landuse.csv",
    index_col="TAZ",
)

# Tables read with the default integer index.
spec = read_csv("workplace_location_SPEC.csv")
size_spec = read_csv("workplace_location_size_terms.csv")
alt_values = read_csv("workplace_location_alternatives_combined.csv")
chooser_data = read_csv("workplace_location_choosers_combined.csv")
choices = read_csv("workplace_location_choices.csv")


In [4]:
landuse

Unnamed: 0_level_0,DISTRICT,SD,county_id,TOTHH,TOTPOP,TOTACRE,RESACRE,CIACRE,TOTEMP,AGE0519,...,OPRKCST,area_type,HSENROLL,COLLFTE,COLLPTE,TOPOLOGY,TERMINAL,household_density,employment_density,density_index
TAZ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,1,1,46,82,20.3,1.0,15.00000,27318,7,...,932.83514,0,0.00000,0.00000,0.00000,3,5.89564,2.875000,1707.375000,2.870167
2,1,1,1,134,240,31.1,1.0,24.79297,42078,19,...,885.61682,0,0.00000,0.00000,0.00000,1,5.84871,5.195214,1631.374751,5.178722
3,1,1,1,267,476,14.7,1.0,2.31799,2445,38,...,716.27252,0,0.00000,0.00000,0.00000,1,5.53231,80.470405,736.891913,72.547987
4,1,1,1,151,253,19.3,1.0,18.00000,22434,20,...,314.00000,0,0.00000,0.00000,0.00000,2,5.64330,7.947368,1180.736842,7.894233
5,1,1,1,611,1069,52.7,1.0,15.00000,15662,86,...,314.01431,0,0.00000,72.14684,0.00000,1,5.52555,38.187500,978.875000,36.753679
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,4,4,1,2779,8062,376.0,172.0,15.00000,1760,1178,...,0.00000,3,0.00000,0.00000,0.00000,1,2.04173,14.860963,9.411765,5.762347
187,4,4,1,1492,4139,214.0,116.0,10.00000,808,603,...,0.00000,3,0.00000,0.00000,0.00000,2,1.73676,11.841270,6.412698,4.159890
188,4,4,1,753,4072,232.0,11.0,178.00000,4502,1117,...,0.00000,2,3961.04761,17397.79102,11152.93652,1,2.28992,3.984127,23.820106,3.413233
189,4,4,1,3546,8476,201.0,72.0,6.00000,226,1057,...,0.00000,2,0.00000,0.00000,0.00000,1,2.88773,45.461538,2.897436,2.723836


## Work Size Spec (extraction)

In [5]:
# Extract the workplace rows of the size spec, indexed by segment, and keep
# only the size columns whose maximum over those rows is positive.
work_size_spec = (
    size_spec
    .query("model_selector == 'workplace'")
    .drop(columns='model_selector')
    .set_index('segment')
    .loc[:, lambda terms: terms.max() > 0]
)
work_size_spec

Unnamed: 0_level_0,RETEMPN,FPSEMPN,HEREMPN,OTHEMPN,AGREMPN,MWTEMPN
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
work_low,0.129,0.193,0.383,0.12,0.01,0.164
work_med,0.12,0.197,0.325,0.139,0.008,0.21
work_high,0.11,0.207,0.284,0.154,0.006,0.239
work_veryhigh,0.093,0.27,0.241,0.146,0.004,0.246


## Work Size Coefficients (extraction)

In [6]:
# Reshape the size spec from wide to long: one row per (segment, size column)
# pair, with the size term weight in the third column.
size_coef = work_size_spec.stack().reset_index()
# Label each row "<segment>_<size column>" (e.g. "work_low_RETEMPN"); these
# labels become the coefficient names.
size_coef.index = size_coef.iloc[:,0] +"_"+ size_coef.iloc[:,1]
# Keep only strictly positive weights, since their log is taken below.
size_coef = size_coef.loc[size_coef.iloc[:,2]>0]
# Default every size coefficient to 'F' ...
size_coef['constrain'] = 'F'
# ... then flag the first entry of each segment with 'T'.
# NOTE(review): presumably 'T' holds that coefficient fixed so the rest of the
# segment is estimated relative to it -- confirm in larch_asim.apply_coefficients.
one_each = size_coef.groupby('segment').first().reset_index()
size_coef.loc[one_each.iloc[:,0] +"_"+ one_each.iloc[:,1], 'constrain'] = 'T'
# Drop the two label columns (now encoded in the index), keeping the weight
# and the constrain flag, and name things like the coefficients table.
size_coef = size_coef.iloc[:,2:]
size_coef.columns = ['value','constrain']
size_coef.index.name = 'coefficient_name'
# Store the natural log of each weight as the coefficient value.
size_coef['value'] = np.log(size_coef['value'])
size_coef

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
work_low_RETEMPN,-2.047943,T
work_low_FPSEMPN,-1.645065,F
work_low_HEREMPN,-0.95972,F
work_low_OTHEMPN,-2.120264,F
work_low_AGREMPN,-4.60517,F
work_low_MWTEMPN,-1.807889,F
work_med_RETEMPN,-2.120264,T
work_med_FPSEMPN,-1.624552,F
work_med_HEREMPN,-1.12393,F
work_med_OTHEMPN,-1.973281,F


## Settings

In [7]:
# Load the model settings YAML from the EDB directory.
# The context manager closes the file handle as soon as parsing is done.
# yaml.safe_load parses with yaml.SafeLoader.
settings_path = os.path.join(edb_directory, "workplace_location_model_settings.yaml")
with open(settings_path, "r") as settings_file:
    settings = yaml.safe_load(settings_file)

settings

{'SAMPLE_SIZE': 30,
 'SIMULATE_CHOOSER_COLUMNS': ['income_segment', 'TAZ'],
 'SAMPLE_SPEC': 'workplace_location_sample.csv',
 'SPEC': 'workplace_location.csv',
 'COEFFICIENTS': 'workplace_location_coeffs.csv',
 'LOGSUM_SETTINGS': 'tour_mode_choice.yaml',
 'LOGSUM_PREPROCESSOR': 'nontour_preprocessor',
 'LOGSUM_TOUR_PURPOSE': 'work',
 'CHOOSER_ORIG_COL_NAME': 'TAZ',
 'ALT_DEST_COL_NAME': 'alt_dest',
 'IN_PERIOD': 17,
 'OUT_PERIOD': 8,
 'DEST_CHOICE_COLUMN_NAME': 'workplace_taz',
 'annotate_persons': {'SPEC': 'annotate_persons_workplace',
  'DF': 'persons',
  'TABLES': ['land_use']},
 'annotate_households': {'SPEC': 'annotate_households_workplace',
  'DF': 'households',
  'TABLES': ['persons']},
 'CHOOSER_TABLE_NAME': 'persons_merged',
 'MODEL_SELECTOR': 'workplace',
 'CHOOSER_SEGMENT_COLUMN_NAME': 'income_segment',
 'CHOOSER_FILTER_COLUMN_NAME': 'is_worker',
 'SEGMENT_IDS': {'work_low': 1,
  'work_med': 2,
  'work_high': 3,
  'work_veryhigh': 4},
 'CONSTANTS': {'WORK_HIGH_SEGMENT_ID': 3

## Coefficients

In [8]:
coefficients

Unnamed: 0_level_0,value,constrain
coefficient_name,Unnamed: 1_level_1,Unnamed: 2_level_1
coef_dist_0_1,-0.8428,F
coef_dist_1_2,-0.3104,F
coef_dist_2_5,-0.3783,F
coef_dist_5_15,-0.1285,F
coef_dist_15_up,-0.0917,F
coef_dist_0_5_high,0.15,F
coef_dist_5_up_high,0.02,F
coef_mode_logsum,0.3,F


## Spec

In [9]:
spec

Unnamed: 0,Label,Description,Expression,coefficient
0,local_dist,,_DIST@skims['DIST'],1
1,util_dist_0_1,"Distance, piecewise linear from 0 to 1 miles","@_DIST.clip(0,1)",coef_dist_0_1
2,util_dist_1_2,"Distance, piecewise linear from 1 to 2 miles","@(_DIST-1).clip(0,1)",coef_dist_1_2
3,util_dist_2_5,"Distance, piecewise linear from 2 to 5 miles","@(_DIST-2).clip(0,3)",coef_dist_2_5
4,util_dist_5_15,"Distance, piecewise linear from 5 to 15 miles","@(_DIST-5).clip(0,10)",coef_dist_5_15
5,util_dist_15_up,"Distance, piecewise linear for 15+ miles",@(_DIST-15.0).clip(0),coef_dist_15_up
6,util_dist_0_5_high,"Distance 0 to 5 mi, high and very high income",@(df['income_segment']>=WORK_HIGH_SEGMENT_ID) ...,coef_dist_0_5_high
7,util_dist_15_up_high,"Distance 5+ mi, high and very high income",@(df['income_segment']>=WORK_HIGH_SEGMENT_ID) ...,coef_dist_5_up_high
8,util_size_variable,Size variable,@(df['size_term'] * df['shadow_price_size_term...,1
9,util_utility_adjustment,utility adjustment,@df['shadow_price_utility_adjustment'],1


In [10]:
# Remove shadow pricing and pre-existing size expression
# (the size terms are re-specified through m.quantity_ca instead).
# errors='ignore' keeps this cell idempotent: it reassigns `spec`, so on a
# re-run the two labels are already gone and a strict drop would raise KeyError.

spec = (
    spec
    .set_index('Label')
    .drop(index=['util_size_variable', 'util_utility_adjustment'], errors='ignore')
    .reset_index()
)

## Alt Values

In [11]:
alt_values

Unnamed: 0,person_id,variable,1,2,3,4,5,6,7,8,...,181,182,183,184,185,186,187,188,189,190
0,73062,TAZ,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,...,181.0,182.0,183.0,184.0,185.0,186.0,187.0,188.0,189.0,190.0
1,73062,mode_choice_logsum,-1.6362435488871192,-1.4929825128178205,-2.1292882030862463,-1.517217886111524,-2.077505398930201,-2.1393175276383545,-1.9792786853704318,-1.9402768115320042,...,-1.2531841517629387,-1.0909117041993586,-1.0074534209789625,-0.9942877884440126,-0.8617472793021375,-0.8933195823744491,-0.8526111703378986,-0.8073508509962445,-1.0172396387121214,-1.0855986863943157
2,73062,pick_count,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,73062,prob,0.023681536147045653,0.038405573612580825,0.002867237451163048,0.02248950905146199,0.01954833852378312,0.005392111810250817,0.016215260203216585,0.006196275581379968,...,0.00018672018930600382,0.00021125784224302474,0.0008833503808175141,0.0009941420012915495,0.0017628564399004049,0.0014492981103635726,0.0005788242915698628,0.004391363089547046,0.00015838477060510243,0.0009045538820515593
4,73062,shadow_price_size_term_adjustment,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48711,7538495,util_mode_logsum,-1.8880139317341136,-1.7669499031817812,-1.5420128978200283,-1.7607129752278634,-1.6811582583054274,-1.729260948532468,-1.6953718360296968,-1.706343726997913,...,-0.2095737687187247,-0.3201953321415343,-0.36449929233006306,-0.3389245353227024,-0.2732170993254152,-0.40719415102316364,-0.4408378058678504,-0.4764524536366856,-0.6079652742145248,-0.5347303817617927
48712,7538495,util_no_attractions,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
48713,7538495,util_sample_of_corrections_factor,3.556696011908456,3.0586220797014088,5.65682675712454,3.6912807688408167,3.8814714221361184,5.137299609332145,4.100666993555229,5.156321294253995,...,7.1171267134717935,7.443281571655769,6.198798068694531,5.987637968818029,5.426176263580913,5.981417385463189,7.024668052430423,5.1107649847407055,8.279048632848319,6.424274133257706
48714,7538495,util_size_variable,8.607993215742676,9.068802152955321,6.44746749765505,8.457988473558888,8.245952810520611,6.957999623413992,7.998487266140552,6.960822948235117,...,4.315499470829833,4.3335976552693785,5.721835205763653,5.789241258107118,6.1502039682537495,6.018658905769173,5.06677615336607,6.994814238173049,3.948605626489833,5.759690106594497


## Chooser Data

In [12]:
chooser_data

Unnamed: 0,person_id,income_segment,TAZ,model_choice,override_choice
0,73062,1,123,92,14
1,73212,1,127,130,5
2,73712,1,145,108,142
3,73752,1,149,20,38
4,73998,1,163,161,9
...,...,...,...,...,...
2559,7514761,1,106,73,90
2560,7514932,1,109,15,107
2561,7515185,1,142,20,13
2562,7515241,1,158,32,92


## Choices

In [13]:
choices

Unnamed: 0,person_id,model_choice
0,73062,92
1,73212,130
2,73712,108
3,73752,20
4,73998,161
...,...,...
5123,7443336,59
5124,7443337,86
5125,7443520,80
5126,7443521,124


# Data Setup

In [14]:
from larch import P, X

In [15]:
m = larch.Model()

In [16]:
# Build the model's alternative-specific utility from the spec table:
# 'Label' names the data column and 'coefficient' names the parameter.
# The 'local_dist' row is passed as ignore_x and does not appear as a term.
m.utility_ca = larch_asim.linear_utility_from_spec(
    spec,
    x_col='Label',
    p_col='coefficient',
    ignore_x=('local_dist',),
)
print(m.utility_ca)

  P.coef_dist_0_1 * X.util_dist_0_1
+ P.coef_dist_1_2 * X.util_dist_1_2
+ P.coef_dist_2_5 * X.util_dist_2_5
+ P.coef_dist_5_15 * X.util_dist_5_15
+ P.coef_dist_15_up * X.util_dist_15_up
+ P.coef_dist_0_5_high * X.util_dist_0_5_high
+ P.coef_dist_5_up_high * X.util_dist_15_up_high
+ P('-999') * X.util_no_attractions
+ P.coef_mode_logsum * X.util_mode_logsum
+ P('1') * X.util_sample_of_corrections_factor


In [17]:
# Size (quantity) function: one term per (segment, size column) pair.
# Each term multiplies the "<segment>_<column>" parameter by the size column
# and by an indicator expression on the chooser's income_segment id.
m.quantity_ca = sum(
    P(f"{segment}_{size_column}")
    * X(size_column)
    * X(f"income_segment=={settings['SEGMENT_IDS'][segment]}")
    for segment in work_size_spec.index
    for size_column in work_size_spec.columns
)

In [18]:
# Pass the utility coefficients table, then the size coefficients table,
# to the model through the larch_asim helper.
# NOTE(review): minimum/maximum presumably bound the size parameters to
# [-6, 6] during estimation -- confirm in larch_asim.apply_coefficients.
larch_asim.apply_coefficients(coefficients, m)
larch_asim.apply_coefficients(size_coef, m, minimum=-6, maximum=6)

In [19]:
m.pf

Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note
-999,-999.0,-999.0,-999.0,-999.0,-999.0,1,
1,1.0,1.0,1.0,1.0,1.0,1,
coef_dist_0_1,-0.8428,0.0,0.0,,,0,
coef_dist_0_5_high,0.15,0.0,0.0,,,0,
coef_dist_15_up,-0.0917,0.0,0.0,,,0,
coef_dist_1_2,-0.3104,0.0,0.0,,,0,
coef_dist_2_5,-0.3783,0.0,0.0,,,0,
coef_dist_5_15,-0.1285,0.0,0.0,,,0,
coef_dist_5_up_high,0.02,0.0,0.0,,,0,
coef_mode_logsum,0.3,0.0,0.0,,,0,


In [20]:
# Chooser-level (co) data, one row per person_id. The chooser's own TAZ is
# renamed to HOMETAZ; the alternatives data also carries a TAZ column.
x_co = (
    chooser_data
    .rename(columns={'TAZ': 'HOMETAZ'})
    .set_index('person_id')
)

In [21]:
# Index the combined alternatives table by (person_id, variable) and hand it
# to the larch_asim helper.
# NOTE(review): cv_to_ca presumably pivots this into one row per
# (person, alternative) with one column per variable -- confirm in larch_asim.
x_ca = larch_asim.cv_to_ca(
    alt_values.set_index(['person_id', 'variable'])
)

In [22]:
# Attach the land use attributes of each alternative's TAZ to the
# alternative-level data. validate='many_to_one' makes the merge fail with a
# clear error if landuse has duplicate TAZ keys, which would add rows and
# break the positional index restore below.
x_ca_1 = pd.merge(x_ca, landuse, on='TAZ', how='left', validate='many_to_one')
# Restore x_ca's index on the merged frame; a left merge keeps the order of
# the left rows, so the rows line up one-to-one.
x_ca_1.index = x_ca.index

In [23]:
# Bundle the chooser-level (co) and alternative-level (ca) tables into a
# single larch data object.
# NOTE(review): av=True presumably marks every alternative as available to
# every chooser -- confirm against the larch.DataFrames documentation.
d = larch.DataFrames(
    co=x_co,
    ca=x_ca_1,
    av=True,
)

In [24]:
d.info(1)

larch.DataFrames:  (not computation-ready)
  n_cases: 2564
  n_alts: 190
  data_ca:
    - TAZ                               (487160 non-null float64)
    - mode_choice_logsum                (487160 non-null float64)
    - pick_count                        (487160 non-null float64)
    - prob                              (487160 non-null float64)
    - shadow_price_size_term_adjustment (487160 non-null float64)
    - shadow_price_utility_adjustment   (487160 non-null float64)
    - size_term                         (487160 non-null float64)
    - util_dist_0_1                     (487160 non-null float64)
    - util_dist_0_5_high                (487160 non-null float64)
    - util_dist_15_up                   (487160 non-null float64)
    - util_dist_15_up_high              (487160 non-null float64)
    - util_dist_1_2                     (487160 non-null float64)
    - util_dist_2_5                     (487160 non-null float64)
    - util_dist_5_15                    (487160 non-null f

In [25]:
m.dataservice = d

In [26]:
# Take the chosen alternative from the chooser data's 'override_choice'
# column rather than its 'model_choice' column.
# NOTE(review): presumably override_choice is the observed destination and
# model_choice the simulated one -- confirm against the EDB documentation.
m.choice_co_code = 'override_choice'

# Estimate

Note: The demo test data here is 100 households and the model has 
57 estimated parameters -- the result is a very over-specified
model which does not have a numerically stable likelihood maximizing
solution.

In [27]:
m.estimate()

req_data does not request avail_ca or avail_co but it is set and being provided


Unnamed: 0,value,initvalue,nullvalue,minimum,maximum,holdfast,note,best
-999,-999.0,-999.0,-999.0,-999.0,-999.0,1,,-999.0
1,1.0,1.0,1.0,1.0,1.0,1,,1.0
coef_dist_0_1,-1.553978,0.0,0.0,,,0,,-1.553978
coef_dist_0_5_high,0.271221,0.0,0.0,,,0,,0.271221
coef_dist_15_up,-0.0917,0.0,0.0,,,0,,-0.0917
coef_dist_1_2,-0.790137,0.0,0.0,,,0,,-0.790137
coef_dist_2_5,-0.646956,0.0,0.0,,,0,,-0.646956
coef_dist_5_15,-0.358857,0.0,0.0,,,0,,-0.358857
coef_dist_5_up_high,0.175882,0.0,0.0,,,0,,0.175882
coef_mode_logsum,0.155788,0.0,0.0,,,0,,0.155788


  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,0
Unnamed: 0_level_1,0
-999,-999.000000
1,1.000000
coef_dist_0_1,-1.553978
coef_dist_0_5_high,0.271221
coef_dist_15_up,-0.091700
coef_dist_1_2,-0.790137
coef_dist_2_5,-0.646956
coef_dist_5_15,-0.358857
coef_dist_5_up_high,0.175882
coef_mode_logsum,0.155788

Unnamed: 0,0
-999,-999.0
1,1.0
coef_dist_0_1,-1.553978
coef_dist_0_5_high,0.271221
coef_dist_15_up,-0.0917
coef_dist_1_2,-0.790137
coef_dist_2_5,-0.646956
coef_dist_5_15,-0.358857
coef_dist_5_up_high,0.175882
coef_mode_logsum,0.155788

Unnamed: 0,0
-999,0.0
1,0.0
coef_dist_0_1,3.5e-05
coef_dist_0_5_high,-0.004163
coef_dist_15_up,0.0
coef_dist_1_2,-2.1e-05
coef_dist_2_5,-0.004546
coef_dist_5_15,-0.00602
coef_dist_5_up_high,-0.003887
coef_mode_logsum,0.001723


In [28]:
# m.possible_overspecification

# Outputs

In [29]:
# Copy estimated values back into the coefficients table, restricted to
# coefficient names that also exist in the model's parameter frame.
est_names = [
    coef_name
    for coef_name in coefficients.index
    if coef_name in m.pf.index
]
estimated_values = m.pf.loc[est_names, 'value']
coefficients.loc[est_names, 'value'] = estimated_values

In [30]:
# Write out replacement coefficients file and model summaries.
# Outputs go in an 'estimated' subdirectory of the EDB directory;
# exist_ok=True means no error is raised if it already exists.
os.makedirs(os.path.join(edb_directory,'estimated'), exist_ok=True)

In [31]:
# Save the updated coefficients table. reset_index() turns the
# coefficient_name index back into a regular column, so index=False
# avoids writing an extra row-number column.
coefficients.reset_index().to_csv(
    os.path.join(edb_directory,'estimated',"workplace_location_coefficients_revised.csv"), 
    index=False,
)

In [32]:
# Write the model to an Excel workbook in the 'estimated' directory.
# The call returns an ExcelWriter object, which is what the cell displays.
m.to_xlsx(
    os.path.join(edb_directory,'estimated',"workplace_location_model_estimation.xlsx"), 
)

<larch.util.excel.ExcelWriter at 0x217003e4848>

In [33]:
# Write size coefficients into size_spec
for segment_name in work_size_spec.index:
    # Rows of the full size spec that hold this workplace segment.
    segment_rows = (
        (size_spec['model_selector'] == 'workplace')
        & (size_spec['segment'] == segment_name)
    )
    for size_column in work_size_spec.columns:
        # size_coef values were stored as logs, so exponentiate the model's
        # parameter value to return to the size-term scale.
        estimate = m.get_value(f"{segment_name}_{size_column}")
        size_spec.loc[segment_rows, size_column] = np.exp(estimate)
        

In [34]:
# Rescale each row to total 1, not mathematically needed
# but to maintain a consistent approach from existing ASim.
# This touches every row of size_spec (all model selectors, not only the
# workplace rows) and treats all columns from the third onward as size terms.
# NOTE(review): assumes the first two columns are model_selector and segment.

size_term_totals = size_spec.iloc[:, 2:].sum(axis=1)
# A row whose terms total zero is divided by 1, i.e. left unchanged;
# dividing by the zero total would turn the whole row into NaN.
size_term_divisor = size_term_totals.where(size_term_totals != 0, 1)
size_spec.iloc[:, 2:] = size_spec.iloc[:, 2:].div(size_term_divisor, axis=0)

In [35]:
# Save the full size spec (all model selectors, with the re-estimated
# workplace rows) to the 'estimated' directory, without the row index.
size_spec.to_csv(
    os.path.join(edb_directory,'estimated',"workplace_location_size_terms.csv"), 
    index=False,
)