In [1]:
import orca
import pandas as pd
from urbansim.models import MNLDiscreteChoiceModel

import models, utils, lcm_utils
from urbansim.utils import misc, networks
import output_indicators

using seed 271828


In [2]:
import numpy as np
from collections import OrderedDict
from choicemodels import MultinomialLogit
from choicemodels.tools import MergedChoiceTable

  from pandas.core import datetools


In [3]:
# Load the estimation inputs: the buildings table from the HDF5 store, and the
# jobs / households tables from orca.

# read_hdf opens the file read-only and closes it once the table is loaded,
# so no HDFStore handle is left open for the lifetime of the kernel (and a
# wrong path raises instead of silently creating an empty store).
buildings = pd.read_hdf('building_store.h5', 'buildings')

# Alternatives for the two kinds of location choice: buildings with any
# non-residential floor space, and buildings with any residential units.
buildings_nonres = buildings[buildings.non_residential_sqft > 0]
buildings_res = buildings[buildings.residential_units > 0]

# Only the job columns used downstream are materialized.
jobs = orca.get_table('jobs').to_frame(['sector_id', 'building_id', 'home_based_status'])

# Cast building_id to int (bracket indexing makes the column assignment
# explicit).
# NOTE(review): astype('int') raises if building_id contains NaN - confirm
# every job is placed in a building.
jobs['building_id'] = jobs['building_id'].astype('int')

# Keep the orca table wrapper under its own name so that `hh` only ever refers
# to the resulting DataFrame.
hh_table = orca.get_table('households')
hh_vars = hh_table.local_columns + ['income_quartile', 'is_race4', 'is_race1', 'is_race2', 'ln_income',
                                    'hhsize_lt_3', 'is_young', 'has_children']
hh = hh_table.to_frame(hh_vars)

  return np.log1p(households.income)


In [8]:
# Re-estimate every 'elcm' submodel listed in the model category configs with
# choicemodels' MultinomialLogit, taking the explanatory variables from each
# submodel's YAML specification. For every config file the cell prints the
# file name, the patsy-style formula, the estimation engine and the fit
# results.
#
# NOTE(review): this cell was executed as In [8] directly after In [3] in the
# saved notebook; confirm it still runs on a fresh kernel (Restart & Run All).
#
# print(...) is called with a single argument throughout, which prints the
# same text under Python 2 and also parses under Python 3.

N_CHOOSERS = 3000     # upper bound on jobs sampled per submodel
N_ALTERNATIVES = 15   # sample_size handed to MergedChoiceTable

model_configs = lcm_utils.get_model_category_configs()

for model_category_name, model_category_attributes in model_configs.items():
    if model_category_name != 'elcm':
        continue

    for yaml_config in model_category_attributes['config_filenames']:
        print(yaml_config)
        # The YAML-defined model is only used as the source of the variable
        # list; it gets its own name so `model` always means the
        # MultinomialLogit being estimated.
        yaml_spec = MNLDiscreteChoiceModel.from_yaml(str_or_buffer=misc.config(yaml_config))

        # Patsy-form specification (printed for reference only; '- 1' drops
        # the intercept term).
        patsy_str = ' + '.join(yaml_spec.model_expression) + ' - 1'
        print(patsy_str)

        # Pylogit-form specification: one shared ('all_same') coefficient per
        # variable, in the order given by the YAML spec.
        vars_for_dict = OrderedDict([(varname, 'all_same') for varname in yaml_spec.model_expression])

        # The submodel id is the number between 'elcm' and the extension,
        # e.g. 'elcm/regional/elcm2.yaml' -> 2.
        submodel_id = int(yaml_config.split('elcm')[-1].split('.')[0])

        choosers = jobs[(jobs.sector_id == submodel_id) & (jobs.home_based_status == 0)]

        # Sampling without replacement raises ValueError when more draws are
        # requested than rows exist, so cap the draw at the number of eligible
        # jobs and skip sectors that have none.
        n_draws = min(N_CHOOSERS, len(choosers))
        if n_draws == 0:
            print('no jobs with home_based_status == 0 in sector %d; skipping' % submodel_id)
            continue

        choosers = choosers.loc[np.random.choice(
            choosers.index,
            n_draws, replace=False)]

        chosen = choosers['building_id']
        alternatives = buildings_nonres

        data = MergedChoiceTable(observations=choosers,
                                 alternatives=alternatives,
                                 chosen_alternatives=chosen,
                                 sample_size=N_ALTERNATIVES)

        # The OrderedDict specification is passed here; patsy_str above is
        # not used for estimation.
        model = MultinomialLogit(data=data.to_frame(),
                                 observation_id_col=data.observation_id_col,
                                 choice_col=data.choice_col,
                                 model_expression=vars_for_dict,
                                 alternative_id_col='building_id')
        print(model._estimation_engine)

        results = model.fit()
        print(results)
        print('')

elcm/regional/elcm2.yaml
general_type_is_Office + general_type_is_Industrial + b_ln_non_residential_sqft + zones_ln_empden_2 + nodes_drv_log_sum_60min_jobs + building_age + nodes_walk_log_sum_residential_units + sqft_price_nonres - 1
PyLogit
Log-likelihood at zero: -8,124.1506
Initial Log-likelihood: -8,124.1506
Estimation Time for Point Estimation: 0.42 seconds.
Final log-likelihood: -6,321.0926
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                      chosen   No. Observations:                3,000
Model:             Multinomial Logit Model   Df Residuals:                    2,992
Method:                                MLE   Df Model:                            8
Date:                     Thu, 21 Jun 2018   Pseudo R-squ.:                   0.222
Time:                             18:10:18   Pseudo R-bar-squ.:               0.221
AIC:                            12,658.185   Log-Likelihood:             -6,321.093
BIC:        

PyLogit
Log-likelihood at zero: -8,124.1506
Initial Log-likelihood: -8,124.1506
Estimation Time for Point Estimation: 0.18 seconds.
Final log-likelihood: -3,317.2959
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                      chosen   No. Observations:                3,000
Model:             Multinomial Logit Model   Df Residuals:                    2,995
Method:                                MLE   Df Model:                            5
Date:                     Thu, 21 Jun 2018   Pseudo R-squ.:                   0.592
Time:                             18:11:00   Pseudo R-bar-squ.:               0.591
AIC:                             6,644.592   Log-Likelihood:             -3,317.296
BIC:                             6,674.624   LL-Null:                    -8,124.151
                                   coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------

PyLogit
Log-likelihood at zero: -8,124.1506
Initial Log-likelihood: -8,124.1506
Estimation Time for Point Estimation: 0.15 seconds.
Final log-likelihood: -4,077.3977
                     Multinomial Logit Model Regression Results                    
Dep. Variable:                      chosen   No. Observations:                3,000
Model:             Multinomial Logit Model   Df Residuals:                    2,994
Method:                                MLE   Df Model:                            6
Date:                     Thu, 21 Jun 2018   Pseudo R-squ.:                   0.498
Time:                             18:11:53   Pseudo R-bar-squ.:               0.497
AIC:                             8,166.795   Log-Likelihood:             -4,077.398
BIC:                             8,202.834   LL-Null:                    -8,124.151
                                   coef    std err          z      P>|z|      [0.025      0.975]
-----------------------------------------------------------------