# MNL

## Initial Model

In [1]:
# importing the required libraries
from collections import OrderedDict 

import pandas as pd
import numpy as np

import pylogit as pl 

import warnings
# Suppress every warning for the rest of the session: the 'ignore' action is
# registered without any category or module restriction.
# NOTE(review): this also hides any warnings raised during model estimation;
# consider narrowing the filter to the specific warning that was noisy.
warnings.filterwarnings('ignore')

# Load the comma-separated file "nestlogit.csv" from the working directory
# into the `data` frame that the later cells transform and model.
data = pd.read_csv(filepath_or_buffer="nestlogit.csv", sep=",")

In [2]:
# Integer-encode the categorical columns (label encoding, not one-hot dummies)
from sklearn.preprocessing import LabelEncoder

# Use a separate encoder per column. LabelEncoder.fit() returns the encoder
# itself, so fitting one shared instance three times would leave data_AGE,
# data_GENDER and data_DEMOGRAPHY all pointing at the same object, fitted on
# the last column only (useless for a later inverse_transform of AGE/GENDER).
data_AGE = LabelEncoder().fit(data['AGE'])
data['AGE'] = data_AGE.transform(data['AGE'])

data_GENDER = LabelEncoder().fit(data['GENDER'])
data['GENDER'] = data_GENDER.transform(data['GENDER'])

data_DEMOGRAPHY = LabelEncoder().fit(data['DEMOGRAPHY'])
data['DEMOGRAPHY'] = data_DEMOGRAPHY.transform(data['DEMOGRAPHY'])

# Keep `le` bound for backward compatibility: previously it was the single
# shared encoder, which ended up fitted on DEMOGRAPHY.
le = data_DEMOGRAPHY

# Remove the two columns that are not used by the models below.
# NOTE(review): 'Unnamed: 0' looks like a saved CSV index column -- confirm.
data = data.drop(columns=['Unnamed: 0', 'EXTRA_INCOME_AMOUNT'])

In [3]:
# Names of the columns that identify the alternative, the observation
# (choice situation) and the chosen-alternative indicator.
custom_alt_id, obs_id_column, choice_column = (
    "alternative_id",
    "choiceSituationID",
    "Nest_Choice",
)

# Labels of the columns at positions 3..8 of the frame.
# NOTE(review): presumably these are the explanatory variables; the slice is
# positional, so it depends on the column order after the drop above -- confirm.
ind_variables = list(data.columns[3:9])

In [4]:
# Utility specification for the initial model.
# Each row is (variable, alternative ids, coefficient labels); every variable
# here gets one label per listed alternative id (1, 2 and 3).
utility_terms = [
    ("intercept",
     [1, 2, 3],
     ['ASC Priority', 'ASC Standard', 'ASC Wait']),
    ("selfpay",
     [1, 2, 3],
     ['Priority selfpay, units:hrs',
      'Standard selfpay, units:hrs',
      'Wait&Save selfpay, units:hrs']),
    ("cost",
     [1, 2, 3],
     ['Priority Cost, units:$',
      'Standard Cost, units:$',
      'Wait&Save Cost, units:$']),
    ("wait",
     [1, 2, 3],
     ['Priority Wait Time',
      'Standard Wait Time',
      'Wait&Save Wait Time']),
    ("traveltime",
     [1, 2, 3],
     ['Priority Travel Time, units:hrs',
      'Standard Travel Time, units:hrs',
      'Wait&Save Travel Time, units:hrs']),
]

# Split the table into the two ordered mappings passed to the model builder.
basic_specification = OrderedDict((var, alts) for var, alts, _ in utility_terms)
basic_names = OrderedDict((var, labels) for var, _, labels in utility_terms)

In [5]:
# Estimate the multinomial logit model (MNL) described by the current
# `basic_specification` / `basic_names`.
lyft_mnl = pl.create_choice_model(
    data=data,
    alt_id_col=custom_alt_id,
    obs_id_col=obs_id_column,
    choice_col=choice_column,
    specification=basic_specification,
    model_type="MNL",
    names=basic_names,
)

# Start the optimizer from an all-zero vector with one entry per coefficient.
# The length is derived from the specification (one coefficient per top-level
# entry of each variable's list) instead of being hard-coded, so it cannot
# drift out of sync when the specification is edited.
# Assumes every specification value is a list, as in this notebook.
n_index_coefs = sum(len(alt_groups) for alt_groups in basic_specification.values())
lyft_mnl.fit_mle(np.zeros(n_index_coefs))

Log-likelihood at zero: -307.7573
Initial Log-likelihood: -307.7573
Estimation Time for Point Estimation: 0.01 seconds.
Final log-likelihood: -231.6002


In [6]:
# Display the fitted model's summary (fit statistics and the coefficient
# table) as this cell's output.
lyft_mnl.get_statsmodels_summary()

0,1,2,3
Dep. Variable:,Nest_Choice,No. Observations:,222.0
Model:,Multinomial Logit Model,Df Residuals:,207.0
Method:,MLE,Df Model:,15.0
Date:,"Tue, 26 Apr 2022",Pseudo R-squ.:,0.247
Time:,10:09:46,Pseudo R-bar-squ.:,0.199
AIC:,493.200,Log-Likelihood:,-231.6
BIC:,544.241,LL-Null:,-307.757

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Priority,6.3801,1.231,5.184,0.000,3.968,8.792
ASC Standard,9.2354,1.461,6.320,0.000,6.371,12.100
ASC Wait,6.5711,2.287,2.873,0.004,2.088,11.054
"Priority selfpay, units:hrs",-1.5826,0.441,-3.588,0.000,-2.447,-0.718
"Standard selfpay, units:hrs",-0.9697,0.501,-1.937,0.053,-1.951,0.012
"Wait&Save selfpay, units:hrs",-0.0345,0.546,-0.063,0.950,-1.104,1.035
"Priority Cost, units:$",-0.1112,0.054,-2.047,0.041,-0.218,-0.005
"Standard Cost, units:$",-0.2582,0.059,-4.353,0.000,-0.374,-0.142
"Wait&Save Cost, units:$",-0.2891,0.060,-4.821,0.000,-0.407,-0.172


## Model Optimization

In [7]:
# Utility specification -- one travel time.
# Each row is (variable, alternative ids, coefficient labels). A flat list
# gives one label per listed alternative; the nested list for "traveltime"
# groups alternatives 1-3 under the single label 'Travel Time'.
utility_terms = [
    ("intercept",
     [1, 2, 3],
     ['ASC Priority', 'ASC Standard', 'ASC Wait']),
    ("selfpay",
     [1, 2],
     ['Priority selfpay, units:hrs',
      'Standard selfpay, units:hrs']),
    ("cost",
     [1, 2, 3],
     ['Priority Cost, units:$',
      'Standard Cost, units:$',
      'Wait&Save Cost, units:$']),
    ("wait",
     [1, 2],
     ['Priority Wait Time, units:hrs',
      'Standard Wait Time, units:hrs']),
    ("traveltime",
     [[1, 2, 3]],
     ['Travel Time']),
]

# Split the table into the two ordered mappings passed to the model builder.
basic_specification = OrderedDict((var, alts) for var, alts, _ in utility_terms)
basic_names = OrderedDict((var, labels) for var, _, labels in utility_terms)

In [8]:
# Estimate the multinomial logit model (MNL) described by the current
# `basic_specification` / `basic_names`.
lyft_mnl = pl.create_choice_model(
    data=data,
    alt_id_col=custom_alt_id,
    obs_id_col=obs_id_column,
    choice_col=choice_column,
    specification=basic_specification,
    model_type="MNL",
    names=basic_names,
)

# Start the optimizer from an all-zero vector with one entry per coefficient.
# The length is derived from the specification (one coefficient per top-level
# entry of each variable's list, so a nested group such as [[1, 2, 3]] counts
# once) instead of being hard-coded, so it cannot drift out of sync when the
# specification is edited.
# Assumes every specification value is a list, as in this notebook.
n_index_coefs = sum(len(alt_groups) for alt_groups in basic_specification.values())
lyft_mnl.fit_mle(np.zeros(n_index_coefs))

Log-likelihood at zero: -307.7573
Initial Log-likelihood: -307.7573
Estimation Time for Point Estimation: 0.01 seconds.
Final log-likelihood: -239.1730


In [9]:
# Display the fitted model's summary (fit statistics and the coefficient
# table) as this cell's output.
lyft_mnl.get_statsmodels_summary()

0,1,2,3
Dep. Variable:,Nest_Choice,No. Observations:,222.0
Model:,Multinomial Logit Model,Df Residuals:,211.0
Method:,MLE,Df Model:,11.0
Date:,"Tue, 26 Apr 2022",Pseudo R-squ.:,0.223
Time:,10:09:46,Pseudo R-bar-squ.:,0.187
AIC:,500.346,Log-Likelihood:,-239.173
BIC:,537.775,LL-Null:,-307.757

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Priority,7.1501,1.149,6.223,0.000,4.898,9.402
ASC Standard,8.7879,1.356,6.481,0.000,6.130,11.445
ASC Wait,4.1080,1.009,4.073,0.000,2.131,6.085
"Priority selfpay, units:hrs",-1.6667,0.355,-4.691,0.000,-2.363,-0.970
"Standard selfpay, units:hrs",-0.9910,0.411,-2.411,0.016,-1.797,-0.185
"Priority Cost, units:$",-0.1923,0.049,-3.952,0.000,-0.288,-0.097
"Standard Cost, units:$",-0.2222,0.050,-4.459,0.000,-0.320,-0.125
"Wait&Save Cost, units:$",-0.2209,0.050,-4.405,0.000,-0.319,-0.123
"Priority Wait Time, units:hrs",-0.7822,0.193,-4.060,0.000,-1.160,-0.405


In [10]:
# Utility specification -- separate travel time.
# Each row is (variable, alternative ids, coefficient labels), with one label
# per listed alternative id; "traveltime" is listed for alternatives 2 and 3.
utility_terms = [
    ("intercept",
     [1, 2, 3],
     ['ASC Priority', 'ASC Standard', 'ASC Wait']),
    ("selfpay",
     [1, 2],
     ['Priority selfpay, units:hrs',
      'Standard selfpay, units:hrs']),
    ("cost",
     [1, 2, 3],
     ['Priority Cost, units:$',
      'Standard Cost, units:$',
      'Wait&Save Cost, units:$']),
    ("wait",
     [1, 2],
     ['Priority Wait Time, units:hrs',
      'Standard Wait Time, units:hrs']),
    ("traveltime",
     [2, 3],
     ['Standard Travel Time, units:hrs',
      'Wait&Save Travel Time, units:hrs']),
]

# Split the table into the two ordered mappings passed to the model builder.
basic_specification = OrderedDict((var, alts) for var, alts, _ in utility_terms)
basic_names = OrderedDict((var, labels) for var, _, labels in utility_terms)

In [11]:
# Estimate the multinomial logit model (MNL) described by the current
# `basic_specification` / `basic_names`.
lyft_mnl = pl.create_choice_model(
    data=data,
    alt_id_col=custom_alt_id,
    obs_id_col=obs_id_column,
    choice_col=choice_column,
    specification=basic_specification,
    model_type="MNL",
    names=basic_names,
)

# Start the optimizer from an all-zero vector with one entry per coefficient.
# The length is derived from the specification (one coefficient per top-level
# entry of each variable's list) instead of being hard-coded, so it cannot
# drift out of sync when the specification is edited.
# Assumes every specification value is a list, as in this notebook.
n_index_coefs = sum(len(alt_groups) for alt_groups in basic_specification.values())
lyft_mnl.fit_mle(np.zeros(n_index_coefs))

Log-likelihood at zero: -307.7573
Initial Log-likelihood: -307.7573
Estimation Time for Point Estimation: 0.01 seconds.
Final log-likelihood: -232.4122


In [12]:
# Display the fitted model's summary (fit statistics and the coefficient
# table) as this cell's output.
lyft_mnl.get_statsmodels_summary()

0,1,2,3
Dep. Variable:,Nest_Choice,No. Observations:,222.0
Model:,Multinomial Logit Model,Df Residuals:,210.0
Method:,MLE,Df Model:,12.0
Date:,"Tue, 26 Apr 2022",Pseudo R-squ.:,0.245
Time:,10:09:46,Pseudo R-bar-squ.:,0.206
AIC:,488.824,Log-Likelihood:,-232.412
BIC:,529.656,LL-Null:,-307.757

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Priority,5.8210,1.063,5.475,0.000,3.737,7.905
ASC Standard,8.7380,1.361,6.419,0.000,6.070,11.406
ASC Wait,4.1983,0.993,4.229,0.000,2.253,6.144
"Priority selfpay, units:hrs",-1.5629,0.358,-4.371,0.000,-2.264,-0.862
"Standard selfpay, units:hrs",-0.9163,0.410,-2.233,0.026,-1.720,-0.112
"Priority Cost, units:$",-0.0782,0.027,-2.885,0.004,-0.131,-0.025
"Standard Cost, units:$",-0.2356,0.050,-4.742,0.000,-0.333,-0.138
"Wait&Save Cost, units:$",-0.2737,0.054,-5.083,0.000,-0.379,-0.168
"Priority Wait Time, units:hrs",-0.8076,0.198,-4.087,0.000,-1.195,-0.420


# Nested Logit

In [13]:
# Nest definition: nest label -> ids of the alternatives it contains.
# "Lyft" holds alternatives 1-3; "non-Lyft" holds alternative 4 only.
nest_membership = OrderedDict([
    ("Lyft", [1, 2, 3]),
    ("non-Lyft", [4]),
])

In [14]:
# Utility specification used for the nested logit model.
# Each row is (variable, alternative ids, coefficient labels), with one label
# per listed alternative id.
utility_terms = [
    ("intercept",
     [1, 2, 3],
     ['ASC Priority', 'ASC Standard', 'ASC Wait']),
    ("selfpay",
     [1, 2],
     ['Priority selfpay, units:hrs',
      'Standard selfpay, units:hrs']),
    ("cost",
     [1, 2, 3],
     ['Priority Cost, units:$',
      'Standard Cost, units:$',
      'Wait&Save Cost, units:$']),
    ("wait",
     [1, 2],
     ['Priority Wait Time, units:hrs',
      'Standard Wait Time, units:hrs']),
    ("traveltime",
     [2, 3],
     ['Standard Travel Time, units:hrs',
      'Wait&Save Travel Time, units:hrs']),
]

# Split the table into the two ordered mappings passed to the model builder.
basic_specification = OrderedDict((var, alts) for var, alts, _ in utility_terms)
basic_names = OrderedDict((var, labels) for var, _, labels in utility_terms)

In [15]:
# Estimate the nested logit model using the nests in `nest_membership`.
# NOTE(review): the result is stored under the name `lyft_mnl` although it is
# a nested logit model; the name is kept because the cells below read it.
lyft_mnl = pl.create_choice_model(
    data=data,
    alt_id_col=custom_alt_id,
    obs_id_col=obs_id_column,
    choice_col=choice_column,
    specification=basic_specification,
    model_type="Nested Logit",
    names=basic_names,
    nest_spec=nest_membership,
)

# Start the optimizer from an all-zero vector holding one entry per nest
# parameter plus one per index coefficient. Both counts are derived from the
# inputs instead of being hard-coded, so the length cannot drift out of sync
# when the nests or the specification are edited.
# Assumes every specification value is a list, as in this notebook.
# NOTE(review): the "non-Lyft" nest contains a single alternative and its
# parameter is reported as 0 with no standard error in the summary; consider
# holding it fixed via fit_mle's `constrained_pos` -- confirm in the pylogit docs.
n_nest_params = len(nest_membership)
n_index_coefs = sum(len(alt_groups) for alt_groups in basic_specification.values())
lyft_mnl.fit_mle(np.zeros(n_nest_params + n_index_coefs))

Log-likelihood at zero: -324.7433
Initial Log-likelihood: -324.7433
Estimation Time for Point Estimation: 0.27 seconds.
Final log-likelihood: -185.7664


In [16]:
# Display the fitted model's summary (fit statistics and the coefficient
# table) as this cell's output. At this point `lyft_mnl` holds the nested
# logit model fitted in the previous cell.
lyft_mnl.get_statsmodels_summary()

0,1,2,3
Dep. Variable:,Nest_Choice,No. Observations:,222.0
Model:,Nested Logit Model,Df Residuals:,208.0
Method:,MLE,Df Model:,14.0
Date:,"Tue, 26 Apr 2022",Pseudo R-squ.:,0.428
Time:,10:09:46,Pseudo R-bar-squ.:,0.385
AIC:,399.533,Log-Likelihood:,-185.766
BIC:,447.170,LL-Null:,-324.743

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Lyft,-3.4545,0.543,-6.367,0.000,-4.518,-2.391
non-Lyft,0,,,,,
ASC Priority,3.1253,0.783,3.992,0.000,1.591,4.660
ASC Standard,3.1627,0.803,3.938,0.000,1.588,4.737
ASC Wait,2.8576,0.657,4.351,0.000,1.570,4.145
"Priority selfpay, units:hrs",-0.0500,0.033,-1.532,0.125,-0.114,0.014
"Standard selfpay, units:hrs",-0.0392,0.028,-1.398,0.162,-0.094,0.016
"Priority Cost, units:$",-0.0442,0.022,-2.037,0.042,-0.087,-0.002
"Standard Cost, units:$",-0.0481,0.024,-2.024,0.043,-0.095,-0.002


In [17]:
# Convert the estimated "Lyft" nest parameter into Mu = 1 + exp(-parameter),
# i.e. the reciprocal of the logistic function evaluated at the parameter.
# NOTE(review): presumably this undoes pylogit's logistic parameterisation of
# the nest scale -- confirm against the pylogit documentation.
lyft_nest_param = lyft_mnl.params["Lyft"]
pylogit_mu = np.exp(-lyft_nest_param) + 1.0

# Report the value with thousands separators and four decimals.
mu_message = "PyLogit's estimated Mu is: {:,.4f}"
print(mu_message.format(pylogit_mu))

PyLogit's estimated Mu is: 32.6433


## We choose MNL as our final model