In [1]:
# PS1 - CE264
# Sample Code
# GSIs: Mustapha Harb - Mengqiao Yu - Andrew Campbell

# importing the required libraries
from collections import OrderedDict    # For recording the model specification 

import pandas as pd                    # For file input/output
import numpy as np                     # For vectorized math operations

import pylogit as pl                   # For MNL model estimation and
                                       # conversion from wide to long format

import warnings
warnings.filterwarnings('ignore')


# Load the comma-separated data file into a DataFrame
data_01 = pd.read_csv("data01.csv", sep=",")

# Inspect the column names available in the data
data_01.columns

# Preview the first 20 rows of the data
data_01.head(20)



# Utility specification, kept as one table so that every variable sits next
# to the labels of its coefficients.
#
# Each row is (data column, alternatives, coefficient labels).
# Judging by the labels, alternative 1 is Train, 2 is Metro and 3 is Car.
# A nested list such as [1, 2] puts several alternatives under one shared
# coefficient, which is why it is paired with a single label.
_utility_terms = [
    (
        "intercept",
        [1, 2],
        ['ASC Train', 'ASC Metro'],
    ),
    (
        "travel_time_hrs",
        [[1, 2], 3],
        ['Travel Time, units:hrs (Train and Metro)',
         'Travel Time, units:hrs (Car)'],
    ),
    (
        "travel_cost_hundreth",
        [[1, 2], 3],
        ['Travel Cost, units:hundredth (Train and Metro)',
         'Travel Cost, units:hundredth (Car)'],
    ),
    (
        "headway_hrs",
        [1, 2],
        ["Headway, units:hrs, (Train)",
         "Headway, units:hrs, (Metro)"],
    ),
]

# Ordered mappings: variable -> alternatives, and variable -> labels.
# The insertion order follows the table above.
basic_specification = OrderedDict(
    (variable, alternatives) for variable, alternatives, _ in _utility_terms
)
basic_names = OrderedDict(
    (variable, labels) for variable, _, labels in _utility_terms
)



##########
# Names of the data columns that hold the observation id, the alternative id
# and the choice for each row
##########
# Column giving the observation id that each row belongs to.
observation_id = "obs_id"

# Column giving the alternative that each row refers to.
alternative_id = "alt_id"

# Column giving the choice associated with each row.
choice = "CHOICE"

# Build the multinomial logit (MNL) model from the data, the column names
# and the utility specification defined earlier in this script.
model_01_mnl = pl.create_choice_model(data=data_01,
                                      alt_id_col=alternative_id,
                                      obs_id_col=observation_id,
                                      choice_col=choice,
                                      specification=basic_specification,
                                      model_type="MNL",
                                      names=basic_names)

# Start the optimization from zero for every coefficient to be estimated.
# The number of coefficients is read from the model's design matrix (one
# column per coefficient) rather than hard-coded, so it stays correct when
# the specification changes. For the current specification it is 8.
initial_values = np.zeros(model_01_mnl.design.shape[1])
model_01_mnl.fit_mle(initial_values)

# Look at the estimation results
model_01_mnl.get_statsmodels_summary()

Log-likelihood at zero: -2,115.1955
Initial Log-likelihood: -2,115.1955
Estimation Time for Point Estimation: 0.04 seconds.
Final log-likelihood: -1,713.6303


0,1,2,3
Dep. Variable:,CHOICE,No. Observations:,2304.0
Model:,Multinomial Logit Model,Df Residuals:,2296.0
Method:,MLE,Df Model:,8.0
Date:,"Tue, 30 Jan 2018",Pseudo R-squ.:,0.19
Time:,18:47:45,Pseudo R-bar-squ.:,0.186
AIC:,3443.261,Log-Likelihood:,-1713.63
BIC:,3489.200,LL-Null:,-2115.196

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ASC Train,0.9417,0.248,3.792,0.000,0.455,1.428
ASC Metro,0.9847,0.236,4.164,0.000,0.521,1.448
"Travel Time, units:hrs (Train and Metro)",-0.4136,0.073,-5.704,0.000,-0.556,-0.271
"Travel Time, units:hrs (Car)",-0.0374,0.040,-0.931,0.352,-0.116,0.041
"Travel Cost, units:hundredth (Train and Metro)",-0.4177,0.152,-2.745,0.006,-0.716,-0.119
"Travel Cost, units:hundredth (Car)",-2.2127,0.325,-6.809,0.000,-2.850,-1.576
"Headway, units:hrs, (Train)",-0.3861,0.077,-4.992,0.000,-0.538,-0.235
"Headway, units:hrs, (Metro)",-0.1821,0.327,-0.556,0.578,-0.824,0.460
