In [14]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable, log, exp
from biogeme import models
from biogeme import results as res

In [15]:
#data_file = "http://transp-or.epfl.ch/data/lpmc.dat"
data_file='lpmc10.dat'
lpmc = pd.read_csv(data_file, sep='\t')
lpmc

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,20,5,1,0,4,3,1,5,0.0,1,...,0.381667,0.000000,0.062222,0.000000,0,0.117222,0.00,0.41,0.0,0.097156
1,41,9,3,0,4,3,1,5,0.0,1,...,0.146944,0.000000,0.225000,0.000000,0,0.200833,0.00,0.48,0.0,0.378976
2,69,13,2,1,4,3,1,1,1.0,1,...,0.029444,0.083333,0.735833,0.398056,3,0.716944,6.00,2.16,0.0,0.582720
3,102,20,2,0,2,3,1,1,1.0,1,...,0.339722,0.183333,0.116667,0.266667,1,0.250833,3.00,0.89,0.0,0.170543
4,105,21,0,1,4,3,1,1,1.0,1,...,0.126389,0.000000,0.150000,0.000000,0,0.125833,1.50,0.37,0.0,0.154525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,80998,17591,0,5,4,3,1,5,0.0,3,...,0.189167,0.000000,0.117778,0.000000,0,0.082500,0.00,0.22,10.5,0.239057
4996,81000,17592,0,0,3,3,6,5,0.0,3,...,0.105278,0.000000,0.220278,0.000000,0,0.213611,0.00,0.52,0.0,0.412224
4997,81015,17597,0,3,4,3,1,5,0.0,3,...,0.343056,0.000000,0.177500,0.000000,0,0.189444,0.00,0.76,0.0,0.086510
4998,81041,17604,2,4,3,1,1,2,0.0,3,...,0.344444,0.316667,0.000000,0.083333,1,0.386111,1.05,0.98,0.0,0.340288


In [16]:
database = db.Database('trips', lpmc)

In [17]:
trip_id = Variable('trip_id')
household_id = Variable('household_id')
person_n = Variable('person_n')
travel_mode = Variable('travel_mode')
purpose = Variable('purpose')
fueltype = Variable('fueltype')
faretype = Variable('faretype')
bus_scale = Variable('bus_scale')
survey_year = Variable('survey_year')
travel_year = Variable('travel_year')
travel_month = Variable('travel_month')
travel_day = Variable('travel_day')
day_of_week = Variable('day_of_week')
start_time = Variable('day_of_week')
age = Variable('age')
female = Variable('female')
driving_license = Variable('driving_license')
car_ownership = Variable('car_ownership')
distance = Variable
dur_walking = Variable('dur_walking')
dur_cycling = Variable('dur_cycling')
dur_pt_access = Variable('dur_pt_access')
dur_pt_rail = Variable('dur_pt_rail')
dur_pt_bus = Variable('dur_pt_bus')
dur_pt_int = Variable('dur_pt_int')
pt_interchanges = Variable('pt_interchanges')
dur_driving = Variable('dur_driving')
cost_transit = Variable('cost_transit')
cost_driving_fuel = Variable('cost_driving_fuel')
cost_driving_ccharge = Variable('cost_driving_ccharge')
driving_traffic_percent = Variable('driving_traffic_percent')

### Model 0

In [18]:
asc_walk = Beta('asc_walk', 0, None, None, 0)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [19]:
cost_driving = cost_driving_fuel + cost_driving_ccharge
dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #total duration of public transportation

In [20]:
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time = Beta('beta_time', 0, None, None, 0)

In [21]:
V_walk = asc_walk + beta_time * dur_walking
V_cycling = asc_cycling + beta_time * dur_cycling
V_driving = asc_driving + beta_time * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time * dur_public + beta_cost * cost_transit

In [22]:
prob_walk = 1 / (1 + exp(V_cycling - V_walk) + exp(V_driving - V_walk) + exp(V_public - V_walk))
prob_cycling = 1 / (1 + exp(V_walk - V_cycling) + exp(V_driving - V_cycling) + exp(V_public - V_cycling))
prob_driving = 1 / (1 + exp(V_cycling - V_driving) + exp(V_walk - V_driving) + exp(V_public - V_driving))
prob_public = 1-prob_driving + prob_cycling + prob_walk

prob_observation = prob_walk * (travel_mode == 1) + prob_cycling * (travel_mode == 2) + prob_driving * (travel_mode == 3) + prob_public * (travel_mode == 4) 
logprob = log(prob_observation)

In [23]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_0'

In [24]:
results0 = biogeme.estimate()

In [25]:
print(results0.printGeneralStatistics())

Number of estimated parameters:	6
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-1907.476
Final log likelihood:	-1907.476
Likelihood ratio test for the init. model:	3.053606e-07
Rho-square for the init. model:	8e-11
Rho-square-bar for the init. model:	-0.00315
Akaike Information Criterion:	3826.953
Bayesian Information Criterion:	3866.056
Final gradient norm:	1.3570E-02
Nbr of threads:	16



In [26]:
results0.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,2.165636,0.048637,44.526762,0.0
asc_driving,2.510434,0.046437,54.061249,0.0
asc_public,-8.654555,0.023001,-376.267926,0.0
asc_walk,3.978485,0.041616,95.600652,0.0
beta_cost,0.125126,0.00958,13.06087,0.0
beta_time,-1.249361,0.150803,-8.284731,2.220446e-16


In [27]:
results0.data.htmlFileName

'model_0~02.html'

### Model 1


In [28]:
asc_walk = Beta('asc_walk', 0, None, None, 0)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [29]:
cost_driving = cost_driving_fuel + cost_driving_ccharge
dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #total duration of public transportation

In [30]:
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)
beta_time_public = Beta('beta_time_public', 0, None, None, 0)

In [31]:
V_walk = asc_walk + beta_time_walk * dur_walking
V_cycling = asc_cycling + beta_time_cycling * dur_cycling
V_driving = asc_driving + beta_time_driving * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time_public * dur_public + beta_cost * cost_transit

In [32]:
prob_walk = 1 / (1 + exp(V_cycling - V_walk) + exp(V_driving - V_walk) + exp(V_public - V_walk))
prob_cycling = 1 / (1 + exp(V_walk - V_cycling) + exp(V_driving - V_cycling) + exp(V_public - V_cycling))
prob_driving = 1 / (1 + exp(V_cycling - V_driving) + exp(V_walk - V_driving) + exp(V_public - V_driving))
prob_public = 1-prob_driving + prob_cycling + prob_walk

prob_observation = prob_walk * (travel_mode == 1) + prob_cycling * (travel_mode == 2) + prob_driving * (travel_mode == 3) + prob_public * (travel_mode == 4) 
logprob = log(prob_observation)

In [33]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1'

In [34]:
results1 = biogeme.estimate()



In [35]:
print(results1.printGeneralStatistics())

Number of estimated parameters:	9
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-3363.444
Final log likelihood:	-1631.756
Likelihood ratio test for the init. model:	3463.375
Rho-square for the init. model:	0.515
Rho-square-bar for the init. model:	0.512
Akaike Information Criterion:	3281.512
Bayesian Information Criterion:	3340.167
Final gradient norm:	1.4073E-02
Nbr of threads:	16



In [36]:
results1.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,1.851978,0.048408,38.257788,0.0
asc_driving,1.76374,0.103985,16.961447,0.0
asc_public,-8.23291,0.039333,-209.313999,0.0
asc_walk,4.617192,0.120386,38.353244,0.0
beta_cost,0.062819,0.011852,5.300165,1.156984e-07
beta_time_cycling,3.744404,0.405816,9.226845,0.0
beta_time_driving,5.709963,0.549775,10.386008,0.0
beta_time_public,0.066085,0.366857,0.180138,0.8570439
beta_time_walk,-2.116734,0.564027,-3.752894,0.0001748048


In [37]:
results1.data.htmlFileName

'model_1.html'

In [38]:
general_statistics_model_0 = results0.getGeneralStatistics()
general_statistics_model_0

{'Number of estimated parameters': GeneralStatistic(value=6, format=''),
 'Sample size': GeneralStatistic(value=5000, format=''),
 'Excluded observations': GeneralStatistic(value=0, format=''),
 'Init log likelihood': GeneralStatistic(value=-1907.476366744466, format='.7g'),
 'Final log likelihood': GeneralStatistic(value=-1907.4763665917858, format='.7g'),
 'Likelihood ratio test for the init. model': GeneralStatistic(value=3.053605723835062e-07, format='.7g'),
 'Rho-square for the init. model': GeneralStatistic(value=8.004308327258514e-11, format='.3g'),
 'Rho-square-bar for the init. model': GeneralStatistic(value=-0.0031455172666490228, format='.3g'),
 'Akaike Information Criterion': GeneralStatistic(value=3826.9527331835716, format='.7g'),
 'Bayesian Information Criterion': GeneralStatistic(value=3866.055892332069, format='.7g'),
 'Final gradient norm': GeneralStatistic(value=0.01356957932248403, format='.4E'),
 'Nbr of threads': GeneralStatistic(value=16, format='')}

In [39]:
general_statistics_model_1 = results1.getGeneralStatistics()
general_statistics_model_1

{'Number of estimated parameters': GeneralStatistic(value=9, format=''),
 'Sample size': GeneralStatistic(value=5000, format=''),
 'Excluded observations': GeneralStatistic(value=0, format=''),
 'Init log likelihood': GeneralStatistic(value=-3363.443564258548, format='.7g'),
 'Final log likelihood': GeneralStatistic(value=-1631.7559876836642, format='.7g'),
 'Likelihood ratio test for the init. model': GeneralStatistic(value=3463.375153149768, format='.7g'),
 'Rho-square for the init. model': GeneralStatistic(value=0.5148555471471467, format='.3g'),
 'Rho-square-bar for the init. model': GeneralStatistic(value=0.5121797180963376, format='.3g'),
 'Akaike Information Criterion': GeneralStatistic(value=3281.5119753673284, format='.7g'),
 'Bayesian Information Criterion': GeneralStatistic(value=3340.1667140900745, format='.7g'),
 'Final gradient norm': GeneralStatistic(value=0.014073024841928256, format='.4E'),
 'Nbr of threads': GeneralStatistic(value=16, format='')}

In [46]:
#compute the log-likelihood ratio
L0=general_statistics_model_0['Final log likelihood'].value
L1=general_statistics_model_1['Final log likelihood'].value
test = -2 * (L0 - L1)
test

551.4407578162431

In [47]:
#compute the degrees of freedom for the chi2 distribution
K0 = general_statistics_model_0['Number of estimated parameters'].value
K1 = general_statistics_model_1['Number of estimated parameters'].value
degrees_of_freedom = K1 - K0
degrees_of_freedom

3

In [42]:
from scipy.stats import chi2
threshold = chi2.ppf(.95, degrees_of_freedom)
threshold

7.814727903251179

In [44]:
results1.likelihood_ratio_test(results0, 0.05)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=551.4407578162431, threshold=7.814727903251179)

The null hypothesis is rejected at the 5% level.