In [7]:
import pandas as pd
import biogeme.database as db
from biogeme.expressions import Variable

import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta

# Read t h e d a t a
df = pd.read_csv( 'lpmc01.dat' , sep='\t' )
database = db.Database ( 'lmpc01' , df )

trip_id = Variable('trip_id') 
household_id = Variable('household_id') 
person_n = Variable('person_n') 
trip_n = Variable('trip_n') 
travel_mode = Variable('travel_mode') 
purpose = Variable('purpose') 
fueltype = Variable('fueltype') 
faretype = Variable('faretype') 
bus_scale = Variable('bus_scale') 
survey_year = Variable('survey_year') 
travel_year = Variable('travel_year') 
travel_month = Variable('travel_month') 
travel_date = Variable('travel_date') 
day_of_week = Variable('day_of_week') 
start_time = Variable('start_time') 
age = Variable('age') 
female = Variable('female') 
driving_license = Variable('driving_license') 
car_ownership = Variable('car_ownership') 
distance = Variable('distance') 
dur_walking = Variable('dur_walking') 
dur_cycling = Variable('dur_cycling') 
dur_pt_access = Variable('dur_pt_access') 
dur_pt_rail = Variable('dur_pt_rail') 
dur_pt_bus = Variable('dur_pt_bus') 
dur_pt_int = Variable('dur_pt_int') 
pt_interchanges = Variable('pt_interchanges') 
dur_driving = Variable('dur_driving') 
cost_transit = Variable('cost_transit') 
cost_driving_fuel = Variable('cost_driving_fuel') 
cost_driving_ccharge = Variable('cost_driving_ccharge') 
driving_traffic_percent = Variable('driving_traffic_percent') 

#CREATE VARIABLES

cost_driving  = database.DefineVariable('cost_driving' , cost_driving_fuel + cost_driving_ccharge)
tt_driving    = database.DefineVariable('tt_driving' , dur_driving)
cost_cycling  = database.DefineVariable('cost_cycling' , dur_cycling * 0)
cost_walking  = database.DefineVariable('cost_walking' , dur_walking * 0)
tt_walking    = database.DefineVariable('tt_walking' , dur_walking)
tt_cycling    = database.DefineVariable('tt_cycling' , dur_cycling)
cost_publict  = database.DefineVariable('cost_publict' , cost_transit) 

# aucune idee si c'est vrai, les datas ne sont pas expliquées
tt_publict    = database.DefineVariable('tt_publict' , dur_pt_access + dur_pt_rail + dur_pt_bus
                                                        + dur_pt_int  ) # ?

#new variables

#pour Beta seul le dernier argument est important : 
#a flag that indicates if the parameter must be estimated (0) or if it keeps its default value (1).
B0_driving = Beta('B0_driving', 0, None, None, 0)
B0_cycling = Beta('B0_cycling', 0, None, None, 0)
B0_walking = Beta('B0_walking', 0, None, None, 1) # normalized to zero
B0_publict = Beta('B0_publict', 0, None, None, 0)
B1 = Beta('B1', 0, None, None, 0)
B2 = Beta('B2', 0, None, None, 0)


# D e f i n i t i o n  o f  t h e  u t i l i t y  f u n c t i o n s

V_driving = ( B0_driving + B1 * cost_driving + B2 * tt_driving )
V_cycling = ( B0_cycling + B1 * cost_cycling + B2 * tt_cycling )
V_publict = ( B0_walking + B1 * cost_walking + B2 * tt_walking )
V_walking = ( B0_publict + B1 * cost_publict + B2 * tt_publict )

# A s s o c i a t e  u t i l i t y  f u n c t i o n s  w i t h  t h e numbering o f  a l t e r n a t i v e s

V = {1: V_driving , 2: V_cycling , 3: V_walking , 4: V_publict }

#il faut decrire le fait que tous les choix sont available
un = database.DefineVariable('un' , cost_publict * 0 + 1) 

av = {1 : un, 2 : un, 3 : un, 4 : un }

#
CHOICE = database.DefineVariable('CHOICE' , travel_mode)

# D e f i n i t i o n  o f  t h e model . This i s  t h e  c o n t r i b u t i o n  o f each
# o b s e r v a t i o n  t o  t h e  l o g l i k e l i h o o d  f u n c t i o n .
logprob = models.loglogit(V, av , CHOICE)

# Create t h e Biogeme o b j e c t
the_biogeme = bio.BIOGEME( database , logprob )
the_biogeme.modelName = 'modele_0'

#??? cette fonction ne marche pas
# C a l c u l a t e  t h e  n u l l  l o g l i k e l i h o o d  f o r  r e p o r t i n g .
#the_biogeme.calculateNullLogLikelihood( av )

# E s t i m a t e  t h e  p a r a m e t e r s
results = the_biogeme.estimate()
print( results.short_summary() )

# Get t h e  r e s u l t s  i n  a pandas t a b l e
pandas_results = results.getEstimatedParameters()
print(pandas_results)

#print(database)
#print(V)


Results for model modele_0
Nbr of parameters:		5
Sample size:			5000
Excluded data:			0
Final log likelihood:		-5645.127
Akaike Information Criterion:	11300.25
Bayesian Information Criterion:	11332.84

               Value  Rob. Std err  Rob. t-test  Rob. p-value
B0_cycling -2.939355      0.093214   -31.533531  0.000000e+00
B0_driving -0.995264      0.053889   -18.468843  0.000000e+00
B0_publict -0.263958      0.037786    -6.985645  2.835510e-12
B1         -0.037333      0.011413    -3.271186  1.070973e-03
B2         -0.194648      0.035839    -5.431210  5.597321e-08


In [9]:

#pour Beta seul le dernier argument est important : 
#a flag that indicates if the parameter must be estimated (0) or if it keeps its default value (1).
B0_driving = Beta('B0_driving', 0, None, None, 0)
B0_cycling = Beta('B0_cycling', 0, None, None, 0)
B0_walking = Beta('B0_walking', 0, None, None, 1) # normalized to zero
B0_publict = Beta('B0_publict', 0, None, None, 0)
B1 = Beta('B1', 0, None, None, 0)
B2 = Beta('B2', 0, None, None, 0)

B1_driving = Beta('B1_driving', 0, None, None, 0)
B2_driving = Beta('B2_driving', 0, None, None, 0)

# D e f i n i t i o n  o f  t h e  u t i l i t y  f u n c t i o n s

V_driving = ( B0_driving + (B1 + B1_driving) * cost_driving + (B2 + B2_driving) * tt_driving )
V_cycling = ( B0_cycling + B1 * cost_cycling + B2 * tt_cycling )
V_publict = ( B0_walking + B1 * cost_walking + B2 * tt_walking )
V_walking = ( B0_publict + B1 * cost_publict + B2 * tt_publict )


# A s s o c i a t e  u t i l i t y  f u n c t i o n s  w i t h  t h e numbering o f  a l t e r n a t i v e s

V = {1: V_driving , 2: V_cycling , 3: V_walking , 4: V_publict }

#il faut decrire le fait que tous les choix sont available
av = {1 : un, 2 : un, 3 : un, 4 : un }

# D e f i n i t i o n  o f  t h e model . This i s  t h e  c o n t r i b u t i o n  o f each
# o b s e r v a t i o n  t o  t h e  l o g l i k e l i h o o d  f u n c t i o n .
logprob = models.loglogit(V, av , CHOICE)

# Create t h e Biogeme o b j e c t
the_biogeme = bio.BIOGEME( database , logprob )
the_biogeme.modelName = 'modele_1'

#??? cette fonction ne marche pas
# C a l c u l a t e  t h e  n u l l  l o g l i k e l i h o o d  f o r  r e p o r t i n g .
#the_biogeme.calculateNullLogLikelihood( av )

# E s t i m a t e  t h e  p a r a m e t e r s
results = the_biogeme.estimate()
print( results.short_summary() )

# Get t h e  r e s u l t s  i n  a pandas t a b l e
pandas_results = results.getEstimatedParameters()
print(pandas_results)


Results for model modele_1
Nbr of parameters:		7
Sample size:			5000
Excluded data:			0
Final log likelihood:		-4757.949
Akaike Information Criterion:	9529.898
Bayesian Information Criterion:	9575.518

                Value  Rob. Std err  Rob. t-test  Rob. p-value
B0_cycling  -3.202124      0.098346   -32.559693  0.000000e+00
B0_driving   1.913538      0.111111    17.221915  0.000000e+00
B0_publict  -0.561202      0.049275   -11.389277  0.000000e+00
B1          -0.019550      0.023465    -0.833187  4.047394e-01
B1_driving   0.229521      0.035471     6.470676  9.756551e-11
B2          -0.490333      0.045595   -10.754172  0.000000e+00
B2_driving -22.710572      0.995998   -22.801832  0.000000e+00


In [None]:
#statistical test to compare modele 0 and modele 1
# t test i guess

#B1 not significant ? but all others yes ?
