# Import packages and database

In [12]:
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable, Derive
from biogeme import models
from biogeme import results as res
from biogeme.expressions import DefineVariable, log
from collections import namedtuple
# Location of the tab-separated LPMC extract used throughout this notebook.
data_file = 'https://raw.githubusercontent.com/GustavePellier/MMOB/main/lpmc19.dat'

# Read the file into a DataFrame; fields are separated by tab characters.
LPMC = pd.read_csv(
    data_file,
    sep="\t",
)

# Leave the frame as the last expression so the notebook renders a preview.
LPMC


Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,29,8,1,2,3,3,1,1,1.0,1,...,0.053611,0.000000,0.433889,0.000000,0,0.467778,1.5,1.23,0.0,0.397862
1,65,13,1,3,3,5,1,5,0.0,1,...,0.050833,0.216667,0.590556,0.237778,2,0.863889,0.0,2.60,0.0,0.675884
2,70,13,2,2,3,3,1,1,1.0,1,...,0.158333,0.150000,0.000000,0.000000,0,0.242500,2.4,0.72,0.0,0.382589
3,77,15,0,1,1,5,6,1,1.0,1,...,0.095000,0.000000,0.099444,0.000000,0,0.140833,1.5,0.33,0.0,0.366864
4,90,18,0,0,4,3,6,1,1.0,1,...,0.163889,0.000000,0.064444,0.000000,0,0.128611,1.5,0.30,0.0,0.144708
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,81007,17594,2,4,1,3,1,3,0.0,3,...,0.247778,0.000000,0.075000,0.000000,0,0.113611,0.0,0.29,0.0,0.547677
4996,81022,17599,0,0,3,1,6,1,1.0,3,...,0.087500,0.000000,0.766944,0.162778,1,0.328611,3.0,1.31,0.0,0.076923
4997,81033,17602,0,1,4,3,1,1,1.0,3,...,0.124167,0.000000,0.349444,0.000000,0,0.138889,1.5,0.39,0.0,0.358000
4998,81049,17605,0,5,4,3,1,5,0.0,3,...,0.231389,0.000000,0.255556,0.000000,0,0.218611,0.0,0.58,0.0,0.261753


In [13]:
# Wrap the DataFrame in a Biogeme database named 'LPMC'.
# NOTE(review): this object is built before the derived columns "dur_pt" and
# "cost_drive" are added to LPMC in the next cell; the later model presumably
# relies on the Database holding a reference to LPMC rather than a copy --
# confirm against the installed Biogeme version.
database = db.Database('LPMC', LPMC)

# Estimation results of every model, keyed by a model label.
all_results = dict()


# Model 0

We calculate the total public transport duration and the total driving cost

In [14]:
# Total public-transport duration: access + rail + bus + interchange legs.
LPMC["dur_pt"] = (
    LPMC["dur_pt_access"]
    + LPMC["dur_pt_rail"]
    + LPMC["dur_pt_bus"]
    + LPMC["dur_pt_int"]
)

# Total driving cost: congestion charge plus fuel.
LPMC["cost_drive"] = (
    LPMC["cost_driving_ccharge"]
    + LPMC["cost_driving_fuel"]
)

We create Biogeme variables for the columns that are likely to be useful in the models

In [15]:
# --- Observed choice -------------------------------------------------------
travel_mode = Variable("travel_mode")

# --- Public-transport duration components ----------------------------------
dur_pt_access = Variable("dur_pt_access")
dur_pt_rail = Variable("dur_pt_rail")
dur_pt_bus = Variable("dur_pt_bus")
dur_pt_int = Variable("dur_pt_int")

pt_interchanges = Variable("pt_interchanges")

# --- Costs (cost_drive is the derived column ccharge + fuel) ---------------
cost_driving_fuel = Variable("cost_driving_fuel")
cost_driving_ccharge = Variable("cost_driving_ccharge")
cost_drive = Variable("cost_drive")
cost_pt = Variable("cost_transit")

# --- Travel time per mode (dur_pt is the derived total PT duration) --------
time_walk = Variable("dur_walking")
time_cycle = Variable("dur_cycling")
time_pt = Variable("dur_pt")
time_drive = Variable("dur_driving")

# --- Socio-demographic columns (not used by Model 0) -----------------------
female = Variable("female")
age = Variable("age")

There are 4 different travel modes, so we build a model with 4 utility functions. We create 3 alternative-specific constants, a generic parameter for travel time and a generic parameter for cost.

In [16]:
# Alternative-specific constants for cycling, public transport and driving.
# Walking gets no constant, so it acts as the reference alternative.
asc_cycle = Beta("asc_cycle", 0, None, None, 0)
asc_pt = Beta("asc_pt", 0, None, None, 0)
asc_drive = Beta("asc_drive", 0, None, None, 0)

# Generic coefficients shared by every alternative that has the attribute.
# All five parameters start at 0 and appear in the estimation results table.
beta_cost = Beta("beta_cost", 0, None, None, 0)
beta_time = Beta("beta_time", 0, None, None, 0)

Utility functions creation

In [17]:
# Systematic utilities of Model 0.
# Walking and cycling only have a travel-time term; public transport and
# driving also carry a cost term. Walking has no constant (reference mode).
v_walk_model0 = beta_time * time_walk
v_cycle_model0 = asc_cycle + beta_time * time_cycle
v_pt_model0 = asc_pt + beta_time * time_pt + beta_cost * cost_pt
v_drive_model0 = asc_drive + beta_time * time_drive + beta_cost * cost_drive

Availability of each mode: all four modes are assumed to be available here

In [18]:
# Availability flag per alternative id (1..4); every mode is marked available.
av = dict.fromkeys(range(1, 5), 1)

The estimation results (parameter values, t-tests or p-values, null and final log likelihoods)

In [19]:
# Map each alternative id to its utility: 1 = walk, 2 = cycle, 3 = public
# transport, 4 = drive.
V_model0 = {1: v_walk_model0, 2: v_cycle_model0, 3: v_pt_model0, 4: v_drive_model0}

# Log-probability of the observed travel_mode under a logit model with the
# utilities and availabilities defined above.
logprob_model0 = models.loglogit(V_model0, av, travel_mode)

biogeme_model0 = bio.BIOGEME(database, logprob_model0)
biogeme_model0.modelName = 'Model_0'

# Estimate once and reuse the result object. The original cell called
# estimate() twice, repeating the whole optimisation for no benefit.
results_generic = biogeme_model0.estimate()
all_results['Model0'] = results_generic

# Last expression of the cell: table of estimates with robust statistics.
results_generic.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycle,-3.878619,0.107672,-36.022418,0.0
asc_drive,-1.295191,0.080505,-16.088354,0.0
asc_pt,-0.503354,0.054123,-9.300119,0.0
beta_cost,-0.193629,0.013958,-13.871889,0.0
beta_time,-5.495527,0.208596,-26.345342,0.0


In [20]:
# Build a side-by-side summary of every model stored in all_results.
# The displayed value is a pair: the summary table and a mapping of model
# labels (here {'Model0': 'Model0'}).
res.compile_estimation_results(all_results)

(                                         Model0
 Number of estimated parameters                5
 Sample size                                5000
 Final log likelihood               -4587.818071
 Akaike Information Criterion        9185.636142
 Bayesian Information Criterion      9218.222108
 asc_cycle (t-test)                 -3.88  (-36)
 asc_drive (t-test)                -1.3  (-16.1)
 asc_pt (t-test)                  -0.503  (-9.3)
 beta_cost (t-test)              -0.194  (-13.9)
 beta_time (t-test)                -5.5  (-26.3),
 {'Model0': 'Model0'})

In [21]:
# Log-likelihood of the null model given the availabilities in av.
# With 4 always-available modes and 5000 observations, the displayed value
# -6931.47 equals 5000 * ln(1/4), i.e. equal probability for every mode.
print("Null Loglikelihood : ")
biogeme_model0.calculateNullLoglikelihood(av)

Null Loglikelihood : 


-6931.471805599917

# Model 1