# MMOB Project

# import

in this cell, relevant libraries are imported, as well as the datafile imported from our github and a dict that will store all results during the project 

In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable
from biogeme import models
from biogeme import results as res
data_file ='https://raw.githubusercontent.com/DunodMax/MMOB/main/Dataset.txt'
LPMC = pd.read_csv(data_file, sep='\t')
LPMC
database = db.Database('LPMC', LPMC)
all_results = {}

# Model 0

all useful columns for this model are used as variables

In [2]:
travel_mode=Variable('travel_mode')
dur_pt_access=Variable('dur_pt_access')
dur_pt_rail=Variable('dur_pt_rail')
dur_pt_bus=Variable('dur_pt_bus')
dur_pt_int=Variable('dur_pt_int')
pt_interchanges=Variable('pt_interchanges')
dur_driving=Variable('dur_driving')
cost_transit=Variable('cost_transit')
cost_driving_fuel=Variable('cost_driving_fuel')
cost_driving_ccharge=Variable('cost_driving_ccharge')
dur_walking=Variable('dur_walking')
dur_cycling=Variable('dur_cycling')

Trip duration for each mode are computed ( see report for details about equations)
Cost for each mode with a cost are computed ( cycling and walking is considered free)

In [3]:
time_walk=dur_walking
time_cycle=dur_cycling
time_pt=dur_pt_access+dur_pt_rail+dur_pt_bus+dur_pt_int
time_drive=dur_driving

cost_drive=cost_driving_ccharge+cost_driving_fuel
cost_pt=cost_transit

There is 4 different travel mode, we will thus build a model with 4 utility functions, we create 3 alternative specific constant, a generic parameter for travel time and a generic parameter for cost

In [4]:
asc_cycle = Beta('asc_cycle', 0, None, None, 0)
asc_pt = Beta('asc_pt', 0, None, None, 0)
asc_drive = Beta('asc_drive', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time = Beta('beta_time', 0, None, None, 0)

utility functions

In [5]:
v_walk_model0= beta_time * time_walk  
v_cycle_model0= asc_cycle + beta_time * time_cycle 
v_pt_model0= asc_pt + beta_time * time_pt + beta_cost * cost_pt
v_drive_model0= asc_drive + beta_time * time_drive + beta_cost * cost_drive

In [6]:
av = {1: 1, 2: 1, 3: 1, 4:1}

estimation of the parameters for Model 0, all relevant parameters are printed

In [7]:
V_model0 = {1: v_walk_model0 , 2: v_cycle_model0, 3: v_pt_model0, 4: v_drive_model0}
logprob_model0 = models.loglogit(V_model0, av, travel_mode)
biogeme_model0 = bio.BIOGEME(database, logprob_model0)
biogeme_model0.modelName = 'Model_0'
all_results['Model0'] = biogeme_model0.estimate()
results_generic = biogeme_model0.estimate()
results_generic.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycle,-3.832163,0.10268,-37.321371,0.0,0.107574,-35.623351,0.0
asc_drive,-1.235707,0.073716,-16.763025,0.0,0.0804,-15.369411,0.0
asc_pt,-0.528767,0.053297,-9.921212,0.0,0.054766,-9.655006,0.0
beta_cost,-0.16978,0.013321,-12.745694,0.0,0.013108,-12.952753,0.0
beta_time,-5.45054,0.181898,-29.964846,0.0,0.202684,-26.891767,0.0


In [8]:
res.compileEstimationResults(all_results)

AttributeError: module 'biogeme.results' has no attribute 'compileEstimationResults'

In [9]:
print("Null Loglikelihood : ")
biogeme_model0.calculateNullLoglikelihood(av)

Null Loglikelihood : 


-6931.471805599454

# Model 1 

### Time specification

We will try a specification where the the cost coeffecient is Generic and the time coefficient is alternative specific

Again 4 different travel mode, 4 utility functions and 3 alternative specific constant.
This time we have the generic parameter for cost and a 4 new specific parameter for travel time

In [10]:
beta_time_drive = Beta('beta_time_drive', 0, None, None, 0)
beta_time_pt = Beta('beta_time_pt', 0, None, None, 0)
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycle = Beta('beta_time_cycle', 0, None, None, 0)

New utility functions :

In [11]:
v_walk_model1 = beta_time_walk * time_walk  
v_cycle_model1 = asc_cycle + beta_time_cycle * time_cycle 
v_pt_model1 = asc_pt + beta_time_pt * time_pt + beta_cost * cost_pt
v_drive_model1 = asc_drive + beta_time_drive * time_drive + beta_cost * cost_drive

Estimation of the parameters for Model 1, all relevant parameters are printed

In [12]:
V_model1 = {1: v_walk_model1 , 2: v_cycle_model1, 3: v_pt_model1, 4: v_drive_model1}
logprob_model1 = models.loglogit(V_model1, av, travel_mode)
biogeme_model1 = bio.BIOGEME(database, logprob_model1)
biogeme_model1.modelName = 'Model1_time_specification'
all_results['Model1_time_specification'] = biogeme_model1.estimate()
results_alt_spec_time = biogeme_model1.estimate()
results_alt_spec_time.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycle,-4.509582,0.171871,-26.238104,0.0,0.191029,-23.606835,0.0
asc_drive,-1.885697,0.106697,-17.673422,0.0,0.122741,-15.363274,0.0
asc_pt,-2.339705,0.114622,-20.412333,0.0,0.125798,-18.598844,0.0
beta_cost,-0.146062,0.014772,-9.887617,0.0,0.01485,-9.835764,0.0
beta_time_cycle,-6.12148,0.467856,-13.08411,0.0,0.542685,-11.279996,0.0
beta_time_drive,-6.445071,0.333687,-19.314722,0.0,0.391346,-16.469,0.0
beta_time_pt,-3.514556,0.233212,-15.070208,0.0,0.258207,-13.611364,0.0
beta_time_walk,-8.20483,0.290738,-28.220696,0.0,0.364367,-22.518008,0.0


In [13]:
res.compileEstimationResults(all_results)

AttributeError: module 'biogeme.results' has no attribute 'compileEstimationResults'

### Cost specification

We compare with the model where we specify the price, to be sure to have the most interesting model for the future models. (walk and cycling don't have cost, so only 2 specific parameter for cost)

In [28]:
beta_cost_drive = Beta('beta_cost_drive', 0, None, None, 0)
beta_cost_pt = Beta('beta_cost_pt', 0, None, None, 0)

New utility function

In [29]:
v_walk_model1_spec_cost = beta_time * time_walk  
v_cycle_model1_spec_cost = asc_cycle + beta_time * time_cycle 
v_pt_model1_spec_cost = asc_pt + beta_time * time_pt + beta_cost_pt * cost_pt
v_drive_model1_spec_cost = asc_drive + beta_time * time_drive + beta_cost_drive * cost_drive

In [30]:
V_model1_spec_cost = {1: v_walk_model1_spec_cost , 2: v_cycle_model1_spec_cost, 3: v_pt_model1_spec_cost, 4: v_drive_model1_spec_cost}
logprob_model1_spec_cost = models.loglogit(V_model1_spec_cost, av, travel_mode)
biogeme_model1_spec_cost = bio.BIOGEME(database, logprob_model1_spec_cost)
biogeme_model1_spec_cost.modelName = 'Model_1_cost_specifiaction'
all_results['Model1_cost_specification'] = biogeme_model1_spec_cost.estimate()
results_alt_spec_cost = biogeme_model1_spec_cost.estimate()
results_alt_spec_cost.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycle,-3.766977,0.10277,-36.654518,0.0,0.107734,-34.965546,0.0
asc_drive,-1.13742,0.07382,-15.408073,0.0,0.080193,-14.183456,0.0
asc_pt,-0.831977,0.062068,-13.404311,0.0,0.064856,-12.828122,0.0
beta_cost_drive,-0.214652,0.01725,-12.443376,0.0,0.021273,-10.090343,0.0
beta_cost_pt,0.061553,0.026157,2.353215,0.018612,0.028466,2.162358,0.030591
beta_time,-5.411667,0.183127,-29.551491,0.0,0.202912,-26.670039,0.0


In [31]:
res.compileEstimationResults(all_results)

AttributeError: module 'biogeme.results' has no attribute 'compileEstimationResults'

### Comparaison

we compare the Akaike Information Criterion (AIC)

In [32]:
print("Model 0 PIC value :",results_generic.data.akaike)
print("Model 1, time alternative specification PIC value :",results_alt_spec_time.data.akaike)
print("Model 1, cost alternative specification PIC value :",results_alt_spec_cost.data.akaike)

Model 0 PIC value : 9142.89298347718
Model 1, time alternative specification PIC value : 8545.6684953841
Model 1, cost alternative specification PIC value : 9030.730461378622


we compare the Bayesian Information Criterion (BIC)

In [33]:
print("Model 0 PIC value :",results_generic.data.bayesian)
print("Model 1, alternative specification travel time, PIC value :",results_alt_spec_time.data.bayesian)
print("Model 1, alternative specification cost, PIC value :",results_alt_spec_cost.data.bayesian)

Model 0 PIC value : 9175.478949434262
Model 1, alternative specification travel time, PIC value : 8597.80604091543
Model 1, alternative specification cost, PIC value : 9069.833620527119


According to the AIC and the BIC, the best model is the one wih the alternative specific travel time coefficient, and the generic travel cost coefficient.

# Model 2

In [58]:
LPMC.columns

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent'],
      dtype='object')

In [59]:
model_base=biogeme_model1

In [60]:
#we're adding the attribute distance to our model 
#we're adding the interaction between age and distance
dur_pt_int=Variable('dur_pt_int')
driving_license=Variable('driving_license')

In [61]:
#defining associated coefficients
beta_pt_inter= Beta('beta_pt_inter', 0, None, None, 0)
beta_license= Beta('beta_license', 0, None, None, 0)

In [62]:
#the specification, we only consider the alternative distance for walking
#the interaction distance age is considered just for public trasnport
v_walk_model2 = v_walk_model1
v_cycle_model2 = v_cycle_model1
v_pt_model2 = v_pt_model1 + beta_pt_inter*dur_pt_int
v_drive_model2 = v_drive_model1 + asc_drive*beta_license*driving_license

In [63]:
#the estimation results
V_model2 = {1: v_walk_model2 , 2: v_cycle_model2, 3: v_pt_model2, 4: v_drive_model2}
logprob_model2 = models.loglogit(V_model2, av, travel_mode)
biogeme_model2 = bio.BIOGEME(database, logprob_model2)
biogeme_model2.modelName = 'Model_2'
all_results['Model_2'] = biogeme_model2.estimate()
results_model2 = biogeme_model2.estimate()
results_model2.getEstimatedParameters()

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycle,-4.559722,0.174154,-26.182099,0.0,0.196662,-23.185537,0.0
asc_drive,-2.773931,0.119784,-23.157712,0.0,0.138465,-20.033384,0.0
asc_pt,-2.442774,0.119358,-20.466009,0.0,0.131427,-18.586579,0.0
beta_cost,-0.138801,0.014346,-9.675237,0.0,0.013822,-10.041922,0.0
beta_license,-0.518761,0.025992,-19.958497,0.0,0.028063,-18.48536,0.0
beta_pt_inter,-1.446374,0.714046,-2.025604,0.042805,0.721894,-2.003582,0.045115
beta_time_cycle,-6.307711,0.477391,-13.212885,0.0,0.558475,-11.294527,0.0
beta_time_drive,-7.022215,0.350296,-20.046519,0.0,0.411923,-17.047404,0.0
beta_time_pt,-3.445152,0.262389,-13.129949,0.0,0.283826,-12.138253,0.0
beta_time_walk,-8.36313,0.296596,-28.19705,0.0,0.376843,-22.192582,0.0


In [64]:
res.compileEstimationResults(results_model2)

AttributeError: module 'biogeme.results' has no attribute 'compileEstimationResults'

In [69]:
#we test base_model against model_2 where base_model is a restricted version of model_2 suing the looglikehood
#ratio test

In [66]:
#we test them using the null hypothesis that our base model is the true model

In [67]:
results_base=model_base.estimate()

In [68]:
results_model2.likelihood_ratio_test(results_base, 0.05) #level of the statistics for a level of significance of 5%?

AttributeError: 'bioResults' object has no attribute 'likelihood_ratio_test'

In [70]:
#then depending on if the value I obtained was higher or lower than a certain number, we reject or accept the null hypothesis

In [72]:
from scipy.stats import chi2
threshold = chi2.ppf(.95, 2)
threshold #just in case we might want to compare to this threshold

5.991464547107979