In [76]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable, log, exp
from biogeme import models
from biogeme import results as res

In [77]:
#data_file = "http://transp-or.epfl.ch/data/lpmc.dat"
data_file='lpmc10.dat'
lpmc = pd.read_csv(data_file, sep='\t')
lpmc

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,20,5,1,0,4,3,1,5,0.0,1,...,0.381667,0.000000,0.062222,0.000000,0,0.117222,0.00,0.41,0.0,0.097156
1,41,9,3,0,4,3,1,5,0.0,1,...,0.146944,0.000000,0.225000,0.000000,0,0.200833,0.00,0.48,0.0,0.378976
2,69,13,2,1,4,3,1,1,1.0,1,...,0.029444,0.083333,0.735833,0.398056,3,0.716944,6.00,2.16,0.0,0.582720
3,102,20,2,0,2,3,1,1,1.0,1,...,0.339722,0.183333,0.116667,0.266667,1,0.250833,3.00,0.89,0.0,0.170543
4,105,21,0,1,4,3,1,1,1.0,1,...,0.126389,0.000000,0.150000,0.000000,0,0.125833,1.50,0.37,0.0,0.154525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,80998,17591,0,5,4,3,1,5,0.0,3,...,0.189167,0.000000,0.117778,0.000000,0,0.082500,0.00,0.22,10.5,0.239057
4996,81000,17592,0,0,3,3,6,5,0.0,3,...,0.105278,0.000000,0.220278,0.000000,0,0.213611,0.00,0.52,0.0,0.412224
4997,81015,17597,0,3,4,3,1,5,0.0,3,...,0.343056,0.000000,0.177500,0.000000,0,0.189444,0.00,0.76,0.0,0.086510
4998,81041,17604,2,4,3,1,1,2,0.0,3,...,0.344444,0.316667,0.000000,0.083333,1,0.386111,1.05,0.98,0.0,0.340288


In [78]:
# cost_driving = cost_driving_fuel + cost_driving_ccharge #total costs of driving
# dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #total duration of public transportation
lpmc['cost_driving']=lpmc['cost_driving_ccharge']+lpmc['cost_driving_fuel']
lpmc['dur_public']=lpmc['dur_pt_access']+lpmc['dur_pt_rail']+lpmc['dur_pt_bus']+lpmc['dur_pt_int']

In [79]:
database = db.Database('trips', lpmc)

In [80]:
lpmc.columns

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent', 'cost_driving',
       'dur_public'],
      dtype='object')

In [81]:
# Expose every column of the data frame as a Biogeme Variable bound to a
# module-level name identical to the column name (e.g. `dur_walking`), so the
# utility functions below can refer to the columns directly.
for col in lpmc.columns:
    globals().update({col: Variable(col)})

# Choice availability

In [86]:
law_violators =(lpmc['driving_license']==0 )& (lpmc['travel_mode']==4)
law_violators.value_counts()

False    4511
True      489
dtype: int64

# Model 0

In [8]:
asc_walk = Beta('asc_walk', 0, None, None, 1)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [9]:
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time = Beta('beta_time', 0, None, None, 0)

In [10]:
V_walk = asc_walk + beta_time * dur_walking
V_cycling = asc_cycling + beta_time * dur_cycling
V_driving = asc_driving + beta_time * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time * dur_public + beta_cost * cost_transit

In [11]:
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

logprob = models.loglogit(V, None, travel_mode)

In [12]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_base'

In [13]:
results0 = biogeme.estimate()

In [14]:
print(results0.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4722.972
Final log likelihood:	-4722.972
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.00106
Akaike Information Criterion:	9455.944
Bayesian Information Criterion:	9488.53
Final gradient norm:	1.0691E-03
Nbr of threads:	16



In [15]:
results0.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-3.660824,0.100795,-36.319469,0.0
asc_driving,-1.335138,0.076403,-17.474949,0.0
asc_public,-0.559066,0.05262,-10.624628,0.0
beta_cost,-0.14985,0.01378,-10.87426,0.0
beta_time,-5.410269,0.188336,-28.726735,0.0


In [249]:
# results0.data.htmlFileName

# Model 1


In [16]:
asc_walk = Beta('asc_walk', 0, None, None, 1)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [17]:
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)
beta_time_public = Beta('beta_time_public', 0, None, None, 0)

In [18]:
V_walk = asc_walk + beta_time_walk * dur_walking
V_cycling = asc_cycling + beta_time_cycling * dur_cycling
V_driving = asc_driving + beta_time_driving * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time_public * dur_public + beta_cost * cost_transit

In [19]:
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

logprob = models.loglogit(V, None, travel_mode)

In [20]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1'

In [21]:
results1 = biogeme.estimate()



In [22]:
print(results1.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-7272.812
Final log likelihood:	-4412.008
Likelihood ratio test for the init. model:	5721.609
Rho-square for the init. model:	0.393
Rho-square-bar for the init. model:	0.392
Akaike Information Criterion:	8840.016
Bayesian Information Criterion:	8892.153
Final gradient norm:	1.0837E-02
Nbr of threads:	16



In [23]:
results1.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-4.59052,0.179035,-25.640344,0.0
asc_driving,-2.072026,0.120163,-17.243441,0.0
asc_public,-2.43948,0.121947,-20.004459,0.0
beta_cost,-0.141785,0.015194,-9.331829,0.0
beta_time_cycling,-5.19589,0.423903,-12.257269,0.0
beta_time_driving,-5.875433,0.359796,-16.329903,0.0
beta_time_public,-3.200594,0.230747,-13.870583,0.0
beta_time_walk,-8.367587,0.360099,-23.236919,0.0


In [24]:
# results1.data.htmlFileName

## Comparing Models 0 and 1

In [25]:
general_statistics_model_0 = results0.getGeneralStatistics()
print(results0.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-4722.972
Final log likelihood:	-4722.972
Likelihood ratio test for the init. model:	-0
Rho-square for the init. model:	0
Rho-square-bar for the init. model:	-0.00106
Akaike Information Criterion:	9455.944
Bayesian Information Criterion:	9488.53
Final gradient norm:	1.0691E-03
Nbr of threads:	16



In [26]:
general_statistics_model_1 = results1.getGeneralStatistics()
print(results1.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-7272.812
Final log likelihood:	-4412.008
Likelihood ratio test for the init. model:	5721.609
Rho-square for the init. model:	0.393
Rho-square-bar for the init. model:	0.392
Akaike Information Criterion:	8840.016
Bayesian Information Criterion:	8892.153
Final gradient norm:	1.0837E-02
Nbr of threads:	16



### Likelihood ratio test

$H_0$: $\beta\_time\_walk=\beta\_time\_driving=\beta\_time\_public=\beta\_time\_cycling$

Because model 0 is a restricted version of model 1, we can apply the likelihood ratio test.

In [27]:
alpha=0.05
results1.likelihood_ratio_test(results0, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=621.9276889648063, threshold=7.814727903251179)

According to the test result, the null hypothesis is rejected at the 5% level.

# Model 2

1. One alternative attribute: `driving_traffic_percent` for driving.
2. One socio-economic characteristic: `driving_license`.

<!-- Model 2 specification:

$ V_{walk}=ASC_{walk}+\beta_{time\_walk}time_{walk} $

$ V_{cycling}=ASC_{cycling}+\beta_{time\_cycle}time_{cycle} $

$ V_{public}=ASC_{public}+\beta_{time\_public}time_{public}+\beta_{cost}cost_{public}+\beta_{traffic}driving\_traffic\_percent$

$ V_{driving}=ASC_{driving}+\beta_{time\_driving}time_{driving}+\beta_{cost}cost_{driving}+\beta_{driving_liscence}drivingliscence$ -->


In [28]:
asc_walk = Beta('asc_walk', 0, None, None, 1) 
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [29]:
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)
beta_time_public = Beta('beta_time_public', 0, None, None, 0)

beta_cost = Beta('beta_cost', 0, None, None, 0)

# alternative attribute
beta_traffic = Beta('beta_driving_traffic_percent',0, None, None,0)
beta_interchange = Beta('beta_interchange',0, None, None,0)
# social economics characteristics
beta_drivingliscence = Beta('beta_drivingliscence',0,None,None,0)

In [30]:
# Model 2 specification: Model 1 plus, in the driving utility, the driving
# licence indicator and the traffic variability attribute.
V_walk = asc_walk + beta_time_walk * dur_walking
V_cycling = asc_cycling + beta_time_cycling * dur_cycling
# The interchange term (beta_interchange * pt_interchanges) is deliberately
# left out of the public transport utility.
V_public = asc_public + beta_time_public * dur_public + beta_cost * cost_transit
V_driving = (
    asc_driving
    + beta_drivingliscence * driving_license
    + beta_time_driving * dur_driving
    + beta_cost * cost_driving
    + beta_traffic * driving_traffic_percent
)

# Alternative ids follow the coding of `travel_mode`:
# 1 = walk, 2 = cycling, 3 = public transport, 4 = driving.
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

# No availability conditions (None): every alternative is available to everyone.
logprob = models.loglogit(V, None, travel_mode)

# The model name is set in the next cell, once the BIOGEME object for this
# model exists; setting it here would rename the object of the previous model.

In [31]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_2'

In [32]:
results2 = biogeme.estimate()



In [33]:
print(results2.printGeneralStatistics())

Number of estimated parameters:	10
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4116.139
Likelihood ratio test for the init. model:	5630.666
Rho-square for the init. model:	0.406
Rho-square-bar for the init. model:	0.405
Akaike Information Criterion:	8252.278
Bayesian Information Criterion:	8317.45
Final gradient norm:	1.8355E-02
Nbr of threads:	16



In [34]:
results2.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-4.660981,0.186396,-25.005737,0.0
asc_driving,-2.407648,0.137885,-17.461337,0.0
asc_public,-2.619489,0.128107,-20.44766,0.0
beta_cost,-0.089926,0.015067,-5.968294,2.397466e-09
beta_driving_traffic_percent,-2.958441,0.235979,-12.536867,0.0
beta_drivingliscence,1.441437,0.071179,20.250883,0.0
beta_time_cycling,-4.784378,0.438025,-10.922614,0.0
beta_time_driving,-4.418347,0.37043,-11.927604,0.0
beta_time_public,-2.8304,0.235259,-12.03098,0.0
beta_time_walk,-8.374844,0.370402,-22.610177,0.0


In [271]:
# results2.data.htmlFileName

## Compare model 2 and 1
Since model 1 is a restricted version of model 2, we perform a likelihood ratio test. Alternatively, we can look at the individual t-tests on the added coefficients $\beta_{driving\_license}$ and $\beta_{driving\_traffic\_percent}$, as given by the estimation table.

In [35]:
results2.likelihood_ratio_test(results1, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=591.7382206371785, threshold=5.991464547107979)

Thus, model 2 is preferred to model 1.

# Model 3

We apply a Box-Cox transformation to the travel time of every alternative.

In [36]:
# define lambda
lambda_boxcox = Beta('lambda_boxcox', 1, None, None, 0)
boxcox_time_1 = models.boxcox(dur_walking, lambda_boxcox)
boxcox_time_2 = models.boxcox(dur_cycling, lambda_boxcox)
boxcox_time_3 = models.boxcox(dur_public, lambda_boxcox)
boxcox_time_4 = models.boxcox(dur_driving, lambda_boxcox)

In [37]:
# Model 3 specification: same as Model 2, with every travel time replaced by
# its Box-Cox transform (a single lambda shared by all alternatives).
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = asc_public + beta_time_public * boxcox_time_3 + beta_cost * cost_transit
V_driving = (
    asc_driving
    + beta_drivingliscence * driving_license
    + beta_time_driving * boxcox_time_4
    + beta_cost * cost_driving
    + beta_traffic * driving_traffic_percent
)

# Alternative ids follow the coding of `travel_mode`:
# 1 = walk, 2 = cycling, 3 = public transport, 4 = driving.
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

# No availability conditions (None): every alternative is available to everyone.
logprob = models.loglogit(V, None, travel_mode)

# The model name is set in the next cell, once the BIOGEME object for this
# model exists; setting it here would rename the object of the previous model.

In [38]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3'
results3 = biogeme.estimate()
print(results3.printGeneralStatistics())

Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4071.611
Likelihood ratio test for the init. model:	5719.722
Rho-square for the init. model:	0.413
Rho-square-bar for the init. model:	0.411
Akaike Information Criterion:	8165.221
Bayesian Information Criterion:	8236.91
Final gradient norm:	2.1995E-02
Nbr of threads:	16



In [39]:
results3.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-1.804932,0.279523,-6.457183,1.0667e-10
asc_driving,0.814865,0.273886,2.975197,0.002928003
asc_public,1.974105,0.173882,11.353111,0.0
beta_cost,-0.091493,0.014584,-6.273438,3.531622e-10
beta_driving_traffic_percent,-2.60105,0.240714,-10.805545,0.0
beta_drivingliscence,1.445317,0.071811,20.12677,0.0
beta_time_cycling,-3.026531,0.276831,-10.932758,0.0
beta_time_driving,-2.650897,0.270057,-9.816063,0.0
beta_time_public,-2.224312,0.191608,-11.608648,0.0
beta_time_walk,-5.377137,0.298874,-17.991325,0.0


## Compare model 3 and 2
Under the null hypothesis that $\lambda=1$, the statistic $$\frac{\widehat{\lambda}-1}{\widehat{\sigma}_\lambda}$$ approximately follows a $N(0, 1)$ distribution.  
Thus, we perform a t-test on $\lambda$.

In [40]:
estimated_parameters_3=results3.getEstimatedParameters()
t_test_lambda = (estimated_parameters_3.loc['lambda_boxcox','Value'] - 1) / estimated_parameters_3.loc['lambda_boxcox', 'Rob. Std err']
t_test_lambda

-11.12116823094468

In [41]:
from scipy.stats import norm
from biogeme.results import calcPValue

In [279]:
norm.ppf(.95)

1.6448536269514722

In [280]:
calcPValue(t_test_lambda)

0.0

Because the $p$-value is below $0.05$, we reject the null hypothesis that $\lambda=1$. Thus, model 3 is preferred.

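Model 2 is also obtained from model 3 by imposing the restriction $\lambda=1$ (the constant shift introduced by the Box-Cox transform is absorbed by the alternative specific constants), so we additionally perform a likelihood ratio test as a check.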

In [281]:
alpha = 0.05
results3.likelihood_ratio_test(results2, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=527.2084260451329, threshold=5.991464547107979)

A more general approach is to perform a **Davidson and MacKinnon J test** on models 2 and 3. 

# Model 4.a

We define Model 4 as a nested logit version of Model 3: public transport and driving are grouped in a "motorized" nest whose nest parameter is estimated, while walking and cycling are placed in a "non-motorized" nest whose nest parameter is fixed to 1.

In [282]:
# Nest parameter of the motorized nest: initial value 1, lower bound 0,
# no upper bound, estimated (status 0).
MU_motorized = Beta('MU_motorized', 1, 0, None, 0)

# Each nest is a (nest parameter, list of alternative ids) pair.
motorized = (MU_motorized, [3, 4])   # public transport and driving
non_motorized = (1, [1, 2])          # walking and cycling, parameter fixed to 1
nests = (motorized, non_motorized)

# Nested logit on the utilities V currently in scope (Model 3 specification).
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_4'
results4 = biogeme.estimate(recycle=False)

In [283]:
results4.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
MU_motorized,2.014567,0.263082,7.657558,1.887379e-14
asc_cycling,-0.957084,0.279949,-3.418777,0.0006290316
asc_driving,2.100845,0.266638,7.879006,3.330669e-15
asc_public,2.779685,0.198493,14.003925,0.0
beta_cost,-0.03735,0.010136,-3.684845,0.0002288416
beta_driving_traffic_percent,-1.401906,0.22984,-6.099475,1.064177e-09
beta_drivingliscence,0.844477,0.102596,8.231086,2.220446e-16
beta_time_cycling,-1.877874,0.240445,-7.809999,5.77316e-15
beta_time_driving,-1.412744,0.211283,-6.686495,2.285794e-11
beta_time_public,-1.205783,0.172992,-6.970149,3.166134e-12


## Compare model 4 and 3

In [284]:
# t-test
nested_existing_table = results4.getEstimatedParameters()
mu_moto = nested_existing_table.loc['MU_motorized', 'Value']
mu_moto

2.014567378133125

In [285]:
mu_stderr = nested_existing_table.loc['MU_motorized', 'Rob. Std err']
tested_value = 1
ttest = (tested_value - mu_moto) / mu_stderr
ttest

-3.8564648041870897

Since $|t| \approx 3.86 > 1.96$, the null hypothesis $\mu_{\text{motorized}}=1$ is rejected at the 5% level.

In [286]:
# likelihood ratio test
results4.likelihood_ratio_test(results3, significance_level=0.05)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=46.85224333329916, threshold=3.841458820694124)

# Model 4.b

In [287]:
MU_private = Beta('MU_private', 1, 0, None, 0)
private = MU_private, [1,2,4]
public = 1.0, [3]
nests = private,public
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_5'
results5 = biogeme.estimate(recycle=True)
results5.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
MU_private,0.650852,0.042759,15.221484,0.0
asc_cycling,-1.516912,0.388055,-3.909017,9.267229e-05
asc_driving,2.213199,0.406245,5.447939,5.095697e-08
asc_public,3.3687,0.358378,9.399845,0.0
beta_cost,-0.083946,0.016215,-5.176948,2.255452e-07
beta_driving_traffic_percent,-3.18826,0.288127,-11.065469,0.0
beta_drivingliscence,1.777428,0.100008,17.772843,0.0
beta_time_cycling,-3.101768,0.319821,-9.69844,0.0
beta_time_driving,-2.767161,0.286051,-9.673675,0.0
beta_time_public,-2.645343,0.228023,-11.601217,0.0


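Since the estimated $\mu_{\text{private}} \approx 0.65 < 1$, this nesting structure is not consistent with random utility theory (which requires the nest parameter to be at least 1 when the top-level scale is normalized to 1), so we reject Model 4.b.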

# Sampling

## Calculate the weight for each segment

In [64]:
modes = ["walk","cycling","public","driving"]

In [43]:
# Population statistics

census = {
    'male_41_more':  1633263,
    'male_40_less':  2676249,
    'female_41_more':  1765143,
    'female_40_less':  2599058
}

In [44]:
total = sum(census.values())
total

8673713

In [45]:
lpmc["40_less"]=lpmc["age"]<=40
lpmc["40_less"]

0       False
1        True
2        True
3        True
4       False
        ...  
4995    False
4996    False
4997    False
4998     True
4999     True
Name: 40_less, Length: 5000, dtype: bool

In [46]:
# sample statistics
filters = {
    'male_41_more': (lpmc['40_less'] == 0) & (lpmc['female'] == 0),
    'male_40_less': (lpmc['40_less'] == 1) & (lpmc['female'] == 0),
    'female_41_more': (lpmc['40_less'] == 0) & (lpmc['female'] == 1),
    'female_40_less': (lpmc['40_less'] == 1) & (lpmc['female'] == 1)
}

In [47]:
sample_segments = {
    k: v.sum() for k, v in filters.items()
}
sample_segments

{'male_41_more': 1084,
 'male_40_less': 1256,
 'female_41_more': 1204,
 'female_40_less': 1456}

In [48]:
total_sample = sum(sample_segments.values())
total_sample

5000

The weight $w_g$ associated with segment $g$ is defined as
$$
w_g = \frac{N_g}{N}\frac{S}{S_g}.
$$

In [49]:
# weight associated with each segment
weights = {
    k: census[k] * total_sample / (v * total) 
    for k, v in sample_segments.items()
}
weights

{'male_41_more': 0.86854394680908,
 'male_40_less': 1.2282925990376248,
 'female_41_more': 0.8451199703160228,
 'female_40_less': 1.029009899043413}

In [50]:
# insert weight as a new column to the database lpmc
for k, f in filters.items():
    lpmc.loc[f, 'Weight'] = weights[k] 

## Calculate predicted market share for each mode

In [51]:
# `nests`, `logprob` and `biogeme` were overwritten by the Model 4.b cell, so
# the motorized / non-motorized nested model (Model 4.a) is rebuilt here before
# it is used for simulation.
MU_motorized = Beta('MU_motorized', 1, 0, None, 0)
# Each nest is a (nest parameter, list of alternative ids) pair.
motorized = MU_motorized, [3, 4]
non_motorized = 1, [1,2]
nests = motorized, non_motorized
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_4'
# NOTE(review): recycle=True presumably reuses previously saved results for
# 'model_4' instead of re-running the estimation — confirm against the Biogeme
# version in use.
results4 = biogeme.estimate(recycle=True)



In [58]:
prob_walk = models.nested(V, None, nests,1)
prob_cycling = models.nested(V, None, nests,2)
prob_public = models.nested(V, None, nests,3)
prob_driving = models.nested(V, None, nests,4)

In [65]:
Weight = Variable('Weight')
simulate = {
    'Weight': Weight,
    'Prob. walk': prob_walk,
    'Prob. cycling': prob_cycling,
    'Prob. public': prob_public,
    'Prob. driving': prob_driving
}

In [None]:
biosim = bio.BIOGEME(database, simulate)
simulated_values = biosim.simulate(results4.getBetaValues())

In [None]:
simulated_values

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving
0,0.845120,0.046095,0.027442,0.051681,0.874781
1,1.029010,0.247633,0.078221,0.479692,0.194454
2,1.228293,0.000313,0.035247,0.752680,0.211760
3,1.228293,0.002626,0.017689,0.045316,0.934369
4,0.868544,0.082050,0.037395,0.122870,0.757685
...,...,...,...,...,...
4995,0.845120,0.371744,0.034708,0.108865,0.484683
4996,0.845120,0.055884,0.050297,0.709961,0.183858
4997,0.868544,0.003089,0.014134,0.085311,0.897465
4998,1.228293,0.022966,0.060483,0.600052,0.316499


Market shares are calculated using the weighted mean of the
individual probabilities.

In [None]:
modes = ["walk","cycling","public","driving"]
for mode in modes:
    simulated_values['Weighted '+mode] = (
        simulated_values['Weight'] * 
        simulated_values['Prob. '+mode]
    )

In [None]:
simulated_values

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving,Weighted walk,Weighted cycling,Weighted public,Weighted driving
0,0.845120,0.046095,0.027442,0.051681,0.874781,0.038956,0.023192,0.043677,0.739295
1,1.029010,0.247633,0.078221,0.479692,0.194454,0.254817,0.080490,0.493608,0.200095
2,1.228293,0.000313,0.035247,0.752680,0.211760,0.000384,0.043294,0.924512,0.260103
3,1.228293,0.002626,0.017689,0.045316,0.934369,0.003226,0.021727,0.055661,1.147678
4,0.868544,0.082050,0.037395,0.122870,0.757685,0.071264,0.032479,0.106718,0.658083
...,...,...,...,...,...,...,...,...,...
4995,0.845120,0.371744,0.034708,0.108865,0.484683,0.314168,0.029332,0.092004,0.409616
4996,0.845120,0.055884,0.050297,0.709961,0.183858,0.047229,0.042507,0.600002,0.155382
4997,0.868544,0.003089,0.014134,0.085311,0.897465,0.002683,0.012276,0.074096,0.779488
4998,1.228293,0.022966,0.060483,0.600052,0.316499,0.028209,0.074291,0.737039,0.388753


In [None]:
# Market share of each mode = mean over the sample of (weight * probability).
# Each share is kept both in the `market_share` list (same order as `modes`)
# and in a module-level variable named `market_share_<mode>`.
market_share = []
for i, mode in enumerate(modes):
    share = simulated_values['Weighted '+ mode].mean()
    globals()['market_share_'+mode] = share
    market_share.append(share)
    print(f'Market share for {mode}: {100*market_share[i]:.1f}%')

Market share for walk: 18.3%
Market share for cycling: 3.6%
Market share for public: 36.3%
Market share for driving: 41.9%


## Calculate confidence interval

In [306]:
# Re-estimate the model held in `biogeme` with bootstrap=10; the result is used
# below to build confidence intervals on the simulated quantities.
# NOTE(review): 10 looks small for the 90% intervals requested below —
# consider a larger value once run time allows.
results_bootstrapping = biogeme.estimate(bootstrap=10)

In [307]:
betas = biogeme.freeBetaNames()
b = results_bootstrapping.getBetasForSensitivityAnalysis(betas)

In [308]:
left, right = biosim.confidenceIntervals(b, 0.9)

In [309]:
left

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving
0,0.845120,0.036112,0.024140,0.041951,0.861047
1,1.029010,0.231407,0.071436,0.457468,0.176094
2,1.228293,0.000225,0.024283,0.725867,0.168647
3,1.228293,0.001875,0.014862,0.040357,0.924786
4,0.868544,0.068872,0.034582,0.104746,0.742138
...,...,...,...,...,...
4995,0.845120,0.343747,0.031903,0.084898,0.430869
4996,0.845120,0.053621,0.048049,0.700269,0.166383
4997,0.868544,0.002086,0.011363,0.072111,0.883371
4998,1.228293,0.021503,0.050354,0.580465,0.286034


In [310]:
for mode in modes:
    left['Weighted '+mode] = (
        left['Weight'] * 
        left['Prob. '+mode]
    )
    right['Weighted '+mode] = (
        right['Weight'] * 
        right['Prob. '+mode]
    )

In [311]:
# Bounds of the market shares: mean over the sample of the weighted lower
# (`left`) and upper (`right`) bounds of the individual probabilities.
# Each bound is kept both in a list (same order as `modes`) and in a
# module-level variable named `left_market_share_<mode>` / `right_market_share_<mode>`.
left_market_share=[]
right_market_share=[]
for i,mode in enumerate(modes): 
    globals()['left_market_share_'+mode]=left['Weighted '+ mode].mean()
    globals()['right_market_share_'+mode]=right['Weighted '+ mode].mean()
    left_market_share.append(globals()['left_market_share_'+mode])
    right_market_share.append(globals()['right_market_share_'+mode])
    # Both bounds are printed as percentages (the upper bound previously
    # lacked its '%' sign).
    print(
        f'Market share for {mode}: {100*market_share[i]:.1f}% '
        f'CI: ['
        f'{100*left_market_share[i]:.1f}%-'
        f'{100*right_market_share[i]:.1f}%'
        f']'
    )

Market share for walk: 18.3% CI: [17.4%-19.2]
Market share for cycling: 3.6% CI: [3.2%-4.3]
Market share for public: 36.3% CI: [34.5%-38.1]
Market share for driving: 41.9% CI: [39.7%-43.8]


# Forecasting

## Consider two scenarios

In [66]:
# define a function for calculating market share
def calculate_market_share(utility):
    """Predict the weighted market share of each travel mode.

    The choice probabilities of the nested logit model are simulated for
    every observation of the module-level `database`, using the module-level
    `nests` definition and the parameter values of `results4`. The market
    share of a mode is the sample mean of (weight * probability), where the
    weight is read from the 'Weight' column of the database.

    Unlike the earlier version, this function does not write
    `market_share_<mode>` variables into the module namespace, so calling it
    for a scenario no longer overwrites the baseline values.

    Parameters
    ----------
    utility : dict
        Maps each alternative id (1 = walk, 2 = cycling, 3 = public,
        4 = driving) to its utility expression.

    Returns
    -------
    list
        Market shares in the order [walk, cycling, public, driving].
    """
    # Mode label -> alternative id, in the order of the returned list.
    mode_ids = {'walk': 1, 'cycling': 2, 'public': 3, 'driving': 4}

    simulate = {'Weight': Variable('Weight')}
    for mode, alt_id in mode_ids.items():
        simulate['Prob. ' + mode] = models.nested(utility, None, nests, alt_id)

    biosim = bio.BIOGEME(database, simulate)
    simulated_values = biosim.simulate(results4.getBetaValues())

    return [
        (simulated_values['Weight'] * simulated_values['Prob. ' + mode]).mean()
        for mode in mode_ids
    ]

In [67]:
# an increase of car cost by 15%
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = asc_public + beta_time_public * boxcox_time_3+ beta_cost * cost_transit 
V_driving = asc_driving + beta_time_driving * boxcox_time_4 + beta_cost * cost_driving*1.15 + beta_drivingliscence * driving_license +beta_traffic*driving_traffic_percent
V_1 = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

In [68]:
market_share_1 = calculate_market_share(V_1)

In [69]:
market_share_1

[0.18320582887525586,
 0.035756802077457656,
 0.3649768902947506,
 0.4160604787525359]

In [70]:
for i,mode in enumerate(modes): 
    print(f'Market share for {mode}: {100*market_share_1[i]:.2f}%')

Market share for walk: 18.32%
Market share for cycling: 3.58%
Market share for public: 36.50%
Market share for driving: 41.61%


In [72]:
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = asc_public + beta_time_public * boxcox_time_3 + beta_cost*cost_transit*0.85 
V_driving = asc_driving + beta_time_driving * boxcox_time_4 + beta_cost * cost_driving  + beta_drivingliscence * driving_license +beta_traffic*driving_traffic_percent
V_2 = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

In [73]:
market_share_2 = calculate_market_share(V_2)

In [74]:
market_share_2

[0.18291361665071748,
 0.03555594428298746,
 0.3651874591138524,
 0.4163429799524427]

In [75]:
for i,mode in enumerate(modes): 
    print(f'Market share for {mode}: {100*market_share_2[i]:.2f}%')

Market share for walk: 18.29%
Market share for cycling: 3.56%
Market share for public: 36.52%
Market share for driving: 41.63%


## Effect on revenue

In [321]:
def calculate_revenues(change, utility):
    """Return the weighted public transport revenue over the sample.

    For each observation the expected revenue is the probability of choosing
    public transport (alternative 3 of the nested model defined by the
    module-level `nests`) times `cost_transit` times `change`. The values are
    simulated with the parameters of `results4`, multiplied by the 'Weight'
    column and summed.

    Parameters
    ----------
    change : float
        Multiplicative factor applied to the transit cost (1 = unchanged).
    utility : dict
        Maps each alternative id to its utility expression.

    Returns
    -------
    float
        Sum over the sample of weight * expected revenue.
    """
    formulas = {
        'Weight': Weight,
        'Revenues PT': models.nested(utility, None, nests, 3) * cost_transit * change,
    }
    simulated = bio.BIOGEME(database, formulas).simulate(results4.getBetaValues())

    weighted_revenues = simulated['Weight'] * simulated['Revenues PT']
    return weighted_revenues.sum()

In [322]:
# scenario 0
calculate_revenues(1,V)

3507.1566369232214

In [323]:
# scenario 1
calculate_revenues(1,V_1)

3538.788890806477

In [324]:
# scenario 2
calculate_revenues(0.85,V_2)

3015.313863646474

## Aggregate elasticity

Disaggregate elasticity
$$
E_{x_{i n k}}^{P_{n}(i)}=\frac{\partial P_{n}(i)}{\partial x_{i n k}} \frac{x_{i n k}}{P_{n}(i)}
$$
Aggregate elasticity
$$
\begin{aligned}
E_{x_{j k}}^{\widehat{W}(i)} & =\frac{\partial \widehat{W}(i)}{\partial x_{j k}} \frac{x_{j k}}{\widehat{W}(i)} \\
& =\frac{1}{S} \sum_{n=1}^{S} \omega_{n} \frac{P_{n}(i)}{P_{n}(i)} \frac{\partial P_{n}(i)}{\partial x_{j n k}} \frac{S x_{j n k}}{\sum_{\ell=1}^{S} \omega_{\ell} P_{\ell}(i)} \\
& =\frac{1}{\sum_{\ell=1}^{S} \omega_{\ell} P_{\ell}(i)} \sum_{n=1}^{S} \omega_{n} P_{n}(i) E_{x_{j n k}}^{P_{n}(i)} .
\end{aligned}
$$

In [53]:
from biogeme.expressions import Derive

In [54]:
def calculate_direct_elasticity(utility):
    """Aggregate direct cost elasticities of public transport and driving.

    The disaggregate elasticity of each probability with respect to the cost
    of the same mode (`cost_transit` for public transport, `cost_driving` for
    driving) is simulated with the parameters of `results4`, then aggregated
    as a mean weighted by (weight * probability).

    Parameters
    ----------
    utility : dict
        Maps each alternative id to its utility expression.

    Returns
    -------
    tuple
        (aggregate elasticity for public transport, aggregate elasticity for driving).
    """
    p_public = models.nested(utility, None, nests, 3)
    p_driving = models.nested(utility, None, nests, 4)

    formulas = {
        'Weight': Weight,
        'Prob. public': p_public,
        'Prob. driving': p_driving,
        'Dis. Elas. public': Derive(p_public, 'cost_transit') * cost_transit / p_public,
        'Dis. Elas. driving': Derive(p_driving, 'cost_driving') * cost_driving / p_driving,
    }
    simulated = bio.BIOGEME(database, formulas).simulate(results4.getBetaValues())

    aggregate = []
    for mode in ('public', 'driving'):
        # weight * probability is both the averaging weight and, once
        # summed, the normalisation constant.
        weighted_prob = simulated['Weight'] * simulated['Prob. ' + mode]
        numerator = (weighted_prob * simulated['Dis. Elas. ' + mode]).sum()
        aggregate.append(numerator / weighted_prob.sum())

    return tuple(aggregate)

In [55]:
calculate_direct_elasticity(V)

(-0.04663760711164039, -0.04129836004970217)

In [56]:
def calculate_cross_elasticity(utility):
    """Aggregate cross cost elasticities of public transport and driving.

    The disaggregate elasticity of each probability with respect to the cost
    of the *other* mode (`cost_driving` for public transport, `cost_transit`
    for driving) is simulated with the parameters of `results4`, then
    aggregated as a mean weighted by (weight * probability).

    Parameters
    ----------
    utility : dict
        Maps each alternative id to its utility expression.

    Returns
    -------
    tuple
        (aggregate elasticity for public transport, aggregate elasticity for driving).
    """
    p_public = models.nested(utility, None, nests, 3)
    p_driving = models.nested(utility, None, nests, 4)

    formulas = {
        'Weight': Weight,
        'Prob. public': p_public,
        'Prob. driving': p_driving,
        'Dis. Elas. public': Derive(p_public, 'cost_driving') * cost_driving / p_public,
        'Dis. Elas. driving': Derive(p_driving, 'cost_transit') * cost_transit / p_driving,
    }
    simulated = bio.BIOGEME(database, formulas).simulate(results4.getBetaValues())

    aggregate = []
    for mode in ('public', 'driving'):
        # weight * probability is both the averaging weight and, once
        # summed, the normalisation constant.
        weighted_prob = simulated['Weight'] * simulated['Prob. ' + mode]
        numerator = (weighted_prob * simulated['Dis. Elas. ' + mode]).sum()
        aggregate.append(numerator / weighted_prob.sum())

    return tuple(aggregate)

In [57]:
calculate_cross_elasticity(V)

(0.0436694327491096, 0.03596605131045157)