In [1]:
import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta, Variable, log, exp
from biogeme import models
from biogeme import results as res

In [204]:
# Read the tab-separated trip data from a local file into a DataFrame and
# display it. The commented-out line below is an alternative remote source;
# NOTE(review): not verified here to be the same sample as the local file.
#data_file = "http://transp-or.epfl.ch/data/lpmc.dat"
data_file='lpmc10.dat'
lpmc = pd.read_csv(data_file, sep='\t')
lpmc

Unnamed: 0,trip_id,household_id,person_n,trip_n,travel_mode,purpose,fueltype,faretype,bus_scale,survey_year,...,dur_pt_access,dur_pt_rail,dur_pt_bus,dur_pt_int,pt_interchanges,dur_driving,cost_transit,cost_driving_fuel,cost_driving_ccharge,driving_traffic_percent
0,20,5,1,0,4,3,1,5,0.0,1,...,0.381667,0.000000,0.062222,0.000000,0,0.117222,0.00,0.41,0.0,0.097156
1,41,9,3,0,4,3,1,5,0.0,1,...,0.146944,0.000000,0.225000,0.000000,0,0.200833,0.00,0.48,0.0,0.378976
2,69,13,2,1,4,3,1,1,1.0,1,...,0.029444,0.083333,0.735833,0.398056,3,0.716944,6.00,2.16,0.0,0.582720
3,102,20,2,0,2,3,1,1,1.0,1,...,0.339722,0.183333,0.116667,0.266667,1,0.250833,3.00,0.89,0.0,0.170543
4,105,21,0,1,4,3,1,1,1.0,1,...,0.126389,0.000000,0.150000,0.000000,0,0.125833,1.50,0.37,0.0,0.154525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,80998,17591,0,5,4,3,1,5,0.0,3,...,0.189167,0.000000,0.117778,0.000000,0,0.082500,0.00,0.22,10.5,0.239057
4996,81000,17592,0,0,3,3,6,5,0.0,3,...,0.105278,0.000000,0.220278,0.000000,0,0.213611,0.00,0.52,0.0,0.412224
4997,81015,17597,0,3,4,3,1,5,0.0,3,...,0.343056,0.000000,0.177500,0.000000,0,0.189444,0.00,0.76,0.0,0.086510
4998,81041,17604,2,4,3,1,1,2,0.0,3,...,0.344444,0.316667,0.000000,0.083333,1,0.386111,1.05,0.98,0.0,0.340288


In [205]:
# Wrap the DataFrame in a Biogeme database named 'trips'.
# NOTE(review): later cells add the columns '40_less' and 'Weight' to `lpmc`
# after this object is created and then read 'Weight' through `database`.
# That presumably relies on the database keeping a reference to `lpmc` rather
# than a copy -- confirm for the installed Biogeme version.
database = db.Database('trips', lpmc)

In [45]:
lpmc.columns

Index(['trip_id', 'household_id', 'person_n', 'trip_n', 'travel_mode',
       'purpose', 'fueltype', 'faretype', 'bus_scale', 'survey_year',
       'travel_year', 'travel_month', 'travel_date', 'day_of_week',
       'start_time', 'age', 'female', 'driving_license', 'car_ownership',
       'distance', 'dur_walking', 'dur_cycling', 'dur_pt_access',
       'dur_pt_rail', 'dur_pt_bus', 'dur_pt_int', 'pt_interchanges',
       'dur_driving', 'cost_transit', 'cost_driving_fuel',
       'cost_driving_ccharge', 'driving_traffic_percent'],
      dtype='object')

In [206]:
# define variables
# Create one Biogeme Variable per column of `lpmc` and bind it in the global
# namespace under the column's own name, so that later cells can refer to
# e.g. `dur_walking` or `travel_mode` directly. Any existing global with the
# same name is overwritten; columns added to `lpmc` after this cell runs do
# not get a Variable here.
for col in lpmc.columns:
    globals()[col]=Variable(col)

# Model 0

In [5]:
# Alternative-specific constants. Beta arguments are
# (name, initial value, lower bound, upper bound, status).
# asc_walk is created with status 1 and the others with status 0; only the
# latter three appear in the estimation table, so walking acts as the
# reference alternative with its constant held at 0.
asc_walk = Beta('asc_walk', 0, None, None, 1)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [207]:
cost_driving = cost_driving_fuel + cost_driving_ccharge #total costs of driving
dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #total duration of public transportation

In [7]:
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_time = Beta('beta_time', 0, None, None, 0)

In [8]:
# Model 0 utilities: a single time coefficient (beta_time) shared by all four
# modes, and a single cost coefficient (beta_cost) for the two modes that
# have a monetary cost (driving and public transport).
V_walk = asc_walk + beta_time * dur_walking
V_cycling = asc_cycling + beta_time * dur_cycling
V_driving = asc_driving + beta_time * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time * dur_public + beta_cost * cost_transit

In [9]:
# Map each alternative id (1..4, matched against travel_mode below) to its
# utility function.
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

# define choice sets for individuals
# (disabled: this would make driving available only to licence holders)
# av = {1: 1, 2: 1, 3: 1, 4: driving_license}

# Log-probability of the chosen mode under a logit model. None is passed in
# the availability position instead of the `av` dict sketched above.
logprob = models.loglogit(V, None, travel_mode)

In [10]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_base'

In [11]:
results0 = biogeme.estimate()



In [12]:
print(results0.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4722.972
Likelihood ratio test for the init. model:	4417
Rho-square for the init. model:	0.319
Rho-square-bar for the init. model:	0.318
Akaike Information Criterion:	9455.944
Bayesian Information Criterion:	9488.53
Final gradient norm:	1.0691E-03
Nbr of threads:	16



In [13]:
results0.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-3.660824,0.100795,-36.319469,0.0
asc_driving,-1.335138,0.076403,-17.474949,0.0
asc_public,-0.559066,0.05262,-10.624628,0.0
beta_cost,-0.14985,0.01378,-10.87426,0.0
beta_time,-5.410269,0.188336,-28.726735,0.0


In [14]:
# results0.data.htmlFileName

# Model 1


In [15]:
asc_walk = Beta('asc_walk', 0, None, None, 1)
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [16]:
cost_driving = cost_driving_fuel + cost_driving_ccharge
dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int #total duration of public transportation

In [17]:
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)
beta_time_public = Beta('beta_time_public', 0, None, None, 0)

In [18]:
# Model 1 utilities: same structure as Model 0 but with one time coefficient
# per mode. `beta_cost` is not redefined in the Model 1 cells; it is the
# object created in the Model 0 section, so that cell must have been run.
V_walk = asc_walk + beta_time_walk * dur_walking
V_cycling = asc_cycling + beta_time_cycling * dur_cycling
V_driving = asc_driving + beta_time_driving * dur_driving + beta_cost * cost_driving
V_public = asc_public + beta_time_public * dur_public + beta_cost * cost_transit

In [19]:
V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

logprob = models.loglogit(V, None, travel_mode)

In [20]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_1'

In [21]:
results1 = biogeme.estimate()



In [22]:
print(results1.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4412.008
Likelihood ratio test for the init. model:	5038.928
Rho-square for the init. model:	0.363
Rho-square-bar for the init. model:	0.362
Akaike Information Criterion:	8840.016
Bayesian Information Criterion:	8892.153
Final gradient norm:	2.5518E-02
Nbr of threads:	16



In [23]:
results1.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-4.590519,0.179036,-25.640219,0.0
asc_driving,-2.072023,0.120163,-17.243377,0.0
asc_public,-2.439478,0.121947,-20.004412,0.0
beta_cost,-0.14178,0.015193,-9.331745,0.0
beta_time_cycling,-5.195891,0.423907,-12.25716,0.0
beta_time_driving,-5.875433,0.359795,-16.329946,0.0
beta_time_public,-3.200593,0.230746,-13.870609,0.0
beta_time_walk,-8.367591,0.3601,-23.236825,0.0


In [24]:
# results1.data.htmlFileName

## Comparing Models 0 and 1

In [25]:
general_statistics_model_0 = results0.getGeneralStatistics()
print(results0.printGeneralStatistics())

Number of estimated parameters:	5
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4722.972
Likelihood ratio test for the init. model:	4417
Rho-square for the init. model:	0.319
Rho-square-bar for the init. model:	0.318
Akaike Information Criterion:	9455.944
Bayesian Information Criterion:	9488.53
Final gradient norm:	1.0691E-03
Nbr of threads:	16



In [26]:
general_statistics_model_1 = results1.getGeneralStatistics()
print(results1.printGeneralStatistics())

Number of estimated parameters:	8
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4412.008
Likelihood ratio test for the init. model:	5038.928
Rho-square for the init. model:	0.363
Rho-square-bar for the init. model:	0.362
Akaike Information Criterion:	8840.016
Bayesian Information Criterion:	8892.153
Final gradient norm:	2.5518E-02
Nbr of threads:	16



### Likelihood ratio test

$H_0$: $\beta\_time\_walk=\beta\_time\_driving=\beta\_time\_public=\beta\_time\_cycling$

Because model 0 is a restricted version of model 1, we can apply the likelihood ratio test.

In [27]:
# Significance level; `alpha` is reused by the later likelihood ratio tests.
alpha=0.05
# Likelihood ratio test of Model 1 (unrestricted, called on results1) against
# Model 0 (restricted, passed as the argument).
results1.likelihood_ratio_test(results0, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=621.9276889295215, threshold=7.814727903251179)

According to the test result, the null hypothesis is rejected at the 5% level.

# Model 2

1. One alternative attribute: pt_interchanges for public transport  
The assumption is that the number of interchanges is relevant to the overall experience of taking public transport. 
2. One socio-economic characteristic: driving_license, interacting with ASC_driving  
The assumption is that holding a driving licence shifts the baseline preference for driving. (**TODO**: state the expected sign and the motivation.)

Model 2 specification:

$ V_{walk}=ASC_{walk}+\beta_{time\_walk}time_{walk} $

$ V_{cycling}=ASC_{cycling}+\beta_{time\_cycle}time_{cycle} $

$ V_{public}=ASC_{public}+\beta_{time\_public}time_{public}+\beta_{cost}cost_{public}+\beta_{interchange}pt\_interchanges$

$ V_{driving}=ASC_{driving}+\beta_{time\_driving}time_{driving}+\beta_{cost}cost_{driving}+\beta_{driving\_license}driving\_license$


In [28]:
asc_walk = Beta('asc_walk', 0, None, None, 1) # Normalizing asc_walk to 0
asc_cycling = Beta('asc_cycling', 0, None, None, 0)
asc_public = Beta('asc_public', 0, None, None, 0)
asc_driving = Beta('asc_driving', 0, None, None, 0)

In [29]:
cost_driving = cost_driving_fuel + cost_driving_ccharge
dur_public = dur_pt_access + dur_pt_rail + dur_pt_bus + dur_pt_int # total duration of public transportation

In [30]:
# Coefficients for Model 2: mode-specific time coefficients, a generic cost
# coefficient, and the two new terms (interchanges, driving licence).
beta_time_walk = Beta('beta_time_walk', 0, None, None, 0)
beta_time_cycling = Beta('beta_time_cycling', 0, None, None, 0)
beta_time_driving = Beta('beta_time_driving', 0, None, None, 0)
beta_time_public = Beta('beta_time_public', 0, None, None, 0)
beta_cost = Beta('beta_cost', 0, None, None, 0)
beta_interchange = Beta('beta_interchange',0, None, None,0)
beta_drivingliscence = Beta('beta_drivingliscence',0,None,None,0)
# NOTE(review): beta_faretype and beta_traffic are declared but not used in
# any utility function in this notebook (the Model 2 output reports 10
# estimated parameters, none of them these two).
beta_faretype = Beta('beta_faretype',0, None, None, 0)
beta_traffic = Beta('beta_traffic',0, None, None, 0)

In [31]:
# Model 2 specification: Model 1 plus the number of public-transport
# interchanges (public utility) and a driving-licence term (driving utility).
V_walk = asc_walk + beta_time_walk * dur_walking
V_cycling = asc_cycling + beta_time_cycling * dur_cycling
V_public = (
    asc_public
    + beta_time_public * dur_public
    + beta_cost * cost_transit
    + beta_interchange * pt_interchanges
)
V_driving = (
    asc_driving
    + beta_time_driving * dur_driving
    + beta_cost * cost_driving
    + beta_drivingliscence * driving_license
)

V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

# None in the availability position: no availability dict is supplied.
logprob = models.loglogit(V, None, travel_mode)

# No `biogeme.modelName = ...` here: at this point `biogeme` is still the
# BIOGEME object of the previous model, so assigning to it would rename that
# old object. The name is set once the new BIOGEME object has been created
# from this `logprob`.

In [32]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_2'

In [33]:
results2 = biogeme.estimate()



In [34]:
print(results2.printGeneralStatistics())

Number of estimated parameters:	10
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4207.638
Likelihood ratio test for the init. model:	5447.667
Rho-square for the init. model:	0.393
Rho-square-bar for the init. model:	0.392
Akaike Information Criterion:	8435.277
Bayesian Information Criterion:	8500.449
Final gradient norm:	1.1388E-02
Nbr of threads:	16



In [35]:
results2.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-4.658217,0.185018,-25.17705,0.0
asc_driving,-2.925344,0.134301,-21.781937,0.0
asc_public,-2.495342,0.125829,-19.83125,0.0
beta_cost,-0.130483,0.013882,-9.399273,0.0
beta_drivingliscence,1.359169,0.069275,19.619925,0.0
beta_interchange,-0.030145,0.085143,-0.354053,0.723299
beta_time_cycling,-5.400914,0.443234,-12.185248,0.0
beta_time_driving,-6.436766,0.374004,-17.210409,0.0
beta_time_public,-3.393491,0.259001,-13.10221,0.0
beta_time_walk,-8.564553,0.370558,-23.112572,0.0


In [36]:
# results2.data.htmlFileName

## Compare model 2 and 1
Since model 1 is a restricted version of model 2, we perform a likelihood ratio test. Alternatively, we can simply look at the t-tests on the coefficients $\beta_{driving\_license}$ and $\beta_{interchange}$, as given by the estimation table.

In [37]:
results2.likelihood_ratio_test(results1, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=408.7390116578699, threshold=5.991464547107979)

Thus, model 2 is preferred to model 1.

# Model 3

We apply a Box-Cox transformation to the travel time of every alternative.

In [38]:
# define lambda
# A single Box-Cox parameter, shared by all four travel-time transforms.
# It starts at 1 and is estimated (status 0), with no bounds.
lambda_boxcox = Beta('lambda_boxcox', 1, None, None, 0)
# Box-Cox transformed travel times, numbered like the alternatives
# (1 walk, 2 cycling, 3 public transport, 4 driving).
boxcox_time_1 = models.boxcox(dur_walking, lambda_boxcox)
boxcox_time_2 = models.boxcox(dur_cycling, lambda_boxcox)
boxcox_time_3 = models.boxcox(dur_public, lambda_boxcox)
boxcox_time_4 = models.boxcox(dur_driving, lambda_boxcox)

In [184]:
# Model 3 specification: Model 2 with every travel time replaced by its
# Box-Cox transform (boxcox_time_1..4 share the parameter lambda_boxcox).
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = (
    asc_public
    + beta_time_public * boxcox_time_3
    + beta_cost * cost_transit
    + beta_interchange * pt_interchanges
)
V_driving = (
    asc_driving
    + beta_time_driving * boxcox_time_4
    + beta_cost * cost_driving
    + beta_drivingliscence * driving_license
)

V = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

# None in the availability position: no availability dict is supplied.
logprob = models.loglogit(V, None, travel_mode)

# No `biogeme.modelName = ...` here: at this point `biogeme` is still the
# BIOGEME object of the previous model, so assigning to it would rename that
# old object. The name is set once the new BIOGEME object has been created
# from this `logprob`.

In [40]:
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_3'
results3 = biogeme.estimate()
print(results3.printGeneralStatistics())


Number of estimated parameters:	11
Sample size:	5000
Excluded observations:	0
Init log likelihood:	-6931.472
Final log likelihood:	-4135.707
Likelihood ratio test for the init. model:	5591.529
Rho-square for the init. model:	0.403
Rho-square-bar for the init. model:	0.402
Akaike Information Criterion:	8293.415
Bayesian Information Criterion:	8365.104
Final gradient norm:	2.5246E-02
Nbr of threads:	16



In [41]:
results3.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
asc_cycling,-2.195568,0.276631,-7.936817,1.998401e-15
asc_driving,-1.025547,0.223983,-4.578676,4.679293e-06
asc_public,1.779555,0.172816,10.297423,0.0
beta_cost,-0.120172,0.01358,-8.849079,0.0
beta_drivingliscence,1.365768,0.069714,19.591049,0.0
beta_interchange,-0.087201,0.076757,-1.13607,0.2559271
beta_time_cycling,-3.383673,0.262832,-12.87391,0.0
beta_time_driving,-3.564961,0.268131,-13.295614,0.0
beta_time_public,-2.603897,0.187886,-13.858937,0.0
beta_time_walk,-5.625187,0.250273,-22.476169,0.0


## Compare model 3 and 2
Under the null hypothesis that $\lambda=1$, the statistic $$\frac{\widehat{\lambda}-1}{\widehat{\sigma}_\lambda}$$ approximately follows a $N(0, 1)$ distribution.  
Thus, we perform a t-test on $\lambda$.

In [69]:
# t-statistic for H0: lambda_boxcox = 1, i.e. (estimate - 1) divided by the
# robust standard error, both read from Model 3's parameter table.
estimated_parameters_3=results3.getEstimatedParameters()
t_test_lambda = (estimated_parameters_3.loc['lambda_boxcox','Value'] - 1) / estimated_parameters_3.loc['lambda_boxcox', 'Rob. Std err']
t_test_lambda

-14.600459948421262

In [70]:
from scipy.stats import norm
from biogeme.results import calcPValue

In [71]:
norm.ppf(.95)

1.6448536269514722

In [72]:
calcPValue(t_test_lambda)

0.0

Because the $p$-value is below $0.05$, we reject the null hypothesis that $\lambda=1$. Thus, model 3 is preferred.

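Model 2 is not a linear restriction of model 3, but it is obtained from it by imposing the single restriction $\lambda=1$ (the constant shift introduced by the Box-Cox transform is absorbed by the alternative-specific constants), so we also perform a likelihood ratio test with one degree of freedom as a check.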

In [73]:
results3.likelihood_ratio_test(results2, alpha)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=143.8621227926742, threshold=3.841458820694124)

A more general way to compare the two specifications is to perform a **Davidson and MacKinnon J test** on models 2 and 3. 

# Model 4.a

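We define Model 4 as a nested logit version of Model 3 in which the motorized modes (public transport and car) form one nest with an estimated scale parameter $\mu_{motorized}$. Walking and cycling are placed in a second nest whose scale parameter is fixed to 1, which is equivalent to treating each of them as its own nest.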

In [191]:
# Scale parameter of the motorized nest: initial value 1, lower bound 0,
# no upper bound, estimated (status 0).
MU_motorized = Beta('MU_motorized', 1, 0, None, 0)
# Each nest is a (scale parameter, list of alternative ids) tuple.
motorized = MU_motorized, [3, 4]
# Walking and cycling share a nest whose scale is the constant 1.
non_motorized = 1, [1,2]
nests = motorized, non_motorized
# `V` is the utility dict defined most recently (Model 3's when the notebook
# is run top to bottom).
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_4'
# NOTE(review): recycle=True presumably reuses previously saved estimation
# results for this model name when they exist -- confirm; stale results would
# be returned if the specification changed in the meantime.
results4 = biogeme.estimate(recycle=True)

In [70]:
results4.getEstimatedParameters()

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
MU_motorized,2.335802,0.355732,6.56618,5.162248e-11
asc_cycling,-1.028362,0.287317,-3.579186,0.0003446663
asc_driving,1.454515,0.37181,3.911983,9.154152e-05
asc_public,2.798889,0.205713,13.605808,0.0
beta_cost,-0.043889,0.010578,-4.149049,3.338595e-05
beta_drivingliscence,0.700806,0.101105,6.931475,4.164669e-12
beta_interchange,-0.041999,0.035403,-1.18631,0.2354999
beta_time_cycling,-1.846381,0.245251,-7.528526,5.12923e-14
beta_time_driving,-1.599725,0.261187,-6.124825,9.07834e-10
beta_time_public,-1.228065,0.195886,-6.26929,3.626974e-10


## Compare model 4 and 3

In [55]:
# t-test
# Read the estimated scale of the motorized nest from Model 4's results.
nested_existing_table = results4.getEstimatedParameters()
mu_moto = nested_existing_table.loc['MU_motorized', 'Value']
# The non-motorized nest has its scale fixed to 1 in the nest definition
# (`non_motorized = 1, [1,2]`), so there is no estimate to read for it.
mu_non_moto = 1
# `mu` is the value tested against 1 by the t-statistic computed afterwards.
mu = mu_moto
mu

2.3374367362643778

In [56]:
# Robust standard error of the motorized nest parameter.
mu_stderr = nested_existing_table.loc['MU_motorized', 'Rob. Std err']
# H0: MU_motorized = 1 (the nested model collapses to Model 3's logit).
tested_value = 1
# Sign convention: (tested value - estimate) / std err, so an estimate above
# 1 yields a negative statistic.
ttest = (tested_value - mu) / mu_stderr
ttest

-3.7559286301770047

Therefore, the null hypothesis is rejected at the 5% level.

In [63]:
# likelihood ratio test
results4.likelihood_ratio_test(results3, significance_level=0.05)

LRTuple(message='H0 can be rejected at level 5.0%', statistic=65.57326490587366, threshold=5.991464547107979)

# Model 4.b

In [66]:
# Alternative nesting: walking, cycling and driving grouped in a "private"
# nest with an estimated scale (initial value 1, lower bound 0); public
# transport alone in a nest with scale fixed to 1.0.
MU_private = Beta('MU_private', 1, 0, None, 0)
private = MU_private, [1,2,4]
public = 1.0, [3]
# NOTE: this rebinds the global `nests`; any later cell that needs the
# motorized/non-motorized nests has to define them again first.
nests = private,public
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
# The model is stored under the name 'model_5' and its results in `results5`,
# although the section heading calls it Model 4.b.
biogeme.modelName = 'model_5'
results5 = biogeme.estimate(recycle=True)
results5.getEstimatedParameters()



Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
MU_private,0.658648,0.047331,13.915754,0.0
asc_cycling,-1.970005,0.3883,-5.07341,3.907499e-07
asc_driving,0.062223,0.372949,0.16684,0.8674958
asc_public,3.109905,0.373997,8.315323,0.0
beta_cost,-0.116447,0.014955,-7.786632,6.883383e-15
beta_drivingliscence,1.672988,0.098823,16.929109,0.0
beta_interchange,-0.035473,0.081264,-0.436515,0.6624633
beta_time_cycling,-3.518231,0.312428,-11.260925,0.0
beta_time_driving,-3.796774,0.290092,-13.088188,0.0
beta_time_public,-3.097268,0.229422,-13.500328,0.0


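Since the estimated $\mu_{private} \approx 0.66 < 1$, the nest parameter violates the condition $\mu_m \geq 1$ that the nested logit model (with the top-level scale normalised to 1) needs in order to be consistent with random utility theory. We therefore reject this nesting structure. 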

# Sampling

## Calculate the weight for each segment

In [211]:
# Population statistics

census = {
    'male_41_more':  1633263,
    'male_40_less':  2676249,
    'female_41_more':  1765143,
    'female_40_less':  2599058
}

In [212]:
total = sum(census.values())
total

8673713

In [213]:
# Flag trips made by travellers aged 40 or less (boolean column on lpmc),
# then display the new column.
lpmc["40_less"] = lpmc["age"].le(40)
lpmc["40_less"]

0       False
1        True
2        True
3        True
4       False
        ...  
4995    False
4996    False
4997    False
4998     True
4999     True
Name: 40_less, Length: 5000, dtype: bool

In [214]:
# sample statistics
# One boolean row mask per age-by-gender segment, keyed exactly like `census`.
# Each tuple is (segment name, value of '40_less', value of 'female').
filters = {
    segment: (lpmc['40_less'] == is_40_less) & (lpmc['female'] == is_female)
    for segment, is_40_less, is_female in (
        ('male_41_more', 0, 0),
        ('male_40_less', 1, 0),
        ('female_41_more', 0, 1),
        ('female_40_less', 1, 1),
    )
}

In [215]:
# Number of sampled rows falling in each segment (count of True in each mask).
sample_segments = dict(
    (segment, mask.sum()) for segment, mask in filters.items()
)
sample_segments

{'male_41_more': 1084,
 'male_40_less': 1256,
 'female_41_more': 1204,
 'female_40_less': 1456}

In [216]:
total_sample = sum(sample_segments.values())
total_sample

5000

The weight $w_g$ associated with segment $g$ is defined as
$$
w_g = \frac{N_g}{N}\frac{S}{S_g}.
$$

In [217]:
# weight associated with each segment
# w_g = (N_g / N) * (S / S_g): population share of the segment divided by its
# sample share, with N_g from `census`, N = total, S_g from the sample counts
# and S = total_sample.
weights = dict(
    (segment, census[segment] * total_sample / (n_sampled * total))
    for segment, n_sampled in sample_segments.items()
)
weights

{'male_41_more': 0.86854394680908,
 'male_40_less': 1.2282925990376248,
 'female_41_more': 0.8451199703160228,
 'female_40_less': 1.029009899043413}

In [218]:
# insert weight as a new column to the database lpmc
# Each row receives the weight of the segment whose mask selects it. The
# column is written to the DataFrame `lpmc`; the simulation cells later read
# it through `database` as Variable('Weight').
for k, f in filters.items():
    lpmc.loc[f, 'Weight'] = weights[k] 

## Calculate predicted market share for each mode

In [129]:
# Same definitions as the Model 4.a cell, repeated on purpose: the Model 4.b
# cell rebound `nests` (and `biogeme`) to the private/public structure, so
# the motorized/non-motorized nests must be restored before simulating.
MU_motorized = Beta('MU_motorized', 1, 0, None, 0)
motorized = MU_motorized, [3, 4]
non_motorized = 1, [1,2]
nests = motorized, non_motorized
logprob = models.lognested(V, None, nests, travel_mode)
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'model_4'
# NOTE(review): recycle=True presumably loads saved 'model_4' results when
# available instead of re-estimating -- confirm.
results4 = biogeme.estimate(recycle=True)

In [105]:
prob_walk = models.nested(V, None, nests,1)
prob_cycling = models.nested(V, None, nests,2)
prob_public = models.nested(V, None, nests,3)
prob_driving = models.nested(V, None, nests,4)

In [106]:
Weight = Variable('Weight')
simulate = {
    'Weight': Weight,
    'Prob. walk': prob_walk,
    'Prob. cycling': prob_cycling,
    'Prob. public': prob_public,
    'Prob. driving': prob_driving
}

In [107]:
biosim = bio.BIOGEME(database, simulate)
simulated_values = biosim.simulate(results4.getBetaValues())

In [108]:
simulated_values

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving
0,0.845120,0.056122,0.033443,0.062988,0.847448
1,1.029010,0.234590,0.075104,0.466893,0.223413
2,1.228293,0.000447,0.035738,0.623996,0.339819
3,1.228293,0.003664,0.021864,0.049406,0.925067
4,0.868544,0.092890,0.043082,0.152466,0.711563
...,...,...,...,...,...
4995,0.845120,0.386344,0.035924,0.108761,0.468971
4996,0.845120,0.051657,0.045394,0.693246,0.209703
4997,0.868544,0.004401,0.018260,0.137395,0.839944
4998,1.228293,0.024476,0.059768,0.606441,0.309314


Market shares are calculated using the weighted mean of the
individual probabilities.

In [111]:
# Mode labels, in alternative-id order (1 walk .. 4 driving).
modes = ["walk", "cycling", "public", "driving"]
# Add one 'Weighted <mode>' column per mode: row weight times the simulated
# choice probability of that mode.
for mode in modes:
    simulated_values[f'Weighted {mode}'] = simulated_values['Weight'].mul(
        simulated_values[f'Prob. {mode}']
    )

In [112]:
simulated_values

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving,Weighted walk,Weighted cycling,Weighted public,Weighted driving
0,0.845120,0.056122,0.033443,0.062988,0.847448,0.047430,0.028263,0.053232,0.716195
1,1.029010,0.234590,0.075104,0.466893,0.223413,0.241395,0.077283,0.480437,0.229894
2,1.228293,0.000447,0.035738,0.623996,0.339819,0.000549,0.043897,0.766450,0.417397
3,1.228293,0.003664,0.021864,0.049406,0.925067,0.004501,0.026855,0.060685,1.136252
4,0.868544,0.092890,0.043082,0.152466,0.711563,0.080679,0.037418,0.132423,0.618023
...,...,...,...,...,...,...,...,...,...
4995,0.845120,0.386344,0.035924,0.108761,0.468971,0.326507,0.030360,0.091916,0.396337
4996,0.845120,0.051657,0.045394,0.693246,0.209703,0.043656,0.038363,0.585876,0.177225
4997,0.868544,0.004401,0.018260,0.137395,0.839944,0.003822,0.015859,0.119334,0.729529
4998,1.228293,0.024476,0.059768,0.606441,0.309314,0.030064,0.073413,0.744888,0.379928


In [139]:
# Predicted market share per mode = mean over rows of weight * probability.
# Results are collected in `market_share` (same order as `modes`) and, as a
# side effect, also written to globals named market_share_<mode>.
# NOTE(review): calculate_market_share() writes to the same global names, so
# these baseline values are overwritten once a scenario is evaluated.
market_share=[]
for mode in modes: 
    globals()['market_share_'+mode]=simulated_values['Weighted '+ mode].mean()
    market_share.append(globals()['market_share_'+mode])
    print(f'Market share for {mode}: {100*market_share[-1]:.1f}%')

Market share for walk: 18.3%
Market share for cycling: 3.6%
Market share for public: 36.2%
Market share for driving: 42.0%


## Calculate confidence interval

In [131]:
# Re-estimate with bootstrapping (10 draws) so that parameter draws are
# available for the confidence intervals computed in the following cells.
# `biogeme` is whichever BIOGEME object was created last.
# NOTE(review): 10 draws is a very small basis for 90% intervals, and no
# random seed is set in the visible cells, so the bounds may change from run
# to run.
results_bootstrapping = biogeme.estimate(bootstrap=10)

In [132]:
betas = biogeme.freeBetaNames()
b = results_bootstrapping.getBetasForSensitivityAnalysis(betas)

In [133]:
left, right = biosim.confidenceIntervals(b, 0.9)

In [134]:
left

Unnamed: 0,Weight,Prob. walk,Prob. cycling,Prob. public,Prob. driving
0,0.845120,0.046945,0.029260,0.052399,0.828376
1,1.029010,0.219606,0.065776,0.427006,0.208789
2,1.228293,0.000236,0.026649,0.568711,0.275310
3,1.228293,0.002637,0.018107,0.042088,0.914553
4,0.868544,0.080983,0.038545,0.139068,0.687739
...,...,...,...,...,...
4995,0.845120,0.376519,0.031156,0.084221,0.412001
4996,0.845120,0.043325,0.039441,0.662516,0.196356
4997,0.868544,0.003216,0.015122,0.118904,0.820229
4998,1.228293,0.019110,0.050994,0.571370,0.300906


In [135]:
# Weight the per-row lower and upper probability bounds in the same way as
# the point estimates: 'Weighted <mode>' = 'Weight' * 'Prob. <mode>'.
for mode in modes:
    left[f'Weighted {mode}'] = left['Weight'].mul(left[f'Prob. {mode}'])
    right[f'Weighted {mode}'] = right['Weight'].mul(right[f'Prob. {mode}'])

In [140]:
# Aggregate the weighted per-row bounds into lower/upper market-share bounds
# and print them next to the point estimates from `market_share`.
# As in the point-estimate cell, the values are also written to globals named
# left_market_share_<mode> / right_market_share_<mode>.
# NOTE(review): these are means of per-row interval bounds, which is not the
# same as an interval computed on the aggregated share itself -- confirm this
# is the intended interpretation.
left_market_share=[]
right_market_share=[]
for num,mode in enumerate(modes): 
    globals()['left_market_share_'+mode]=left['Weighted '+ mode].mean()
    globals()['right_market_share_'+mode]=right['Weighted '+ mode].mean()
    left_market_share.append(globals()['left_market_share_'+mode])
    right_market_share.append(globals()['right_market_share_'+mode])
    # Both bounds carry a '%' sign (the upper bound previously lacked it).
    print(
        f'Market share for {mode}: {100*market_share[num]:.1f}% '
        f'CI: ['
        f'{100*left_market_share[num]:.1f}%-'
        f'{100*right_market_share[num]:.1f}%'
        f']'
    )

Market share for walk: 18.3% CI: [17.4%-19.3]
Market share for cycling: 3.6% CI: [3.1%-4.4]
Market share for public: 36.2% CI: [34.0%-38.0]
Market share for driving: 42.0% CI: [39.9%-43.9]


# Forecasting

## Consider two scenarios

In [172]:
# define a function for calculating market share
def calculate_market_share(utility):
    """Return the weighted predicted market shares for a utility specification.

    Parameters
    ----------
    utility : dict
        Maps alternative ids 1..4 (walk, cycling, public, driving) to their
        utility expressions.

    Returns
    -------
    list
        Market shares in the order walk, cycling, public, driving; each is
        the mean over rows of 'Weight' times the simulated choice probability.

    Notes
    -----
    Relies on the notebook globals `nests`, `database` and `results4`.
    As a side effect, each share is also stored in a global variable named
    market_share_<mode>, overwriting any previous value.
    """
    alternative_of = {"walk": 1, "cycling": 2, "public": 3, "driving": 4}

    # Quantities to simulate: the row weight plus one nested-logit choice
    # probability per mode.
    to_simulate = {'Weight': Variable('Weight')}
    for label, alt_id in alternative_of.items():
        to_simulate['Prob. ' + label] = models.nested(utility, None, nests, alt_id)

    simulator = bio.BIOGEME(database, to_simulate)
    sim = simulator.simulate(results4.getBetaValues())

    shares = []
    for label in alternative_of:
        sim['Weighted ' + label] = sim['Weight'] * sim['Prob. ' + label]
        share = sim['Weighted ' + label].mean()
        globals()['market_share_' + label] = share
        shares.append(share)

    return shares

In [173]:
# an increase of car cost by 15%
# Same utilities as Model 3 except that cost_driving is multiplied by 1.15.
# Rebinding V_walk .. V_driving here does not alter the baseline dict `V`,
# which still holds the expression objects it was built from.
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = asc_public + beta_time_public * boxcox_time_3+ beta_cost * cost_transit + beta_interchange * pt_interchanges
V_driving = asc_driving + beta_time_driving * boxcox_time_4 + beta_cost * cost_driving*1.15 + beta_drivingliscence * driving_license
V_1 = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

In [174]:
market_share_1 = calculate_market_share(V_1)

In [180]:
market_share_1

[0.18303587308894248,
 0.03570537602583732,
 0.3653452279480938,
 0.4159135229371264]

In [175]:
for i,mode in enumerate(modes): 
    print(f'Market share for {mode}: {100*market_share_1[i]:.1f}%')

Market share for walk: 18.3%
Market share for cycling: 3.6%
Market share for public: 36.5%
Market share for driving: 41.6%


In [176]:
# a decrease of public transport cost by 15%
# NOTE(review): this cell's header previously also read "an increase of car
# cost by 15%", but cost_driving is NOT scaled below -- only cost_transit is
# (by 0.85). Confirm whether scenario 2 is meant to combine both changes.
V_walk = asc_walk + beta_time_walk * boxcox_time_1
V_cycling = asc_cycling + beta_time_cycling * boxcox_time_2
V_public = asc_public + beta_time_public * boxcox_time_3+ beta_cost * cost_transit*0.85 + beta_interchange * pt_interchanges
V_driving = asc_driving + beta_time_driving * boxcox_time_4 + beta_cost * cost_driving + beta_drivingliscence * driving_license
V_2 = {1: V_walk, 2: V_cycling, 3: V_public, 4: V_driving}

In [177]:
market_share_2 = calculate_market_share(V_2)

In [178]:
market_share_2

[0.1826834260957323,
 0.03547092855513872,
 0.3655157663325278,
 0.4163298790166012]

In [179]:
for i,mode in enumerate(modes): 
    print(f'Market share for {mode}: {100*market_share_2[i]:.1f}%')

Market share for walk: 18.3%
Market share for cycling: 3.5%
Market share for public: 36.6%
Market share for driving: 41.6%


## Effect on revenue

In [181]:
def calculate_revenues(change, utility):
    """Return the weighted mean public-transport revenue per row.

    Parameters
    ----------
    change : number
        Multiplicative factor applied to `cost_transit` in the revenue
        formula (e.g. 0.85 for a 15% fare reduction). It is not applied to
        `utility`; the caller must pass a utility dict that already reflects
        the scenario.
    utility : dict
        Maps alternative ids to utility expressions; alternative 3 is public
        transport.

    Returns
    -------
    Mean over rows of weight * P(public) * cost_transit * change.

    Notes
    -----
    Relies on the notebook globals `nests`, `Weight`, `cost_transit`,
    `database` and `results4`.
    """
    pt_choice_prob = models.nested(utility, None, nests, 3)

    expressions = {
        'weight': Weight,
        'Revenues PT': pt_choice_prob * cost_transit * change,
    }
    sim = bio.BIOGEME(database, expressions).simulate(results4.getBetaValues())

    sim['Weighted revenues PT'] = sim['weight'] * sim['Revenues PT']
    return sim['Weighted revenues PT'].mean()

In [182]:
# scenario 1
calculate_revenues(1,V_1)

0.697665362122266

In [183]:
# scenario 2
calculate_revenues(0.85,V_2)

0.5948704833553018

In [185]:
# scenario 0
calculate_revenues(1,V)

0.6888564497933853

## Aggregate elasticity

Disaggregate elasticity
$$
E_{x_{i n k}}^{P_{n}(i)}=\frac{\partial P_{n}(i)}{\partial x_{i n k}} \frac{x_{i n k}}{P_{n}(i)}
$$
Aggregate elasticity
$$
\begin{aligned}
E_{x_{j k}}^{\widehat{W}(i)} & =\frac{\partial \widehat{W}(i)}{\partial x_{j k}} \frac{x_{j k}}{\widehat{W}(i)} \\
& =\frac{1}{S} \sum_{n=1}^{N} \omega_{n} \frac{P_{n}(i)}{P_{n}(i)} \frac{\partial P_{n}(i)}{\partial x_{j k}} \frac{S x_{j k}}{\sum_{\ell=1}^{N} \omega_{\ell} P_{\ell}(i)} \\
& =\frac{1}{\sum_{\ell=1}^{N} \omega_{\ell} P_{\ell}(i)} \sum_{n=1}^{N} \omega_{n} P_{n}(i) E_{x_{j n k}}^{P_{n}(i)} .
\end{aligned}
$$

In [186]:
from biogeme.expressions import Derive

In [221]:
def calculate_direct_elasticity(utility):
    """Aggregate direct elasticity of public-transport demand w.r.t. its cost.

    For every row the disaggregate elasticity
    (dP_public / d cost_transit) * cost_transit / P_public is simulated, and
    the rows are then combined using the weights Weight * P_public,
    normalised by their sum.

    Parameters
    ----------
    utility : dict
        Maps alternative ids to utility expressions; alternative 3 is public
        transport.

    Returns
    -------
    The aggregate elasticity (a scalar).

    Notes
    -----
    Relies on the notebook globals `nests`, `Weight`, `cost_transit`,
    `database` and `results4`. Only the public-transport elasticity is
    computed; an earlier, commented-out driving variant has been dropped
    together with the unused driving probability.
    """
    prob_public = models.nested(utility, None, nests, 3)

    simulate = {
        'Weight': Weight,
        'Prob. public': prob_public,
        'Dis. Elas. public': Derive(prob_public, 'cost_transit') * cost_transit / prob_public,
    }

    biosim = bio.BIOGEME(database, simulate)
    simulated_values = biosim.simulate(results4.getBetaValues())

    # omega_n * P_n(public): the per-row aggregation weight.
    simulated_values['Weighted public'] = simulated_values['Weight'] * simulated_values['Prob. public']
    norm_public = simulated_values['Weighted public'].sum()

    Aggregate_elas_public = (
        simulated_values['Weighted public'] * simulated_values['Dis. Elas. public']
    ).sum() / norm_public

    return Aggregate_elas_public

In [222]:
calculate_direct_elasticity(V)

-0.08000887519179997