In [1]:
import pandas as pd
import numpy as np

In [2]:
# Parquet file holding the data set used for the estimation.
input_path = "../../results/choice_model/input.parquet"
df = pd.read_parquet(input_path)

# Alternatives that are always present. The position of a mode in this list is
# used as its integer code in the rest of the notebook.
modes = ["car", "car_passenger", "transit", "bicycle", "walk"]

# Motorbike is optional: it is appended when at least one column name carries
# the "motorbike_" prefix.
if any(name.startswith("motorbike_") for name in df.columns):
    modes.append("motorbike")

In [3]:
# Cast the per-mode availability flags to integers.
availability_columns = ["{}_availability".format(name) for name in modes]
df = df.astype({ flag: int for flag in availability_columns })

# Replace the mode label by its position in `modes`.
df["mode"] = df["mode"].apply(modes.index)

# Rescale the weights so that they sum up to the number of observations.
weight_sum = df["weight"].sum()
df["weight"] = len(df) * df["weight"] / weight_sum

# The zone identifiers are not used any further.
df = df.drop(["origin_iris", "destination_iris"], axis = 1)

# Cast the remaining flags to integers; each of them is optional.
optional_flags = ["has_driving_permit", "has_motorbike_permit", "has_pt_subscription", "transit_only_bus"]
df = df.astype({ flag: int for flag in optional_flags if flag in df })

In [4]:
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
from biogeme.expressions import Beta, Variable, bioNormalCdf, Elem, log

In [5]:
# Wrap the prepared data frame for Biogeme; the Variable expressions below
# refer to its columns by name.
database = db.Database("data", df)

# Variable is taken from biogeme.expressions (imported in the import cell)
# instead of being reached through the biogeme.database namespace.
mode = Variable("mode")
weight = Variable("weight")
euclidean_distance_km = Variable("euclidean_distance_km")

# person
person_has_driving_permit = Variable("has_driving_permit")

# income (optional column)
if "income_per_person_EUR" in df:
    # The expression must point at the same column that the guard checks for.
    income_per_person_EUR = Variable("income_per_person_EUR")

# car
car_availability = Variable("car_availability")
car_in_vehicle_time_min = Variable("car_in_vehicle_time_min")
car_walk_time_min = Variable("car_walk_time_min")
car_cost_EUR = Variable("car_cost_EUR")

# parking
parking_cost_EUR = Variable("parking_cost_EUR")
parking_pressure = Variable("parking_pressure")

# car passenger
car_passenger_availability = Variable("car_passenger_availability")
car_passenger_in_vehicle_time_min = Variable("car_passenger_in_vehicle_time_min")
car_passenger_walk_time_min = Variable("car_passenger_walk_time_min")


if "motorbike" in modes:
    # motorbike (only defined when the mode was detected in the data)
    motorbike_availability = Variable("motorbike_availability")
    motorbike_in_vehicle_time_min = Variable("motorbike_in_vehicle_time_min")
    motorbike_walk_time_min = Variable("motorbike_walk_time_min")
    motorbike_cost_EUR = Variable("motorbike_cost_EUR")

# transit
transit_availability = Variable("transit_availability")
transit_total_walk_time_min = Variable("transit_total_walk_time_min")
transit_total_in_vehicle_time_min = Variable("transit_total_in_vehicle_time_min")
transit_transfers = Variable("transit_transfers")
transit_transfer_wait_time_min = Variable("transit_transfer_wait_time_min")
transit_initial_wait_time_min = Variable("transit_initial_wait_time_min")
transit_cost_EUR = Variable("transit_cost_EUR")
transit_in_vehicle_time_bus_min = Variable("transit_in_vehicle_time_bus_min")
transit_only_bus = Variable("transit_only_bus")

# bicycle
bicycle_availability = Variable("bicycle_availability")
bicycle_travel_time_min = Variable("bicycle_travel_time_min")

# walk
walk_availability = Variable("walk_availability")
walk_travel_time_min = Variable("walk_travel_time_min")

In [6]:
# Beta(name, starting value, lower bound, upper bound, status): status 0 means
# the parameter is estimated, status 1 keeps it fixed at its starting value.

# Exponents of the distance and income interactions on the cost sensitivity
lambda_cost_distance = Beta("lambda_cost_distance", -0.1, None, None, 0)
lambda_cost_income = Beta("lambda_cost_income", -0.1, None, None, 0)

beta_cost_EUR = Beta("beta_cost_EUR", 0, None, None, 0)

# A single parameter is shared by the walk time of car, car passenger,
# motorbike and transit. The mode-specific names are aliases of it, so each of
# them is defined exactly once (no separate Beta that is shadowed afterwards).
beta_access_time_min = Beta("beta_access_time_min", 0, None, None, 0)
beta_car_walk_time_min = beta_access_time_min
beta_car_passenger_walk_time_min = beta_access_time_min
beta_motorbike_walk_time_min = beta_access_time_min
beta_transit_total_walk_time_min = beta_access_time_min

# car
beta_car_asc = Beta("beta_car_asc", 0, None, None, 0)
beta_car_in_vehicle_time_min = Beta("beta_car_in_vehicle_time_min", 0, None, None, 0)
beta_car_parking_pressure = Beta("beta_car_parking_pressure", 0, None, None, 0)

# car passenger
beta_car_passenger_asc = Beta("beta_car_passenger_asc", 0, None, None, 0)
beta_car_passenger_in_vehicle_time_min = Beta("beta_car_passenger_in_vehicle_time_min", 0, None, None, 0)
beta_car_passenger_parking_pressure = Beta("beta_car_passenger_parking_pressure", 0, None, None, 0)
beta_car_passenger_driving_permit = Beta("beta_car_passenger_driving_permit", 0, None, None, 0)

# motorbike (optional mode)
if "motorbike" in modes:
    beta_motorbike_asc = Beta("beta_motorbike_asc", 0, None, None, 0)
    beta_motorbike_in_vehicle_time_min = Beta("beta_motorbike_in_vehicle_time_min", 0, None, None, 0)

# transit; its constant is fixed at zero (status 1), all other constants are
# estimated
beta_transit_asc = Beta("beta_transit_asc", 0, None, None, 1)
beta_transit_headway_min = Beta("beta_transit_headway_min", 0, None, None, 0)
beta_transit_total_in_vehicle_time_min = Beta("beta_transit_in_vehicle_time_total_min", 0, None, None, 0)
beta_transit_transfers = Beta("beta_transit_transfers", 0, None, None, 0)
beta_transit_waiting_time_min = Beta("beta_transit_waiting_time_min", 0, None, None, 0)
beta_transit_in_vehicle_time_bus_min = Beta("beta_transit_in_vehicle_time_bus_min", 0, None, None, 0)
beta_transit_only_bus = Beta("beta_transit_only_bus", 0, None, None, 0)
beta_transit_driving_permit = Beta("beta_transit_driving_permit", 0, None, None, 0)

# bicycle
beta_bicycle_asc = Beta("beta_bicycle_asc", 0, None, None, 0)
beta_bicycle_travel_time_min = Beta("beta_bicycle_travel_time_min", 0, None, None, 0)

# walk
beta_walk_asc = Beta("beta_walk_asc", 0, None, None, 0)
beta_walk_travel_time_min = Beta("beta_walk_travel_time_min", 0, None, None, 0)

In [7]:
# Utility functions

# The cost term is scaled by the distance relative to a reference distance,
# raised to an estimated exponent (replace the expression by 1 to switch the
# interaction off).
mean_euclidean_distance_km = 4.4
euclidean_interaction_cost = (euclidean_distance_km / mean_euclidean_distance_km)**lambda_cost_distance

# The same kind of scaling is applied for income when the column is present;
# otherwise the factor is neutral.
income_interaction_cost = 1

if "income_per_person_EUR" in df:
    mean_income_EUR = 2900
    income_interaction_cost = (income_per_person_EUR / mean_income_EUR)**lambda_cost_income

# Car: constant, in-vehicle time, scaled travel + parking cost, walk time.
# The parking pressure term (beta_car_parking_pressure * parking_pressure) is
# currently not part of the utility.
car_utility = (
    beta_car_asc
    + beta_car_in_vehicle_time_min * car_in_vehicle_time_min
    + beta_cost_EUR * (car_cost_EUR + parking_cost_EUR) * euclidean_interaction_cost * income_interaction_cost
    + beta_car_walk_time_min * car_walk_time_min
)

# Car passenger: constant, in-vehicle time, driving permit, walk time.
# The parking pressure term (beta_car_passenger_parking_pressure *
# parking_pressure) is currently not part of the utility.
car_passenger_utility = (
    beta_car_passenger_asc
    + beta_car_passenger_in_vehicle_time_min * car_passenger_in_vehicle_time_min
    + beta_car_passenger_driving_permit * person_has_driving_permit
    + beta_car_passenger_walk_time_min * car_passenger_walk_time_min
)

# Motorbike (optional): constant, in-vehicle time, scaled cost, walk time.
if "motorbike" in modes:
    motorbike_utility = (
        beta_motorbike_asc
        + beta_motorbike_in_vehicle_time_min * motorbike_in_vehicle_time_min
        + beta_cost_EUR * motorbike_cost_EUR * euclidean_interaction_cost * income_interaction_cost
        + beta_motorbike_walk_time_min * motorbike_walk_time_min
    )

# Transit: constant, walk time, transfers, total waiting time, scaled cost,
# in-vehicle time, bus-only indicator, driving permit.
transit_utility = (
    beta_transit_asc
    + beta_transit_total_walk_time_min * transit_total_walk_time_min
    + beta_transit_transfers * transit_transfers
    + beta_transit_waiting_time_min * (transit_transfer_wait_time_min + transit_initial_wait_time_min)
    + beta_cost_EUR * transit_cost_EUR * euclidean_interaction_cost * income_interaction_cost
    + beta_transit_total_in_vehicle_time_min * transit_total_in_vehicle_time_min
    + beta_transit_only_bus * transit_only_bus
    + beta_transit_driving_permit * person_has_driving_permit
)

# Bicycle and walk: constant plus travel time.
bicycle_utility = beta_bicycle_asc + beta_bicycle_travel_time_min * bicycle_travel_time_min

walk_utility = beta_walk_asc + beta_walk_travel_time_min * walk_travel_time_min

In [8]:
# Mapping
# Each alternative is listed once with its utility and availability
# expression; both dictionaries are keyed by the position of the mode in
# `modes`.
alternatives = {
    "car": (car_utility, car_availability),
    "car_passenger": (car_passenger_utility, car_passenger_availability),
    "transit": (transit_utility, transit_availability),
    "bicycle": (bicycle_utility, bicycle_availability),
    "walk": (walk_utility, walk_availability),
}

# Motorbike only takes part when it was detected in the data.
if "motorbike" in modes:
    alternatives["motorbike"] = (motorbike_utility, motorbike_availability)

utilities = {
    modes.index(name): expressions[0]
    for name, expressions in alternatives.items()
}

availability = {
    modes.index(name): expressions[1]
    for name, expressions in alternatives.items()
}

In [9]:
# Log-probability of the chosen alternative under the logit model defined by
# the utilities and availabilities above.
model = models.loglogit(utilities, availability, mode)

# Weighted estimation: "loglike" is the contribution of each observation and
# "weight" its weight.
biogeme = bio.BIOGEME(database, {
    "loglike": model, "weight": weight
})

# Needed for the null log likelihood and rho-square figures in the summary.
biogeme.calculate_null_loglikelihood(availability)

# Output switches, both in the snake_case spelling used by the rest of the
# Biogeme calls in this notebook (the camelCase `generateHtml` is the old name).
biogeme.generate_html = True
biogeme.generate_pickle = False

result = biogeme.estimate()

You have not defined a name for the model. The output .py are named from the model name. The default is [biogemeModelDefaultName]


In [10]:
# Headline fit statistics of the estimated model.
summary_text = result.short_summary()
print(summary_text)

Results for model biogemeModelDefaultName
Nbr of parameters:		17
Sample size:			114487
Excluded data:			0
Null log likelihood:		-128479.7
Final log likelihood:		-61689.15
Likelihood ratio test (null):		133581.2
Rho square (null):			0.52
Rho bar square (null):			0.52
Akaike Information Criterion:	123412.3
Bayesian Information Criterion:	123576.3



In [11]:
# Table of the estimates, displayed as the output of the cell.
estimates_table = result.get_estimated_parameters()
estimates_table

Unnamed: 0,Value,Rob. Std err,Rob. t-test,Rob. p-value
beta_access_time_min,-0.031203,0.001735,-17.988359,0.0
beta_bicycle_asc,-2.923113,0.065128,-44.882338,0.0
beta_bicycle_travel_time_min,-0.093555,0.003985,-23.477408,0.0
beta_car_asc,-0.197903,0.045327,-4.366094,1.264882e-05
beta_car_in_vehicle_time_min,-0.042451,0.00245,-17.325378,0.0
beta_car_passenger_asc,-1.711346,0.028403,-60.251747,0.0
beta_car_passenger_driving_permit,-0.833687,0.028403,-29.351811,0.0
beta_car_passenger_in_vehicle_time_min,-0.070003,0.001914,-36.574734,0.0
beta_cost_EUR,-0.311059,0.022975,-13.538747,0.0
beta_transit_driving_permit,-0.529718,0.034508,-15.350568,0.0


In [12]:
# Plain lookup from parameter name (the index of the estimates table) to its
# estimated value.
estimates = result.get_estimated_parameters()

parameters = {
    name: estimate["Value"]
    for name, estimate in estimates.iterrows()
}

In [13]:
# Values of time: ratio of a time parameter (per minute) to the cost parameter
# (per EUR), multiplied by 60 to express it per hour.
vot_parameters = [
    ("Car VOT", "beta_car_in_vehicle_time_min"),
    ("Transit IV VOT", "beta_transit_in_vehicle_time_total_min"),
    ("Walk VOT", "beta_walk_travel_time_min"),
    ("Bicycle VOT", "beta_bicycle_travel_time_min"),
    ("Car passenger VOT", "beta_car_passenger_in_vehicle_time_min"),
]

if "motorbike" in modes:
    vot_parameters.append(("Motorbike VOT", "beta_motorbike_in_vehicle_time_min"))

for label, time_parameter in vot_parameters:
    print(label, 60 * parameters[time_parameter] / parameters["beta_cost_EUR"], "EUR/h")

Car VOT 8.188422345612176 EUR/h
Transit IV VOT 4.937947342857951 EUR/h
Walk VOT 31.298476611626324 EUR/h
Bicycle VOT 18.04581941491727 EUR/h
Car passenger VOT 13.502785682035551 EUR/h


In [14]:
# One logit choice-probability expression per alternative, keyed
# "prob_<mode>". The loop variable is deliberately not called `mode`, which is
# the name of the choice variable.
probabilities = {
    "prob_{}".format(alternative): models.logit(utilities, availability, position)
    for position, alternative in enumerate(modes)
}

# Evaluate the probabilities for every observation at the estimated values.
simulator = bio.BIOGEME(database, probabilities)
estimated_betas = result.get_beta_values()
simulation_result = simulator.simulate(estimated_betas)

The chosen alternative [`0.0`] is not available for the following observations (rownumber[choice]): 10[0.0]-11[0.0]-12[0.0]-13[0.0]-14[0.0]-15[0.0]-16[0.0]-17[0.0]-18[0.0]-19[0.0]-20[0.0]-21[0.0]-22[0...
The chosen alternative [`1.0`] is not available for the following observations (rownumber[choice]): 10[1.0]-11[1.0]-12[1.0]-13[1.0]-14[1.0]-15[1.0]-16[1.0]-17[1.0]-18[1.0]-19[1.0]-20[1.0]-21[1.0]-22[1...
The chosen alternative [`2.0`] is not available for the following observations (rownumber[choice]): 3[2.0]-9[2.0]-19[2.0]-20[2.0]-24[2.0]-28[2.0]-30[2.0]-31[2.0]-51[2.0]-52[2.0]-72[2.0]-73[2.0]-78[2.0...
The chosen alternative [`3.0`] is not available for the following observations (rownumber[choice]): 0[3.0]-1[3.0]-2[3.0]-3[3.0]-4[3.0]-5[3.0]-6[3.0]-7[3.0]-8[3.0]-9[3.0]-10[3.0]-11[3.0]-12[3.0]-13[3.0...
The chosen alternative [`4.0`] is not available for the following observations (rownumber[choice]): 8[4.0]-9[4.0]-75[4.0]-76[4.0]-77[4.0]-116[4.0]-117[4.0]-131[4.0]-132[4.0]-135[4.0]-1

In [15]:
# Seeded generator so that the random draw of simulated modes, and with it the
# comparison below, is reproducible across runs.
simulation_rng = np.random.default_rng(0)

# Bounds of the distance classes, in metres.
distance_bounds = np.array([  100.,   140.,   220.,   300.,   360.,   450.,   580.,   730.,
         940.,  1220.,  1620.,  2120.,  2820.,  3710.,  4850.,  6410.,
        8640., 12150., 17830., 39990.])


def compute_mode_shares(frame, mode_column, source):
    """Return the weighted share of each mode within every distance class.

    Parameters:
        frame: data frame with the columns "distance_class", "weight" and
            `mode_column`.
        mode_column: name of the column that holds the mode to aggregate by.
        source: label written to the "source" column of the result.

    Returns:
        A data frame with the columns "distance_class", "mode", "weight",
        "total", "share" and "source".
    """
    shares = frame.groupby(["distance_class", mode_column])["weight"].sum().reset_index()
    totals = shares.groupby("distance_class")["weight"].sum().reset_index(name = "total")
    shares = pd.merge(shares, totals, on = "distance_class")
    shares["share"] = shares["weight"] / shares["total"]
    shares = shares.rename(columns = { mode_column: "mode" })
    shares["source"] = source
    return shares


df_simulation = pd.concat([df, simulation_result], axis = 1).copy()

# Draw one mode per observation according to its simulated probabilities.
probability_columns = ["prob_{}".format(m) for m in modes]
df_simulation["simulated_mode"] = df_simulation[probability_columns].apply(
    lambda row: simulation_rng.choice(modes, p = row.values), axis = 1
)

# Label every observation with the upper bound of its distance class; anything
# at or above the second-to-last bound falls into the last class.
df_simulation["euclidean_distance"] = df_simulation["euclidean_distance_km"] * 1e3
df_simulation["distance_class"] = distance_bounds[np.digitize(df_simulation["euclidean_distance"], distance_bounds[:-1])]

# Shares of the simulated modes
df_share = compute_mode_shares(df_simulation, "simulated_mode", "model")

# Shares of the observed modes; the integer codes are mapped back to names
df_reference = compute_mode_shares(df_simulation, "mode", "reference")
df_reference["mode"] = df_reference["mode"].apply(lambda x: modes[x])

df_comparison = pd.concat([df_share, df_reference])

In [16]:
import plotly.express as px
# Mode share by distance class: one colour per mode, one dash style per source.
comparison_figure = px.line(
    df_comparison,
    x = "distance_class",
    y = "share",
    color = "mode",
    line_dash = "source",
)
comparison_figure