
# Example of a catalog

Illustration of the concept of a catalog. See [Bierlaire and Ortelli (2023)](https://transp-or.epfl.ch/documents/technicalReports/BierOrte23.pdf)

:author: Michel Bierlaire, EPFL
:date: Sun Aug  6 18:13:18 2023


In [None]:
import numpy as np
from biogeme import models
from biogeme.expressions import Beta, Variable, Expression
from biogeme.models import boxcox
from biogeme.catalog import (
    Catalog,
    generic_alt_specific_catalogs,
    segmentation_catalogs,
)
from biogeme.nests import OneNestForNestedLogit, NestsForNestedLogit

from biogeme.data.swissmetro import (
    read_data,
    CHOICE,
    SM_AV,
    CAR_AV_SP,
    TRAIN_AV_SP,
    TRAIN_TT_SCALED,
    TRAIN_COST_SCALED,
    SM_TT_SCALED,
    SM_COST_SCALED,
    CAR_TT_SCALED,
    CAR_CO_SCALED,
)

Function printing all configurations of an expression.



In [None]:
def print_all_configurations(expression: Expression) -> None:
    """Display how many configurations an expression has, then list their IDs.

    :param expression: expression whose central controller is set and queried.
    """
    # The central controller has to be set before it can be queried.
    expression.set_central_controller()
    controller = expression.central_controller
    print(f'Total: {controller.number_of_configurations()} configurations')
    for identifier in controller.all_configurations_ids:
        print(identifier)

Parameters to be estimated.



In [None]:
# The four parameters share the same settings (initial value 0, no bounds,
# status 0) and differ only by name, so they are built in one pass.
ASC_CAR, ASC_TRAIN, B_TIME, B_COST = (
    Beta(parameter_name, 0, None, None, 0)
    for parameter_name in ('ASC_CAR', 'ASC_TRAIN', 'B_TIME', 'B_COST')
)

Definition of the utility functions.



In [None]:
# Each utility is linear in a scaled travel time and a scaled cost, with the
# coefficients B_TIME and B_COST shared by the three alternatives.
# V1 uses the TRAIN_* variables, V2 the SM_* variables, V3 the CAR_* variables.
# V2 carries no alternative specific constant.
V1 = ASC_TRAIN + B_TIME * TRAIN_TT_SCALED + B_COST * TRAIN_COST_SCALED
V2 = B_TIME * SM_TT_SCALED + B_COST * SM_COST_SCALED
V3 = ASC_CAR + B_TIME * CAR_TT_SCALED + B_COST * CAR_CO_SCALED

Associate utility functions with the numbering of alternatives.



In [None]:
# Keys 1, 2 and 3 are the alternative numbers attached to V1, V2 and V3.
V = {1: V1, 2: V2, 3: V3}

Associate the availability conditions with the alternatives.



In [None]:
# Uses the same keys (1, 2, 3) as the dictionary of utilities V.
av = {1: TRAIN_AV_SP, 2: SM_AV, 3: CAR_AV_SP}

Definition of the model. This is the contribution of each
observation to the log likelihood function.



In [None]:
# Logit specification built from the utilities V, the availabilities av and
# the observed CHOICE.
logprob_logit = models.loglogit(V, av, CHOICE)

Nest definition.



In [None]:
# Nest parameter; Beta arguments are name, initial value, lower bound,
# upper bound and status, so mu_existing starts at 1 within [1, 10].
mu_existing = Beta('mu_existing', 1, 1, 10, 0)
# A single nest grouping alternatives 1 and 3; alternative 2 is not listed.
existing = OneNestForNestedLogit(nest_param=mu_existing, list_of_alternatives=[1, 3])
# list(V) yields the keys of V, i.e. the choice set [1, 2, 3].
nests = NestsForNestedLogit(choice_set=list(V), tuple_of_nests=(existing,))

Contribution to the log-likelihood.



In [None]:
# Same utilities, availabilities and choice as the logit specification, with
# the nest structure added.
logprob_nested = models.lognested(V, av, nests, CHOICE)

Definition of the catalog containing two model specifications:
logit and nested logit.



In [None]:
# The dictionary keys ('logit', 'nested') are the names under which each
# specification is stored; a specification is later selected by its name.
model_catalog = Catalog.from_dict(
    catalog_name='model_catalog',
    dict_of_expressions={
        'logit': logprob_logit,
        'nested': logprob_nested,
    },
)

Current status of the catalog.



In [None]:
# Display the catalog before any configuration has been selected explicitly.
print(model_catalog)

Use the controller to select a different configuration.



In [None]:
# Ask the catalog's controller to select the entry named 'nested', then
# display the catalog again to show the change.
model_catalog.controlled_by.set_name('nested')
print(model_catalog)

Iterator.



In [None]:
# A catalog can be iterated over directly; print each item it yields.
for specification in model_catalog:
    print(specification)

All configurations.



In [None]:
# Print the number of configurations of the model catalog and their IDs.
print_all_configurations(model_catalog)

In [None]:
# Catalog of three candidate transformations of the train travel time:
# linear, Box-Cox and squared.
TRAIN_TT = Variable('TRAIN_TT')
TRAIN_COST = Variable('TRAIN_COST')
# Parameter of the Box-Cox transform, named 'lambda_travel_time'.
ell_travel_time = Beta('lambda_travel_time', 1, -10, 10, 0)
linear_train_tt = TRAIN_TT
boxcox_train_tt = boxcox(TRAIN_TT, ell_travel_time)
squared_train_tt = TRAIN_TT * TRAIN_TT
train_tt_catalog = Catalog.from_dict(
    catalog_name='train_tt_catalog',
    dict_of_expressions={
        'linear': linear_train_tt,
        'boxcox': boxcox_train_tt,
        'squared': squared_train_tt,
    },
)

Define a utility function involving the catalog.



In [None]:
# ASC_TRAIN is re-created with the same arguments as its first definition.
ASC_TRAIN = Beta('ASC_TRAIN', 0, None, None, 0)
# NOTE(review): B_TIME is re-created here with an upper bound of 0 (fourth
# argument), whereas its first definition in this file leaves both bounds as
# None. Every later cell uses this bounded version -- confirm it is intended.
B_TIME = Beta('B_TIME', 0, None, 0, 0)
# The catalog is used like any other expression inside the utility.
V_TRAIN = ASC_TRAIN + B_TIME * train_tt_catalog

In [None]:
# Print the configurations of the utility that embeds train_tt_catalog.
print_all_configurations(V_TRAIN)

Unsynchronized catalogs



In [None]:
# Same three transformations as for the train, applied to car travel time.
CAR_TT = Variable('CAR_TT')
CAR_COST = Variable('CAR_COST')
linear_car_tt = CAR_TT
# Reuses ell_travel_time, the Box-Cox parameter of the train catalog.
boxcox_car_tt = boxcox(CAR_TT, ell_travel_time)
squared_car_tt = CAR_TT * CAR_TT
# No `controlled_by` argument is passed, so this catalog is not explicitly
# tied to the controller of train_tt_catalog.
car_tt_catalog = Catalog.from_dict(
    catalog_name='car_tt_catalog',
    dict_of_expressions={
        'linear': linear_car_tt,
        'boxcox': boxcox_car_tt,
        'squared': squared_car_tt,
    },
)

Create a dummy expression with the two catalogs.



In [None]:
# The sum only serves to gather both catalogs in a single expression.
dummy_expression = train_tt_catalog + car_tt_catalog

In [None]:
# Print the configurations of the expression combining the two catalogs.
print_all_configurations(dummy_expression)

Synchronized catalogs.



In [None]:
# The variables and the three transformations are re-created exactly as in the
# unsynchronized example; only the `controlled_by` argument below differs.
CAR_TT = Variable('CAR_TT')
CAR_COST = Variable('CAR_COST')
linear_car_tt = CAR_TT
boxcox_car_tt = boxcox(CAR_TT, ell_travel_time)
squared_car_tt = CAR_TT * CAR_TT
car_tt_catalog = Catalog.from_dict(
    catalog_name='car_tt_catalog',
    dict_of_expressions={
        'linear': linear_car_tt,
        'boxcox': boxcox_car_tt,
        'squared': squared_car_tt,
    },
    # Reuse the controller of train_tt_catalog for this catalog.
    controlled_by=train_tt_catalog.controlled_by,
)

Create a dummy expression with the two catalogs.



In [None]:
# Rebuild the sum so that it refers to the new, synchronized car_tt_catalog.
dummy_expression = train_tt_catalog + car_tt_catalog

In [None]:
# Print the configurations now that both catalogs share one controller.
print_all_configurations(dummy_expression)

Alternative specific specification.



In [None]:
# A single call handles both coefficients under one generic name. The result
# is unpacked into one dictionary per coefficient; the dictionaries are
# indexed by alternative name ('TRAIN', 'CAR') when building the utilities.
(B_TIME_catalog_dict, B_COST_catalog_dict) = generic_alt_specific_catalogs(
    generic_name='coefficients',
    beta_parameters=[B_TIME, B_COST],
    alternatives=('TRAIN', 'CAR'),
)

Create utility functions involving those catalogs.



In [None]:
# Each coefficient is taken from the catalog dictionary entry of the
# corresponding alternative instead of being a plain Beta.
V_TRAIN = (
    B_TIME_catalog_dict['TRAIN'] * TRAIN_TT + B_COST_catalog_dict['TRAIN'] * TRAIN_COST
)
V_CAR = B_TIME_catalog_dict['CAR'] * CAR_TT + B_COST_catalog_dict['CAR'] * CAR_COST

Create a dummy expression involving the utility functions.



In [None]:
# The sum only serves to gather both utilities in a single expression.
dummy_expression = V_TRAIN + V_CAR

In [None]:
# Print the configurations obtained when both coefficients come from one call.
print_all_configurations(dummy_expression)

Alternative specific - not synchronized.



In [None]:
# Two separate calls, each with its own generic name and a single coefficient.
# Each call's result is unpacked as a one-element sequence.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
    generic_name='time_coefficient',
    beta_parameters=[B_TIME],
    alternatives=('TRAIN', 'CAR'),
)

(B_COST_catalog_dict,) = generic_alt_specific_catalogs(
    generic_name='cost_coefficient',
    beta_parameters=[B_COST],
    alternatives=('TRAIN', 'CAR'),
)

Create utility functions involving those catalogs.



In [None]:
# Same utilities as before, now built from the catalog dictionaries created
# by the two separate calls.
V_TRAIN = (
    B_TIME_catalog_dict['TRAIN'] * TRAIN_TT + B_COST_catalog_dict['TRAIN'] * TRAIN_COST
)
V_CAR = B_TIME_catalog_dict['CAR'] * CAR_TT + B_COST_catalog_dict['CAR'] * CAR_COST

Create a dummy expression involving the utility functions.



In [None]:
# The sum only serves to gather both utilities in a single expression.
dummy_expression = V_TRAIN + V_CAR

In [None]:
# Print the configurations obtained when the coefficients come from two calls.
print_all_configurations(dummy_expression)

Read the data



In [None]:
# read_data is imported from biogeme.data.swissmetro.
database = read_data()

Segmentation



We consider two trip purposes: `commuters` and anything else. We
need to define a binary variable first.



In [None]:
# Binary indicator: 1 where PURPOSE equals 1, 0 otherwise.
is_purpose_one = database.data['PURPOSE'] == 1
database.data['COMMUTERS'] = is_purpose_one.astype(int)

Segmentation on trip purpose.



In [None]:
# Values 0 and 1 of COMMUTERS are labelled 'non_commuters' and 'commuters';
# 'non_commuters' is the reference segment.
segmentation_purpose = database.generate_segmentation(
    variable='COMMUTERS',
    mapping={0: 'non_commuters', 1: 'commuters'},
    reference='non_commuters',
)

Segmentation on luggage.



In [None]:
# LUGGAGE values 0, 1 and 3 are labelled (the mapping has no entry for 2);
# 'no_lugg' is the reference segment.
segmentation_luggage = database.generate_segmentation(
    variable='LUGGAGE',
    mapping={0: 'no_lugg', 1: 'one_lugg', 3: 'several_lugg'},
    reference='no_lugg',
)

Catalog of segmented alternative specific constants, allowing a maximum
of two segmentations.



In [None]:
# Both constants can be segmented by trip purpose and/or luggage; with
# maximum_number=2 the two segmentations may be applied together.
ASC_TRAIN_catalog, ASC_CAR_catalog = segmentation_catalogs(
    generic_name='ASC',
    beta_parameters=[ASC_TRAIN, ASC_CAR],
    potential_segmentations=(
        segmentation_purpose,
        segmentation_luggage,
    ),
    maximum_number=2,
)

Create a dummy expression.



In [None]:
# The sum only serves to gather both catalogs in a single expression.
dummy_expression = ASC_TRAIN_catalog + ASC_CAR_catalog

In [None]:
# Print the configurations when up to two segmentations are allowed.
print_all_configurations(dummy_expression)

Catalog of segmented alternative specific constants, allowing a maximum
of one segmentation.



In [None]:
# Same call as for the two-segmentation catalogs, except that maximum_number=1
# allows at most one segmentation at a time.
ASC_TRAIN_catalog, ASC_CAR_catalog = segmentation_catalogs(
    generic_name='ASC',
    beta_parameters=[ASC_TRAIN, ASC_CAR],
    potential_segmentations=(
        segmentation_purpose,
        segmentation_luggage,
    ),
    maximum_number=1,
)

Create a dummy expression.



In [None]:
# The sum only serves to gather both catalogs in a single expression.
dummy_expression = ASC_TRAIN_catalog + ASC_CAR_catalog

In [None]:
# Print the configurations when at most one segmentation is allowed.
print_all_configurations(dummy_expression)

Segmentation and alternative specific.

Maximum one segmentation.



In [None]:
# Combine the generic / alternative specific choice with potential
# segmentations of B_TIME; maximum_number=1 allows at most one segmentation.
# `alternatives` is passed as a tuple, like the other calls to
# generic_alt_specific_catalogs in this file.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
    generic_name='B_TIME',
    beta_parameters=[B_TIME],
    alternatives=('TRAIN', 'CAR'),
    potential_segmentations=(
        segmentation_purpose,
        segmentation_luggage,
    ),
    maximum_number=1,
)

In [None]:
# Print the configurations of the catalog attached to the 'TRAIN' alternative.
print_all_configurations(B_TIME_catalog_dict['TRAIN'])

Maximum two segmentations.



In [None]:
# Same combination of alternative specific and segmented specifications of
# B_TIME, now with maximum_number=2 so both segmentations may be applied.
# `alternatives` is passed as a tuple, like the other calls to
# generic_alt_specific_catalogs in this file.
(B_TIME_catalog_dict,) = generic_alt_specific_catalogs(
    generic_name='B_TIME',
    beta_parameters=[B_TIME],
    alternatives=('TRAIN', 'CAR'),
    potential_segmentations=(
        segmentation_purpose,
        segmentation_luggage,
    ),
    maximum_number=2,
)

In [None]:
# Print the configurations of the catalog attached to the 'TRAIN' alternative.
print_all_configurations(B_TIME_catalog_dict['TRAIN'])