In [13]:
# Translated to .py by Meritxell Pacheco (December 2016)
# Adapted to PandasBiogeme by Nicola Ortelli (November 2019)


import pandas as pd
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme.expressions import Beta
from biogeme.models import loglogit
from sklearn.model_selection import train_test_split

# Load the tab-separated Swissmetro data file and wrap it in a Biogeme database.
df = pd.read_csv("Data/swissmetro.dat", sep = '\t')
database = db.Database("swissmetro", df)

# Expose every database column as a module-level Biogeme variable
# (PURPOSE, CHOICE, TRAIN_TT, ...) so the expressions below can use bare names.
globals().update(database.variables)

# Exclude data: drop rows where PURPOSE is neither 1 nor 3, or where CHOICE is 0.
exclude = (( PURPOSE != 1 ) * ( PURPOSE != 3 ) + ( CHOICE == 0 )) > 0
database.remove(exclude)

# Split the *filtered* observations. They are read back from the database rather
# than from `df`, so the split does not depend on `remove` having mutated the
# original frame in place.
df_train, df_test = train_test_split(database.data, test_size=0.2, random_state = 42)

database_train = db.Database("swissmetro_train", df_train)
database_test = db.Database("swissmetro_test", df_test)

# Parameters to be estimated
# Beta(name, initial value, lower bound, upper bound, status); per the Biogeme
# documentation a status of 1 keeps the parameter fixed at its initial value.
ASC_CAR   = Beta('ASC_CAR', 0, None, None, 0)
ASC_SM    = Beta('ASC_SM',  0, None, None, 0)
# NOTE(review): the Python name is ASC_TRAIN but the parameter is labelled 'ASC_SBB'.
ASC_TRAIN = Beta('ASC_SBB', 0, None, None, 1)

# Upper bound 0: these coefficients are constrained to be non-positive.
B_TIME = Beta('B_TIME', 0, None, 0, 0)
B_COST = Beta('B_COST', 0, None, 0, 0)
B_HE   = Beta('B_HE',   0, None, 0, 0)

# Definition of new variables
# The cost columns are zeroed out whenever GA is non-zero.
TRAIN_COST = database_train.DefineVariable('TRAIN_COST', TRAIN_CO * ( GA == 0 ))
SM_COST    = database_train.DefineVariable('SM_COST', SM_CO * ( GA == 0 ))
# Mirror the derived columns in the hold-out database so the same utilities can
# be evaluated on it; the returned variables are identical by name.
database_test.DefineVariable('TRAIN_COST', TRAIN_CO * ( GA == 0 ))
database_test.DefineVariable('SM_COST', SM_CO * ( GA == 0 ))

# Utilities
V_TRAIN = ASC_TRAIN + B_TIME * TRAIN_TT + B_COST * TRAIN_COST + B_HE * TRAIN_HE
V_SM    = ASC_SM    + B_TIME * SM_TT    + B_COST * SM_COST    + B_HE * SM_HE
V_CAR   = ASC_CAR   + B_TIME * CAR_TT   + B_COST * CAR_CO

# Alternative id -> utility expression / availability variable.
V = {1: V_TRAIN, 2: V_SM, 3: V_CAR}
av = {1: TRAIN_AV, 2: SM_AV, 3: CAR_AV}

# Log-likelihood contribution of the logit model, estimated on the training split.
logprob = loglogit(V, av, CHOICE)
biogeme_model = bio.BIOGEME(database_train, logprob)
biogeme_model.modelName = "Base Model"

In [18]:
def process_parent(parent, pairs):
    '''
    Walk an expression tree and collect the (beta, variable) pair of every product.

    A node whose class name is 'Times' is handed to ``get_pair`` and not
    descended into. Any other node is descended into through its ``left`` and
    ``right`` attributes; nodes lacking either attribute are treated as leaves
    and contribute nothing (so standalone terms such as constants are skipped).

    Parameters
    ----------
    parent : expression node exposing ``getClassName()``
    pairs : list
        Accumulator, extended in place.

    Returns
    -------
    list
        The same ``pairs`` list, on every code path.

    Raises
    ------
    ValueError
        Propagated from ``get_pair`` when a product is not beta * variable.
    '''
    if parent.getClassName() == 'Times':
        pairs.append(get_pair(parent))
        return pairs

    try:
        left = parent.left
        right = parent.right
    except AttributeError:
        # Not a binary node: nothing to collect below this point.
        return pairs

    process_parent(left, pairs)
    process_parent(right, pairs)
    return pairs
    
def get_pair(parent):
    '''
    Extract the (beta name, variable name) pair from a binary product node.

    The two operands may appear in either order. Operands whose class name is
    neither 'Beta' nor 'Variable' are ignored.

    Raises
    ------
    ValueError
        If the operands do not provide both a beta name and a variable name.
    '''
    beta_name = None
    variable_name = None

    for operand in (parent.left, parent.right):
        kind = operand.getClassName()
        if kind == 'Beta':
            beta_name = operand.name
        elif kind == 'Variable':
            variable_name = operand.name

    if not (beta_name and variable_name):
        raise ValueError("Parent does not contain beta and variable")
    return (beta_name, variable_name)
        
def bio_to_rumboost(biogeme_model):
    '''
    Converts a biogeme model to a rumboost dict

    For every entry of ``biogeme_model.loglike.util`` the beta * variable
    products found by ``process_parent`` are listed in four parallel lists:

    - 'cols': variable names
    - 'betas': names of the matching parameters
    - 'interaction_constraints': one singleton group ``[i]`` per column
    - 'monotone_constraints': 1, -1 or 0, derived from the bounds returned by
      ``biogeme_model.getBoundsOnBeta``

    Returns
    -------
    dict
        Same keys as ``loglike.util``; each value is the dict described above.

    Raises
    ------
    ValueError
        If a parameter has both a lower and an upper bound.
    '''
    utils = biogeme_model.loglike.util
    rum_structure = {}

    for k, v in utils.items():
        spec = {'cols': [], 'monotone_constraints': [], 'interaction_constraints': [], 'betas': []}
        for i, (beta, col) in enumerate(process_parent(v, [])):
            spec['cols'].append(col)
            spec['betas'].append(beta)
            spec['interaction_constraints'].append([i])
            # Exactly one flag per column keeps the lists aligned.
            spec['monotone_constraints'].append(
                _monotone_constraint(biogeme_model.getBoundsOnBeta(beta))
            )
        rum_structure[k] = spec
    return rum_structure


def _monotone_constraint(bounds):
    '''Map a (lower, upper) bound pair to a monotonicity flag: 1, -1 or 0.'''
    lower, upper = bounds[0], bounds[1]
    if lower is not None and upper is not None:
        raise ValueError("Only one bound can be not None")
    if lower is not None and lower >= 0:
        return 1   # parameter cannot be negative
    if upper is not None and upper <= 0:
        return -1  # parameter cannot be positive
    return 0       # unbounded, or the bound does not pin the sign

def bio_rum_train(biogeme_model):
    '''
    Train a RUMBoost model from a Biogeme model specification.

    The structure is derived from the model with ``bio_to_rumboost``; the
    training set is the model's own database, with the choice column of the
    log-likelihood used as label.

    Returns
    -------
    Whatever ``rum_train`` returns.
    '''
    rum_structure = bio_to_rumboost(biogeme_model)
    data = biogeme_model.database.data
    target = biogeme_model.loglike.choice.name
    train_data = lgb.Dataset(data, label=data[target], free_raw_data=False)
    # NOTE(review): `lgb`, `rum_train`, `param` and `validation_data` are not
    # defined in the cells visible here; they must exist in the kernel before
    # this function is called.
    # Pass the structure derived from *this* model, not a leftover global.
    return rum_train(param, train_data, valid_sets=[validation_data], rum_structure=rum_structure)

In [19]:
# Display the RUMBoost structure derived from the Biogeme model defined above.
bio_to_rumboost(biogeme_model)

{1: {'cols': ['TRAIN_TT', 'TRAIN_COST', 'TRAIN_HE'],
  'monotone_constraints': [-1, -1, -1],
  'interaction_constraints': [[0], [1], [2]],
  'betas': ['B_TIME', 'B_COST', 'B_HE']},
 2: {'cols': ['SM_TT', 'SM_COST', 'SM_HE'],
  'monotone_constraints': [-1, -1, -1],
  'interaction_constraints': [[0], [1], [2]],
  'betas': ['B_TIME', 'B_COST', 'B_HE']},
 3: {'cols': ['CAR_TT', 'CAR_CO'],
  'monotone_constraints': [-1, -1],
  'interaction_constraints': [[0], [1]],
  'betas': ['B_TIME', 'B_COST']}}