In [1]:
## Built-in modules
import os

## Third party modules
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from collections import OrderedDict
%matplotlib inline

## Local modules
# pip install biogeme
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
import biogeme.messaging as msg
import biogeme.tools as tools
import biogeme.results as res
from biogeme.expressions import Beta, DefineVariable, bioDraws, log, MonteCarlo

In [2]:
#import biogeme.results as res

In [3]:
pd.__version__

'1.3.0'

## Read in CFS2017 and Run Dataprep Code

In [2]:
df_raw = pd.read_csv('cfs_2017.csv')

In [3]:
%run CFS_Austin_dataprep.ipynb

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [4]:
df_tx.shape

(252583, 42)

In [16]:
df_tx['mode_agg5'].value_counts()

For-hire Truck    107411
Private Truck      72413
Parcel             65137
Rail/IMX            4547
Air                 3075
Name: mode_agg5, dtype: int64

In [17]:
df_tx['commodity'].value_counts()

mfr_goods      150970
interm_food     29711
bulk            28163
fuel_fert       26664
other           17075
Name: commodity, dtype: int64

In [4]:
df_tx.groupby('mode_agg5')['SHIPMT_WGHT'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
mode_agg5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Air,3075.0,208.2732,839.1691,1.0,2.0,8.0,83.0,14484.0
For-hire Truck,107411.0,15490.57,20600.67,1.0,172.0,2190.0,35496.0,461128.0
Parcel,65137.0,18.02565,27.29339,1.0,2.0,6.0,21.0,165.0
Private Truck,72413.0,9165.305,23341.28,1.0,65.0,567.0,6286.0,447013.0
Rail/IMX,4547.0,1094682.0,5214578.0,2.0,110167.0,187222.0,213800.5,39336402.0


## Biogeme Estimation Setup 

### Create the 'choice' and 'availability' variables

In [10]:
## alt_1 = Air, alt_2 = For-hire Truck, alt_3 = Parcel, alt_4 = Private Truck, alt_5 = Rail/IMX 

choice_dictionary ={'Air' : 1, 'For-hire Truck' : 2, 'Parcel' : 3, 'Private Truck': 4, 'Rail/IMX':5}
df_tx['choice'] = df_tx['mode_agg5'].map(choice_dictionary).astype(int)

## add the parcel filter, make air and private truck threshold at the max national sample
df_tx['AV_1c'] = np.where(((df_tx['SHIPMT_WGHT_TON'] <= 410) | (df_tx['mode_agg5'] == 'Air')), 1, 0) # the treshold is the national max (unweighted)
df_tx['AV_2c'] = 1
df_tx['AV_3c'] = np.where(((df_tx['SHIPMT_WGHT'] <= 150) | (df_tx['mode_agg5'] == 'Parcel')), 1, 0)
df_tx['AV_4c'] = np.where(((df_tx['SHIPMT_DIST_ROUTED'] <= 468) | (df_tx['mode_agg5'] == 'Private Truck')), 1, 0) # the treshold is the national max (unweighted)
df_tx['AV_5c'] = 1


### Create TravelTime and ShipCost variables

In [11]:
## We assume that For-hire Truck and Private Truck have the same travel time and shipping costs.
## The calculations for Rail/IMX, For-hire Truck and Private Truck are based on Stinson et al. (2017)
## The calculations for Air and Parcel are based on Keya (2016), minor modification to Air (adding loading time for external shipments)

m = df_tx['AV_3c'] == 1
df_tx.loc[m, 'random_b'] = np.random.rand(m.sum()) ## We are assigning different shipping speeds for Parcels based on a distribution; hence we need to generate this 'random' variable first 

df_tx['alt_1_traveltime'] = np.where(df_tx['geo'] == 'Within Austin', 1 + df_tx['SHIPMT_DIST_GC']/549.5,
                            np.where(df_tx['geo'] == 'External', 12 + df_tx['SHIPMT_DIST_GC']/549.5,
                            np.nan)) # assume average speed of 549.5 mph
df_tx['alt_2_traveltime'] = np.where(df_tx['geo'] == 'Within Austin', 4 + df_tx['SHIPMT_DIST_ROUTED']/20,
                            np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] <= 650), 16 + df_tx['SHIPMT_DIST_ROUTED']/65,
                            np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] > 650) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1299), 16 + df_tx['SHIPMT_DIST_ROUTED']/38,
                            np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] > 1299), 16 + df_tx['SHIPMT_DIST_ROUTED']/32, np.nan)))) 
df_tx['alt_3_traveltime_b'] = np.where(df_tx['random_b'] <= 0.09, 3*24,
                            np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27), 24,
                            np.where(df_tx['random_b'] > 0.27, 5*24, np.nan)))
df_tx['alt_4_traveltime'] =np.where(df_tx['geo'] == 'Within Austin', 4 + df_tx['SHIPMT_DIST_ROUTED']/20,
                           np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] <= 650), 16 + df_tx['SHIPMT_DIST_ROUTED']/65,
                           np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] > 650) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1299), 16 + df_tx['SHIPMT_DIST_ROUTED']/38,
                           np.where((df_tx['geo'] == 'External') & (df_tx['SHIPMT_DIST_ROUTED'] > 1299), 16 + df_tx['SHIPMT_DIST_ROUTED']/32, np.nan)))) 
df_tx['alt_5_traveltime'] = 12 + df_tx['SHIPMT_DIST_ROUTED']/22 + 12*2 # Assume 2 trackage changes

df_tx['alt_1_shipcost'] = np.where(df_tx['SHIPMT_WGHT'] <= 100, 55, 55+(df_tx['SHIPMT_WGHT']-100))
df_tx['alt_2_shipcost'] = np.where(df_tx['SHIPMT_WGHT'] < 150, 2.83*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'],
                          np.where((df_tx['SHIPMT_WGHT'] >= 150) & (df_tx['SHIPMT_WGHT'] < 1500), 0.50*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'],
                          np.where(df_tx['SHIPMT_WGHT'] >= 1500, 0.18*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'], np.NaN)))
df_tx['alt_3_shipcost_b'] = np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] <= 150), np.exp(2.056+0.016*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 150) & (df_tx['SHIPMT_DIST_ROUTED'] <= 300), np.exp(2.251+0.015*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 300) & (df_tx['SHIPMT_DIST_ROUTED'] <= 600), np.exp(2.362+0.015*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 600) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1000), np.exp(2.555+0.014*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1000) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1400), np.exp(2.739+0.013*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1400) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1800), np.exp(2.905+0.013*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1800), np.exp(3.023+0.013*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] <= 150), np.exp(3.666+0.015*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 150) & (df_tx['SHIPMT_DIST_ROUTED'] <= 300), np.exp(3.993+0.016*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 300) & (df_tx['SHIPMT_DIST_ROUTED'] <= 600), np.exp(4.631+0.01*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 600) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1000), np.exp(4.700+0.01*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1000) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1400), np.exp(4.767+0.015*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1400) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1800), np.exp(4.798+0.015*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] > 0.09) & (df_tx['random_b'] <= 0.27) & (df_tx['SHIPMT_DIST_ROUTED'] > 1800), np.exp(4.855+0.015*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] <= 150), np.exp(3.208+0.014*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 150) & (df_tx['SHIPMT_DIST_ROUTED'] <= 300), np.exp(3.399+0.015*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 300) & (df_tx['SHIPMT_DIST_ROUTED'] <= 600), np.exp(3.560+0.015*df_tx['SHIPMT_WGHT']),
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 600) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1000), np.exp(3.624+0.016*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 1000) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1400), np.exp(3.908+0.016*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 1400) & (df_tx['SHIPMT_DIST_ROUTED'] <= 1800), np.exp(4.010+0.016*df_tx['SHIPMT_WGHT']), 
                          np.where((df_tx['random_b'] <= 0.09) & (df_tx['SHIPMT_DIST_ROUTED'] > 1800), np.exp(4.158+0.016*df_tx['SHIPMT_WGHT']), np.NaN)))))))))))))))))))))
df_tx['alt_4_shipcost'] = np.where(df_tx['SHIPMT_WGHT'] < 150, 2.83*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'],
                          np.where((df_tx['SHIPMT_WGHT'] >= 150) & (df_tx['SHIPMT_WGHT'] < 1500), 0.50*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'],
                          np.where(df_tx['SHIPMT_WGHT'] >= 1500, 0.18*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_GC'], np.NaN)))
df_tx['alt_5_shipcost'] = 0.039*df_tx['SHIPMT_WGHT_TON']*df_tx['SHIPMT_DIST_ROUTED']



  result = getattr(ufunc, method)(*inputs, **kwargs)


### Create Biogeme datasets

In [12]:
## Biogeme only take dataset that contains numbers
df_tx_short = df_tx[['SHIPMT_ID','SHIPMT_DIST','SHIPMT_DIST_GC','SHIPMT_DIST_ROUTED','SHIPMT_WGHT_TON','SHIPMT_WGHT','value_density',
                     'bulk','fuel_fert','interm_food','mfr_goods','other',
                     'wholesale','mfring','mining','retail',
                     'info','management','transwarehouse','alt_1_traveltime','alt_2_traveltime','alt_3_traveltime_b',
                     'alt_4_traveltime','alt_5_traveltime',
                     'alt_1_shipcost','alt_2_shipcost','alt_3_shipcost_b','alt_4_shipcost','alt_5_shipcost','choice',
                     'AV_1c','AV_2c','AV_3c','AV_4c','AV_5c',
                     'WGT_FACTOR','wght_bin1']]

In [13]:
## traveltime and shipcost for Parcel contain NaN, replace with 0 bc Biogeme does not allow NaN in dataset
df_tx_short = df_tx_short.fillna(0).copy() 

In [14]:
database = db.Database('2017cfs_tx', df_tx_short)  

## The following statement allows you to use the names of the variable as Python variable.
globals().update(database.variables)

In [31]:
database.fullData

Unnamed: 0,SHIPMT_ID,SHIPMT_DIST,SHIPMT_DIST_GC,SHIPMT_DIST_ROUTED,SHIPMT_WGHT_TON,SHIPMT_WGHT,value_density,bulk,fuel_fert,interm_food,...,shipmt_dist_250to400,shipmt_dist_more_than_400,val_den_8to15,val_den_more_than_15,wght_bin_1,wght_bin_2,wght_bin_3,wght_bin_4,wght_bin_5,random
5,6,1348,1348,1561,0.0020,4,151.250000,0,0,0,...,0,1,0,1,1,0,0,0,0,0.923535
8,9,1057,1057,1224,0.0190,38,50.473684,0,0,0,...,0,1,0,1,1,0,0,0,0,0.065902
12,13,517,466,517,0.3420,684,0.982456,0,0,0,...,0,1,0,0,0,1,0,0,0,0.854813
43,44,729,729,831,0.0005,1,3.000000,0,0,0,...,0,1,0,0,1,0,0,0,0,0.536287
65,66,419,180,419,69.2885,138577,0.106670,1,0,0,...,0,1,0,0,0,0,0,0,1,0.747795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5978371,5978372,9,7,9,0.0025,5,8.400000,0,1,0,...,0,0,1,0,1,0,0,0,0,0.232000
5978388,5978389,300,300,347,0.0005,1,7.000000,0,0,0,...,1,0,0,0,1,0,0,0,0,0.013320
5978391,5978392,863,863,1011,0.0090,18,14.500000,0,0,0,...,0,1,1,0,1,0,0,0,0,0.784178
5978426,5978427,1211,1025,1211,0.2100,420,37.150000,0,0,0,...,0,1,0,1,0,1,0,0,0,0.140613


## Model Specifications

### Case-specific vars: weight_bin1 + distance + commodities + val_density + naics (insignificant estimates removed)

In [29]:
# Parameters to be estimated
# (0, None, None, 0/1) --> (starting value, lower bound, upper bound, included/excluded in the estimation)
ASC_AIR = Beta('ASC_AIR', 0, None, None, 0)
ASC_FHTRUCK = Beta('ASC_FHTRUCK', 0, None, None, 1)
ASC_PARCEL = Beta('ASC_PARCEL', 0, None, None, 0)
ASC_PTRUCK = Beta('ASC_PTRUCK', 0, None, None, 0)
ASC_RAIL = Beta('ASC_RAIL', 0, None, None, 0)

B_AIR_WGHT = Beta('B_AIR_WGHT', 0, None, None, 0)
B_PARCEL_WGHT = Beta('B_PARCEL_WGHT', 0, None, None, 0)
B_PTRUCK_WGHT = Beta('B_PTRUCK_WGHT', 0, None, None, 0)
B_RAIL_WGHT = Beta('B_RAIL_WGHT', 0, None, None, 0)

B_AIR_VALDEN = Beta('B_AIR_VALDEN', 0, None, None, 0)
B_PARCEL_VALDEN = Beta('B_PARCEL_VALDEN', 0, None, None, 0)
B_PTRUCK_VALDEN = Beta('B_PTRUCK_VALDEN', 0, None, None, 0)
#B_RAIL_VALDEN = Beta('B_RAIL_VALDEN', 0, None, None, 0)

B_AIR_DIST = Beta('B_AIR_DIST', 0, None, None, 0)
B_PARCEL_DIST = Beta('B_PARCEL_DIST', 0, None, None, 0)
B_PTRUCK_DIST = Beta('B_PTRUCK_DIST', 0, None, None, 0)
B_RAIL_DIST = Beta('B_RAIL_DIST', 0, None, None, 0)

#B_AIR_BK = Beta('B_AIR_BK', 0, None, None, 0)
B_PARCEL_BK = Beta('B_PARCEL_BK', 0, None, None, 0) 
B_PTRUCK_BK = Beta('B_PTRUCK_BK', 0, None, None, 0) 
B_RAIL_BK = Beta('B_RAIL_BK', 0, None, None, 0)

#B_AIR_FF = Beta('B_AIR_FF', 0, None, None, 0)
B_PARCEL_FF = Beta('B_PARCEL_FF', 0, None, None, 0)
B_PTRUCK_FF = Beta('B_PTRUCK_FF', 0, None, None, 0)
B_RAIL_FF = Beta('B_RAIL_FF', 0, None, None, 0) 

B_AIR_IF = Beta('B_AIR_IF', 0, None, None, 0)
B_PARCEL_IF = Beta('B_PARCEL_IF', 0, None, None, 0)
B_PTRUCK_IF = Beta('B_PTRUCK_IF', 0, None, None, 0)
B_RAIL_IF = Beta('B_RAIL_IF', 0, None, None, 0) 

B_AIR_MG = Beta('B_AIR_MG', 0, None, None, 0)
#B_PARCEL_MG = Beta('B_PARCEL_MG', 0, None, None, 0)
B_PTRUCK_MG = Beta('B_PTRUCK_MG', 0, None, None, 0)
B_RAIL_MG = Beta('B_RAIL_MG', 0, None, None, 0)

#B_AIR_INFO = Beta('B_AIR_INFO', 0, None, None, 0)
B_PARCEL_INFO = Beta('B_PARCEL_INFO', 0, None, None, 0)
B_PTRUCK_INFO = Beta('B_PTRUCK_INFO', 0, None, None, 0)
#B_RAIL_INFO = Beta('B_RAIL_INFO', 0, None, None, 0)

#B_AIR_MFR = Beta('B_AIR_MFR', 0, None, None, 0)
B_PARCEL_MFR = Beta('B_PARCEL_MFR', 0, None, None, 0)
B_PTRUCK_MFR = Beta('B_PTRUCK_MFR', 0, None, None, 0)
B_RAIL_MFR = Beta('B_RAIL_MFR', 0, None, None, 0)

#B_AIR_MGT = Beta('B_AIR_MGT', 0, None, None, 0)
B_PARCEL_MGT = Beta('B_PARCEL_MGT', 0, None, None, 0)
B_PTRUCK_MGT = Beta('B_PTRUCK_MGT', 0, None, None, 0)
B_RAIL_MGT = Beta('B_RAIL_MGT', 0, None, None, 0)

#B_AIR_MINING = Beta('B_AIR_MINING', 0, None, None, 0)
#B_PARCEL_MINING = Beta('B_PARCEL_MINING', 0, None, None, 0)
#B_PTRUCK_MINING = Beta('B_PTRUCK_MINING', 0, None, None, 0)
#B_RAIL_MINING = Beta('B_RAIL_MINING', 0, None, None, 0)

B_AIR_RETAIL = Beta('B_AIR_RETAIL', 0, None, None, 0)
B_PARCEL_RETAIL = Beta('B_PARCEL_RETAIL', 0, None, None, 0)
B_PTRUCK_RETAIL = Beta('B_PTRUCK_RETAIL', 0, None, None, 0)
#B_RAIL_RETAIL = Beta('B_RAIL_RETAIL', 0, None, None, 0)

B_AIR_TW = Beta('B_AIR_TW', 0, None, None, 0)
#B_PARCEL_TW = Beta('B_PARCEL_TW', 0, None, None, 0)
#B_PTRUCK_TW = Beta('B_PTRUCK_TW', 0, None, None, 0)
B_RAIL_TW = Beta('B_RAIL_TW', 0, None, None, 0)

B_AIR_WS = Beta('B_AIR_WS', 0, None, None, 0)
#B_PARCEL_WS = Beta('B_PARCEL_WS', 0, None, None, 0)
B_PTRUCK_WS = Beta('B_PTRUCK_WS', 0, None, None, 0)
B_RAIL_WS = Beta('B_RAIL_WS', 0, None, None, 0)

B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)

# Definition of the utility functions
V1 = ASC_AIR + B_TIME * alt_1_traveltime + B_COST * alt_1_shipcost + B_AIR_WGHT * wght_bin1 + \
     B_AIR_DIST * SHIPMT_DIST + B_AIR_VALDEN * value_density + \
     B_AIR_IF * interm_food + B_AIR_MG * mfr_goods + \
     B_AIR_RETAIL * retail + B_AIR_TW * transwarehouse + B_AIR_WS * wholesale

V2 = ASC_FHTRUCK + B_TIME * alt_2_traveltime + B_COST * alt_2_shipcost 

V3 = ASC_PARCEL + B_TIME * alt_3_traveltime_b + B_COST * alt_3_shipcost_b + \
     B_PARCEL_DIST * SHIPMT_DIST + B_PARCEL_VALDEN * value_density + \
     B_PARCEL_BK * bulk + B_PARCEL_FF * fuel_fert + B_PARCEL_IF * interm_food + \
     B_PARCEL_INFO * info + B_PARCEL_MGT * management + \
     B_PARCEL_RETAIL * retail + B_PARCEL_MFR * mfring  

V4 = ASC_PTRUCK + B_TIME * alt_4_traveltime + B_COST * alt_4_shipcost + B_PTRUCK_WGHT * wght_bin1 + \
     B_PTRUCK_DIST * SHIPMT_DIST + B_PTRUCK_VALDEN * value_density + \
     B_PTRUCK_BK * bulk + B_PTRUCK_FF * fuel_fert + B_PTRUCK_IF * interm_food + B_PTRUCK_MG * mfr_goods + \
     B_PTRUCK_INFO * info + B_PTRUCK_MGT * management + \
     B_PTRUCK_RETAIL * retail + B_PTRUCK_MFR * mfring + B_PTRUCK_WS * wholesale

V5 = ASC_RAIL + B_TIME * alt_5_traveltime + B_COST * alt_5_shipcost + B_RAIL_WGHT * wght_bin1 + \
     B_RAIL_DIST * SHIPMT_DIST + \
     B_RAIL_BK * bulk + B_RAIL_FF * fuel_fert + B_RAIL_IF * interm_food + B_RAIL_MG * mfr_goods + \
     B_RAIL_MGT * management + \
     B_RAIL_TW * transwarehouse + B_RAIL_MFR * mfring + B_RAIL_WS * wholesale

# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3, 4: V4, 5: V5}

# Associate the availability conditions with the alternatives
av = {1: AV_1c, 2: AV_2c, 3: AV_3c, 4: AV_4c, 5: AV_5c}

In [31]:
# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
logprob = models.loglogit(V, av, choice)

# Define level of verbosity
logger = msg.bioMessage()
logger.setGeneral()

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'ml_2017_basic_wghtbin1_dist_commod_valden_naics_SELEC_c'

## It is possible to control the generation of the HTML and the pickle files
#biogeme.generateHtml = True
#biogeme.generatePickle = False
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
pandasResults

[12:32:51] < General >   Remove 8 unused variables from the database as only 29 are used.
[12:32:59] < General >   *** Initial values of the parameters are obtained from the file __ml_2017_basic_wghtbin1_dist_commod_valden_naics_SELEC_c.iter
[12:34:16] < General >   Log likelihood (N = 252583):  -351491.5 Gradient norm:      9e+08 Hessian norm:       2e+14 
[12:35:50] < General >   Log likelihood (N = 252583):    -196240 Gradient norm:      2e+08 Hessian norm:       9e+13 
[12:37:20] < General >   Log likelihood (N = 252583):    -170767 Gradient norm:      9e+07 Hessian norm:       2e+13 
[12:38:55] < General >   Log likelihood (N = 252583):  -163740.7 Gradient norm:      3e+07 Hessian norm:       6e+12 
[12:40:23] < General >   Log likelihood (N = 252583):  -160965.5 Gradient norm:      2e+07 Hessian norm:       8e+11 
[12:41:50] < General >   Log likelihood (N = 252583):  -159821.9 Gradient norm:      1e+07 Hessian norm:       2e+11 
[12:43:13] < General >   Log likelihood (N = 25258

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_AIR,-2.320613,0.098074,-23.661964,0.0,0.10642,-21.806147,0.0
ASC_PARCEL,0.71897,0.020395,35.252825,0.0,0.020536,35.010676,0.0
ASC_PTRUCK,1.914008,0.031902,59.996736,0.0,0.032297,59.261934,0.0
ASC_RAIL,-5.469372,0.173979,-31.436944,0.0,0.146639,-37.298325,0.0
B_AIR_DIST,0.000648,3.3e-05,19.523004,0.0,4e-05,16.055903,0.0
B_AIR_IF,-1.147018,0.226263,-5.069403,3.99066e-07,0.223263,-5.137529,2.783752e-07
B_AIR_MG,1.019366,0.07532,13.533878,0.0,0.075764,13.454577,0.0
B_AIR_RETAIL,0.147029,0.154463,0.951868,0.3411641,0.155017,0.948471,0.3428898
B_AIR_TW,0.469022,0.065012,7.214417,5.415668e-13,0.065026,7.212789,5.482281e-13
B_AIR_VALDEN,3.4e-05,8e-06,4.342205,1.4106e-05,1.5e-05,2.317219,0.02049179


### Replacing wght_bin1 with five wght_bin binary variables (wght_bin_1 as the reference)

In [15]:
# Parameters to be estimated
# (0, None, None, 0/1) --> (starting value, lower bound, upper bound, included/excluded in the estimation)
ASC_AIR = Beta('ASC_AIR', 0, None, None, 0)
ASC_FHTRUCK = Beta('ASC_FHTRUCK', 0, None, None, 1)
ASC_PARCEL = Beta('ASC_PARCEL', 0, None, None, 0)
ASC_PTRUCK = Beta('ASC_PTRUCK', 0, None, None, 0)
ASC_RAIL = Beta('ASC_RAIL', 0, None, None, 0)

B_AIR_WGHT_2 = Beta('B_AIR_WGHT_2', 0, None, None, 0)
B_PARCEL_WGHT_2 = Beta('B_PARCEL_WGHT_2', 0, None, None, 0)
B_PTRUCK_WGHT_2 = Beta('B_PTRUCK_WGHT_2', 0, None, None, 0)
B_RAIL_WGHT_2 = Beta('B_RAIL_WGHT_2', 0, None, None, 0)

B_AIR_WGHT_3 = Beta('B_AIR_WGHT_3', 0, None, None, 0)
B_PARCEL_WGHT_3 = Beta('B_PARCEL_WGHT_3', 0, None, None, 0)
B_PTRUCK_WGHT_3 = Beta('B_PTRUCK_WGHT_3', 0, None, None, 0)
B_RAIL_WGHT_3 = Beta('B_RAIL_WGHT_3', 0, None, None, 0)

B_AIR_WGHT_4 = Beta('B_AIR_WGHT_4', 0, None, None, 0)
B_PARCEL_WGHT_4 = Beta('B_PARCEL_WGHT_4', 0, None, None, 0)
B_PTRUCK_WGHT_4 = Beta('B_PTRUCK_WGHT_4', 0, None, None, 0)
B_RAIL_WGHT_4 = Beta('B_RAIL_WGHT_4', 0, None, None, 0)

B_AIR_WGHT_5 = Beta('B_AIR_WGHT_5', 0, None, None, 0)
B_PARCEL_WGHT_5 = Beta('B_PARCEL_WGHT_5', 0, None, None, 0)
B_PTRUCK_WGHT_5 = Beta('B_PTRUCK_WGHT_5', 0, None, None, 0)
B_RAIL_WGHT_5 = Beta('B_RAIL_WGHT_5', 0, None, None, 0)

B_AIR_VALDEN = Beta('B_AIR_VALDEN', 0, None, None, 0)
B_PARCEL_VALDEN = Beta('B_PARCEL_VALDEN', 0, None, None, 0)
B_PTRUCK_VALDEN = Beta('B_PTRUCK_VALDEN', 0, None, None, 0)
#B_RAIL_VALDEN = Beta('B_RAIL_VALDEN', 0, None, None, 0)

B_AIR_DIST = Beta('B_AIR_DIST', 0, None, None, 0)
B_PARCEL_DIST = Beta('B_PARCEL_DIST', 0, None, None, 0)
B_PTRUCK_DIST = Beta('B_PTRUCK_DIST', 0, None, None, 0)
B_RAIL_DIST = Beta('B_RAIL_DIST', 0, None, None, 0)

#B_AIR_BK = Beta('B_AIR_BK', 0, None, None, 0)
B_PARCEL_BK = Beta('B_PARCEL_BK', 0, None, None, 0) 
B_PTRUCK_BK = Beta('B_PTRUCK_BK', 0, None, None, 0) 
B_RAIL_BK = Beta('B_RAIL_BK', 0, None, None, 0)

#B_AIR_FF = Beta('B_AIR_FF', 0, None, None, 0)
B_PARCEL_FF = Beta('B_PARCEL_FF', 0, None, None, 0)
B_PTRUCK_FF = Beta('B_PTRUCK_FF', 0, None, None, 0)
B_RAIL_FF = Beta('B_RAIL_FF', 0, None, None, 0) 

B_AIR_IF = Beta('B_AIR_IF', 0, None, None, 0)
B_PARCEL_IF = Beta('B_PARCEL_IF', 0, None, None, 0)
B_PTRUCK_IF = Beta('B_PTRUCK_IF', 0, None, None, 0)
B_RAIL_IF = Beta('B_RAIL_IF', 0, None, None, 0) 

B_AIR_MG = Beta('B_AIR_MG', 0, None, None, 0)
#B_PARCEL_MG = Beta('B_PARCEL_MG', 0, None, None, 0)
B_PTRUCK_MG = Beta('B_PTRUCK_MG', 0, None, None, 0)
B_RAIL_MG = Beta('B_RAIL_MG', 0, None, None, 0)

#B_AIR_INFO = Beta('B_AIR_INFO', 0, None, None, 0)
B_PARCEL_INFO = Beta('B_PARCEL_INFO', 0, None, None, 0)
B_PTRUCK_INFO = Beta('B_PTRUCK_INFO', 0, None, None, 0)
#B_RAIL_INFO = Beta('B_RAIL_INFO', 0, None, None, 0)

#B_AIR_MFR = Beta('B_AIR_MFR', 0, None, None, 0)
B_PARCEL_MFR = Beta('B_PARCEL_MFR', 0, None, None, 0)
B_PTRUCK_MFR = Beta('B_PTRUCK_MFR', 0, None, None, 0)
B_RAIL_MFR = Beta('B_RAIL_MFR', 0, None, None, 0)

#B_AIR_MGT = Beta('B_AIR_MGT', 0, None, None, 0)
B_PARCEL_MGT = Beta('B_PARCEL_MGT', 0, None, None, 0)
B_PTRUCK_MGT = Beta('B_PTRUCK_MGT', 0, None, None, 0)
B_RAIL_MGT = Beta('B_RAIL_MGT', 0, None, None, 0)

#B_AIR_MINING = Beta('B_AIR_MINING', 0, None, None, 0)
#B_PARCEL_MINING = Beta('B_PARCEL_MINING', 0, None, None, 0)
#B_PTRUCK_MINING = Beta('B_PTRUCK_MINING', 0, None, None, 0)
#B_RAIL_MINING = Beta('B_RAIL_MINING', 0, None, None, 0)

B_AIR_RETAIL = Beta('B_AIR_RETAIL', 0, None, None, 0)
B_PARCEL_RETAIL = Beta('B_PARCEL_RETAIL', 0, None, None, 0)
B_PTRUCK_RETAIL = Beta('B_PTRUCK_RETAIL', 0, None, None, 0)
#B_RAIL_RETAIL = Beta('B_RAIL_RETAIL', 0, None, None, 0)

B_AIR_TW = Beta('B_AIR_TW', 0, None, None, 0)
#B_PARCEL_TW = Beta('B_PARCEL_TW', 0, None, None, 0)
#B_PTRUCK_TW = Beta('B_PTRUCK_TW', 0, None, None, 0)
B_RAIL_TW = Beta('B_RAIL_TW', 0, None, None, 0)

B_AIR_WS = Beta('B_AIR_WS', 0, None, None, 0)
#B_PARCEL_WS = Beta('B_PARCEL_WS', 0, None, None, 0)
B_PTRUCK_WS = Beta('B_PTRUCK_WS', 0, None, None, 0)
B_RAIL_WS = Beta('B_RAIL_WS', 0, None, None, 0)

B_TIME = Beta('B_TIME', 0, None, None, 0)
B_COST = Beta('B_COST', 0, None, None, 0)

# Definition of the utility functions
V1 = ASC_AIR + B_TIME * alt_1_traveltime + B_COST * alt_1_shipcost + \
     B_AIR_WGHT_2 * wght_bin_2 + B_AIR_WGHT_3 * wght_bin_3 + B_AIR_WGHT_4 * wght_bin_4 + B_AIR_WGHT_5 * wght_bin_5 + \
     B_AIR_DIST * SHIPMT_DIST + B_AIR_VALDEN * value_density + \
     B_AIR_IF * interm_food + B_AIR_MG * mfr_goods + \
     B_AIR_RETAIL * retail + B_AIR_TW * transwarehouse + B_AIR_WS * wholesale

V2 = ASC_FHTRUCK + B_TIME * alt_2_traveltime + B_COST * alt_2_shipcost 

V3 = ASC_PARCEL + B_TIME * alt_3_traveltime_b + B_COST * alt_3_shipcost_b + \
     B_PARCEL_DIST * SHIPMT_DIST + B_PARCEL_VALDEN * value_density + \
     B_PARCEL_BK * bulk + B_PARCEL_FF * fuel_fert + B_PARCEL_IF * interm_food + \
     B_PARCEL_INFO * info + B_PARCEL_MGT * management + \
     B_PARCEL_RETAIL * retail + B_PARCEL_MFR * mfring  

V4 = ASC_PTRUCK + B_TIME * alt_4_traveltime + B_COST * alt_4_shipcost + \
     B_PTRUCK_WGHT_2 * wght_bin_2 + B_PTRUCK_WGHT_3 * wght_bin_3 + B_PTRUCK_WGHT_4 * wght_bin_4 + B_PTRUCK_WGHT_5 * wght_bin_5 + \
     B_PTRUCK_DIST * SHIPMT_DIST + B_PTRUCK_VALDEN * value_density + \
     B_PTRUCK_BK * bulk + B_PTRUCK_FF * fuel_fert + B_PTRUCK_IF * interm_food + B_PTRUCK_MG * mfr_goods + \
     B_PTRUCK_INFO * info + B_PTRUCK_MGT * management + \
     B_PTRUCK_RETAIL * retail + B_PTRUCK_MFR * mfring + B_PTRUCK_WS * wholesale

V5 = ASC_RAIL + B_TIME * alt_5_traveltime + B_COST * alt_5_shipcost + \
     B_RAIL_WGHT_2 * wght_bin_2 + B_RAIL_WGHT_3 * wght_bin_3 + B_RAIL_WGHT_4 * wght_bin_4 + B_RAIL_WGHT_5 * wght_bin_5 + \
     B_RAIL_DIST * SHIPMT_DIST + \
     B_RAIL_BK * bulk + B_RAIL_FF * fuel_fert + B_RAIL_IF * interm_food + B_RAIL_MG * mfr_goods + \
     B_RAIL_MGT * management + \
     B_RAIL_TW * transwarehouse + B_RAIL_MFR * mfring + B_RAIL_WS * wholesale

# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3, 4: V4, 5: V5}

# Associate the availability conditions with the alternatives
av = {1: AV_1c, 2: AV_2c, 3: AV_3c, 4: AV_4c, 5: AV_5c}

In [16]:
# Definition of the model. This is the contribution of each
# observation to the log likelihood function.
logprob = models.loglogit(V, av, choice)

# Define level of verbosity
logger = msg.bioMessage()
logger.setGeneral()

# Create the Biogeme object
biogeme = bio.BIOGEME(database, logprob)
biogeme.modelName = 'ml_2017_basic_5wghtbin_dist_commod_valden_naics_SELEC_c'

## It is possible to control the generation of the HTML and the pickle files
#biogeme.generateHtml = True
#biogeme.generatePickle = False
results = biogeme.estimate()

# Get the results in a pandas table
pandasResults = results.getEstimatedParameters()
pandasResults

[15:59:57] < General >   Remove 23 unused variables from the database as only 32 are used.
[16:00:05] < General >   *** Initial values of the parameters are obtained from the file __ml_2017_basic_5wghtbin_dist_commod_valden_naics_SELEC_c.iter
[16:02:19] < General >   Log likelihood (N = 252583):  -351029.4 Gradient norm:      9e+08 Hessian norm:       2e+14 
[16:04:50] < General >   Log likelihood (N = 252583):    -193740 Gradient norm:      3e+08 Hessian norm:       2e+13 
[16:07:19] < General >   Log likelihood (N = 252583):  -167766.4 Gradient norm:      1e+08 Hessian norm:       6e+12 
[16:09:49] < General >   Log likelihood (N = 252583):    -161504 Gradient norm:      4e+07 Hessian norm:       2e+12 
[16:12:19] < General >   Log likelihood (N = 252583):  -159143.9 Gradient norm:      2e+07 Hessian norm:       5e+11 
[16:14:55] < General >   Log likelihood (N = 252583):  -157554.9 Gradient norm:      9e+06 Hessian norm:       3e+11 
[16:17:23] < General >   Log likelihood (N = 2525

Unnamed: 0,Value,Std err,t-test,p-value,Rob. Std err,Rob. t-test,Rob. p-value
ASC_AIR,-3.594068,0.081438,-44.132615,0.0,0.087528,-41.061725,0.0
ASC_PARCEL,0.651129,0.020537,31.705595,0.0,0.020699,31.456398,0.0
ASC_PTRUCK,1.458594,0.030101,48.456695,0.0,0.030462,47.882104,0.0
ASC_RAIL,-5.480509,0.381386,-14.369975,0.0,0.362763,-15.1077,0.0
B_AIR_DIST,0.00071,3.3e-05,21.253036,0.0,4.1e-05,17.358652,0.0
B_AIR_IF,-1.18411,0.22587,-5.242446,1.58462e-07,0.222474,-5.32247,1.023676e-07
B_AIR_MG,1.032394,0.075266,13.716617,0.0,0.075588,13.65823,0.0
B_AIR_RETAIL,0.116178,0.154429,0.752311,0.4518638,0.154662,0.751176,0.4525465
B_AIR_TW,0.447682,0.064994,6.888033,5.65703e-12,0.064732,6.915955,4.647172e-12
B_AIR_VALDEN,3.7e-05,8e-06,4.446119,8.743548e-06,1.6e-05,2.34409,0.01907357
