In [1]:
import numpy as np
import pandas as pd

In [2]:
def utility(mode, Lambda, dataset):
    '''
    Get the utility for mode j under specific OD pair
    Lambda: parameter that trade-off different transportation mode
    '''
    subset = dataset[dataset['mode'] == mode]
    wagelist = ['2500', '7500', '12500', '17500', '22500', '30000', '42500', '62500', '87500', '125000', '225000']
    for index, row in subset.iterrows():
        time = row['duration']
        price = row['price']
        vj = 0
        for wage in wagelist:
            wagePercent = row[wage]
            vj += float(wagePercent)*(Lambda*(float(time)*(float(wage)/525600) + float(price))) #525600: convert wage scale 'year' to 'minitues'
    return -vj #assign negative utility?


def InclusiveValue(Nk, Tk, Lambda, dataset):
    '''
    Get the inclusive value for nest K
    Nk:nest k
    T: the dict that contains Tao(dissmilarity parameter) for each Nest. for example, Tk meeas Tao for nest K
    '''
    subsetNk = dataset[dataset['nest'] == Nk]
    modes = list(subsetNk['mode'].unique()) #what modes contained in this nest k
    sumIV = 0
    for j in modes:
        vj = utility(j, Lambda, subsetNk)
        dictVj[j] = vj
        sumIV += np.exp((1/Tk)*vj)  
    IVk = np.log(sumIV)
    return dictVj, IVk
    

def denoSum(T, nestList, Lambda, dataset):
    '''
    Calculate the denomenator for P(y=Nk)
    T: the dict that contains Tk for each Nest, in our case T={1:T1, 2:T2, 3:T3}; T1, T2, T3 defined by us 
    TotK: the total number of nest this model has, in our case TotK = 3
    '''
    deno = 0
    dictIVk = {}
    dictVj = {}
    for Nk in nestList: #k is the k Nest, in our case k=1,2,3
        Tk = T[Nk] #get the tao for nest k 
        dictVj, IVk = InclusiveValue(Nk, Tk, Lambda, dataset)
        denok = np.exp(Tk*IVk)
        deno += denok
        dictIVk[Nk] = IVk
    return dictVj, dictIVk, deno


def probability (j, Nk, T, dictVj, dictIVk, deno):  
    '''
    Calculate the probability for the specific mode j and Nest Nk
    '''
    Tk = T[Nk] #Tk is the Tao for specific Nest K
    vj = dictVj[j]
    IVk = dictIVk[Nk]
    pjk = np.exp((1/Tk)*vj)/np.exp(IVk) #pjk: P(y=j, y belong to Nk)
    pk = np.exp(Tk*IVk)/deno #pk: P(y belong to Nk)
    pj = pjk*pk #pj: P(y=j)
    return pj


## Apply our dataset:

In [3]:
dataAll = pd.read_csv('final_allMode_with_wage.csv').set_index('ODpair')
dataAll['duration'] = dataAll['duration'].map(lambda x: x/60) #convert the scale of duration from seconds to minutes
dataAll.head(10)

Unnamed: 0_level_0,Unnamed: 0,12500,125000,17500,22500,225000,2500,30000,42500,62500,7500,87500,DOlocationID,PUlocationID,duration,mode,nest,price
ODpair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
3-1,0,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,1,3,56.65,1,1,100.0
3-1,1,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,1,3,55.922881,2,1,126.0
3-1,2,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,1,3,62.42,3,1,120.0
3-1,3,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,1,3,128.0,4,2,5.5
3-1,4,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,1,3,569.083333,5,3,0.0
3-2,5,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,2,3,140.0,4,2,8.25
3-2,6,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,2,3,353.8,5,3,0.0
3-3,7,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,3,3,6.508943,1,1,16.647073
3-4,8,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,4,3,39.695,2,1,64.0
3-4,9,0.108075,0.123454,0.047035,0.069126,0.085667,0.041803,0.098668,0.11188,0.156432,0.060036,0.097823,4,3,45.216667,3,1,61.5


In [4]:
#Prepare the new dataset "OD_modeProportion" to store the proportion predicted by our model
#'-' will be replaced by the predicted probability of each mode after run the model
OD_pair = dataAll.index.unique()
OD_modeProportion = pd.DataFrame(dataAll.index.unique(), columns=['ODpair']).set_index('ODpair')
OD_modeProportion['P(mode1)'] = '-'
OD_modeProportion['P(mode2)'] = '-'
OD_modeProportion['P(mode3)'] = '-'
OD_modeProportion['P(mode4)'] = '-'
OD_modeProportion['P(mode5)'] = '-'
OD_modeProportion.head()

Unnamed: 0_level_0,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5)
ODpair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3-1,-,-,-,-,-
3-2,-,-,-,-,-
3-3,-,-,-,-,-
3-4,-,-,-,-,-
3-5,-,-,-,-,-


In [164]:
#Use OD pair '3-1' to Test Function
T1 = 0.3 #set grid search for T1 
T2 = 1
T3 = 1
T = {1:T1, 2:T2, 3:T3} # T={1: T1, 2:T2, 3:T3}; T1, T2, T3 defined by us 
Lambda = 1 #set grid search for Lambda 
totK = 3
modeList = [1, 2, 3, 4, 5]

dataZone = dataAll[dataAll.index=='3-1']
nestList = dataZone['nest'].unique()
dictVj, dictIVk, deno = denoSum(T, nestList, Lambda, dataZone)
try:
    p_mode1nest1 = probability(1, 1, T, dictVj, dictIVk, deno)
except:
    p_mode1nest1 = 'Nan'
try:
    p_mode2nest1 = probability(2, 1, T, dictVj, dictIVk, deno)
except:
    p_mode2nest1 = 'Nan'
try:
    p_mode3nest1 = probability(3, 1, T, dictVj, dictIVk, deno)
except:
    p_mode3nest1 = 'Nan'
try:
    p_mode4nest2 = probability(4, 2, T, dictVj, dictIVk, deno)
except:
    p_mode4nest2 = 'Nan'
try:
    p_mode5nest3 = probability(5, 3, T, dictVj, dictIVk, deno)
except:
    p_mode5nest3 = 'Nan'
dictVj, dictIVk, deno, p_mode1nest1, p_mode2nest1, p_mode3nest1, p_mode4nest2, p_mode5nest3

({5: -70.42356776947967,
  1: -107.01038825152911,
  2: -132.92040795144365,
  3: -127.72442073540066,
  4: -21.33988872366683},
 {1: -356.7012941717637, 2: -21.33988872366683, 3: -70.42356776947967},
 5.397642079543515e-10,
 6.219776700785853e-38,
 1.9283218437119064e-75,
 6.414184393755254e-68,
 1.0,
 4.822020350315956e-22)

In [166]:
#Apply to all OD pairs

T1 = 5 #set grid search for T1 
T2 = 1
T3 = 1
T = {1:T1, 2:T2, 3:T3} # T={1: T1, 2:T2, 3:T3}; T1, T2, T3 defined by us 
Lambda = 2 #set grid search for Lambda 
nestList = dataZone['nest'].unique()

for i in OD_pair: #filter sub-dataset based on each OD-paired taxizone
    dataZone = dataAll[dataAll.index==i] #dataAll is the dataset that contains all modes 
    dictVj, dictIVk, deno = denoSum(T, nestList, Lambda, dataZone)
    
    try:
        p_mode1nest1 = probability(1, 1, T, dictVj, dictIVk, deno)
    except:
        p_mode1nest1 = 'Nan'
    try:
        p_mode2nest1 = probability(2, 1, T, dictVj, dictIVk, deno)
    except:
        p_mode2nest1 = 'Nan'
    try:
        p_mode3nest1 = probability(3, 1, T, dictVj, dictIVk, deno)
    except:
        p_mode3nest1 = 'Nan'
    try:
        p_mode4nest2 = probability(4, 2, T, dictVj, dictIVk, deno)
    except:
        p_mode4nest2 = 'Nan'
    try:
        p_mode5nest3 = probability(5, 3, T, dictVj, dictIVk, deno)
    except:
        p_mode5nest3 = 'Nan'


    #Assign the predicted proportion to the TPproportion datafrome
    OD_modeProportion.loc[i]['P(mode1)'] = p_mode1nest1
    OD_modeProportion.loc[i]['P(mode2)'] = p_mode2nest1
    OD_modeProportion.loc[i]['P(mode3)'] = p_mode3nest1
    OD_modeProportion.loc[i]['P(mode4)'] = p_mode4nest2
    OD_modeProportion.loc[i]['P(mode5)'] = p_mode5nest3




In [167]:
OD_modeProportion.head()

Unnamed: 0_level_0,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5)
ODpair,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3-1,3.87295e-75,1.22183e-79,9.764419999999999e-79,1.0,2.32519e-43
3-2,,,,1.0,1.53148e-16
3-3,1.0,8.733299999999999e-21,6.979329999999999e-20,,
3-4,5.36132e-36,6.16327e-45,1.27469e-44,1.0,2.59266e-11
3-5,,,,1.0,7.51796e-34
