## Note: mode1-taxi, mode2-FHV, mode3-shared FHV, mode4-PT, mode5-walking

In [9]:
import numpy as np
import pandas as pd
import warnings
random_seed = 1
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
from tqdm import tqdm
%matplotlib inline

Suggestions from Stan @ July_17

* tune parameter with sampling taxi zone (large pop); do both for MSE and Weightening one; and store all history of tuning parameters

* comapre the population of all modes for all zones with ACS, plotting bar chart

### Zero scnario outcome

In [5]:
# tax = (acs['P(mode1)'].sum())*2.75 + (acs['P(mode2)'].sum())*2.5 + (acs['P(mode3)'].sum())*0.85
# tax

## Data Preparation

In [10]:
acs = pd.read_csv('final_acs_transportation_choice.csv')
acs.head()

Unnamed: 0,taxi_zone,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5),P(mode6)
0,3.0,0.115434,17.843262,7.334361,3885.402712,478.622467,4316.681764
1,4.0,42.851015,140.89118,84.609811,6937.780033,2631.714648,779.153313
2,5.0,0.081377,13.158607,2.120444,1860.706347,40.495673,6312.437553
3,6.0,0.109017,7.637848,1.878344,1974.772111,198.118892,3086.483787
4,7.0,25.522468,142.614028,37.582529,28436.765508,2680.39788,5365.117587


In [11]:
mode_data = pd.read_csv('final_allMode_with_2017wage_cleaned_update.csv', index_col=0)
mode_data = mode_data.dropna()
print(mode_data.shape)
mode_data.head()

(298632, 18)


Unnamed: 0,DOlocationID,ODpair,PUlocationID,duration,mode,nest,price,2500,7500,12500,17500,22500,30000,42500,62500,87500,125000,225000
0,4,3-4,3,39.695,2,1,64.0,0.250819,0.360216,0.648452,0.282211,0.414755,0.592009,0.671282,0.93859,0.586936,0.740725,0.514005
1,4,3-4,3,45.216667,3,1,61.5,0.250819,0.360216,0.648452,0.282211,0.414755,0.592009,0.671282,0.93859,0.586936,0.740725,0.514005
2,4,3-4,3,83.0,4,2,5.5,0.250819,0.360216,0.648452,0.282211,0.414755,0.592009,0.671282,0.93859,0.586936,0.740725,0.514005
3,4,3-4,3,225.933333,5,3,0.0,0.250819,0.360216,0.648452,0.282211,0.414755,0.592009,0.671282,0.93859,0.586936,0.740725,0.514005
4,4,3-4,3,39.695,6,4,9.424,0.250819,0.360216,0.648452,0.282211,0.414755,0.592009,0.671282,0.93859,0.586936,0.740725,0.514005


## Modeling

### Nested Logit Model

In [12]:
def utility(mode, wage, Lambda, dataset):
    '''
    Get the utility for mode j under specific OD pair
    Lambda: parameter that trade-off different transportation mode
    '''
    subset = dataset[dataset['mode'] == mode]
    vj = Lambda * (float(int(wage)/124800) * float(subset['duration']) + float(subset['price'])) #124800: convert wage scale 'year' to 'minitues'. 52 weeks * 5 workdays/week * 8 hrs/day * 60 mins/hour = 124800 minutes
    return -vj 


def InclusiveValue(Nk, Tk, wage, dictVj, Lambda, dataset):
    '''
    Get the inclusive value for nest K
    Nk:nest k
    T: the dict that contains Tao(dissmilarity parameter) for each Nest. for example, Tk meeas Tao for nest K
    '''
    subsetNk = dataset[dataset['nest'] == Nk]
    modes = list(subsetNk['mode'].unique()) #what modes contained in this nest k
    sumIV = 0
    for j in modes:   
        vj = utility(j, wage, Lambda, subsetNk)
        dictVj[j] = vj
        sumIV += np.exp((1/Tk)*vj)  
    IVk = np.log(sumIV)
    return dictVj, IVk
    

def denoSum(T, nestList, wage, Lambda, dataset):
    '''
    Calculate the denomenator for P(y=Nk)
    T: the dict that contains Tk for each Nest, in our case T={1:T1, 2:T2, 3:T3}; T1, T2, T3 defined by us 
    TotK: the total number of nest this model has, in our case TotK = 3
    '''
    deno = 0
    dictIVk = {}
    dictVj = {}
    for Nk in nestList: #k is the k Nest, in our case k=1,2,3
        Tk = T[Nk] #get the tao for nest k 
        dictVj, IVk = InclusiveValue(Nk, Tk, wage, dictVj, Lambda, dataset)
        denok = np.exp(Tk*IVk)
        deno += denok
        dictIVk[Nk] = IVk
    return dictVj, dictIVk, deno


def probability (j, Nk, T, dictVj, dictIVk, deno):  
    '''
    Calculate the probability for the specific mode j and Nest Nk
    '''
    Tk = T[Nk]
    IVk = dictIVk[Nk]
    vj = dictVj[j]
    pjk = np.exp((1/Tk)*vj)/np.exp(IVk) #pjk: P(y=j, y belong to Nk)
    pk = np.exp(Tk*IVk)/deno #pk: P(y belong to Nk)
    pj = pjk*pk #pj: P(y=j)
    return pj

### Apply to our case

In [38]:
def apply_model(T, totmode, wageList, Lambda, dataAll):
    '''
    apply to our case
    '''
    import timeit
    start = timeit.default_timer()

    ODpair_list = list(dataAll.ODpair.unique())
    p = []
    loss_function_deno = []
    pop_wage = [] #store the population under od and wage
    for od in tqdm(ODpair_list,position=0): #indentify OD pair  
        dataOD = dataAll[dataAll.ODpair==od] 
        modeList = list(dataOD['mode'])
        nestList = list(dataOD['nest'])
        
        pop_OD = [] #store the results under each OD pair
        loss_function_deno_OD = [] #store the denominator of the loss function under each OD pair
        
        for wage in wageList:            
            dictVj, dictIVk, deno = denoSum(T, set(nestList), wage, Lambda, dataOD)
            pop_mode = [] #store the population results under each OD pair and each wage
            loss_function_deno_mode = [] #store the deno of loss function under each OD pair and each wage
            pop_wage_mode = [od, wage] #store the population for each wage
            for i in range(1,totmode+1):
                if i in modeList: #not all modes appear in every OD pair
                    prob = probability(i, nestList[modeList.index(i)], T, dictVj, dictIVk, deno) #probability under OD pari and mode i
                    if np.isnan(prob) == True: #if predicted probability is nan, replace it as 0, means no people choose
                        prob = 0
                    
                    pop = dataOD[wage].mean() * prob
                    loss_function_denoi = dataOD[wage].mean() * (prob - prob**2)               
                    pop_mode.append(pop)
                    pop_wage_mode.append(pop)
                    loss_function_deno_mode.append(loss_function_denoi)
                    
                else: 
                    pop_mode.append(0) #the probability of mode which not in the modeList is also 0
                    pop_wage_mode.append(0)
                    loss_function_deno_mode.append(0)
                    #pop_mode.append(1)
            pop_OD.append((pop_mode)) #the shape of pop_OD is len(wageList) * 5 
            pop_wage.append(pop_wage_mode)
            loss_function_deno_OD.append(loss_function_deno_mode)
        pop_OD_sum = [sum(x) for x in zip(*pop_OD)] #sum the population of each wage 
        loss_function_deno_sum = [sum(x) for x in zip(*loss_function_deno_OD)]
        p.append(pop_OD_sum)
        loss_function_deno.append(loss_function_deno_sum)

    df = pd.DataFrame(p, columns=['P(mode1)', 'P(mode2)', 'P(mode3)', 'P(mode4)', 'P(mode5)', 'P(mode6)']
                      , index=ODpair_list)
    df_LF_deno = pd.DataFrame(loss_function_deno, columns=['mode1', 'mode2', 'mode3', 'mode4', 'mode5', 'mode6']
                               , index=ODpair_list)
    
    df_pop_wage = pd.DataFrame(pop_wage, columns=['ODpair', 'wage', 'P(mode1)', 'P(mode2)', 'P(mode3)', 'P(mode4)', 
                                                  'P(mode5)', 'P(mode6)'])
    
    stop = timeit.default_timer()
    timeslot = stop - start
    return df, df_LF_deno, df_pop_wage, timeslot


def compare_with_ground_truth(predictdf, loss_function_deno, truedf):
    '''
    compare our predicted transportation choice with ground truth
    
    The header of the datafrme after merge (named 'data_compare') should be like:
    taxi_zone | P(mode1)_x | P(mode2)_x | P(mode3)_x | P(mode4)_x | P(mode5)_x | P(mode1)_y | P(mode2)_y | P(mode3)_y | P(mode4)_y | P(mode5)_y
    '''
    import numpy as np
    # makesure predictdf and truedf have the same formats
    for col in predictdf.columns:
        predictdf[col] = predictdf[col].astype(float) 
    predictdf = predictdf.fillna(0)
    predictdf = predictdf.replace([np.inf, -np.inf], np.nan)
    predictdf = predictdf.dropna()
    predictdf['taxi_zone'] = predictdf.index.map(lambda x: x.split('-')[0]) #get origin taxi zone from each OD pair
    predictdf = predictdf.groupby('taxi_zone').sum().reset_index() #group the popuation by taxi zone
    predictdf['taxi_zone'] = predictdf['taxi_zone'].astype(int)
    
    #do same thing as predictdf to loss_function_deno dataframe
    for col in loss_function_deno.columns:
        loss_function_deno[col] = loss_function_deno[col].astype(float) 
    loss_function_deno = loss_function_deno.fillna(0)
    loss_function_deno = loss_function_deno.replace([np.inf, -np.inf], np.nan)
    loss_function_deno = loss_function_deno.dropna()
    loss_function_deno['taxi_zone'] = loss_function_deno.index.map(lambda x: x.split('-')[0]) #get origin taxi zone from each OD pair
    loss_function_deno = loss_function_deno.groupby('taxi_zone').sum().reset_index() #group the popuation by taxi zone
    loss_function_deno['taxi_zone'] = loss_function_deno['taxi_zone'].astype(int)   
    
    truedf['taxi_zone'] = truedf['taxi_zone'].astype(int)
    
    data_compare = pd.merge(predictdf, truedf, left_on='taxi_zone', right_on = 'taxi_zone')
#     data_compare = pd.merge(predictdf, truedf, left_on='taxi_zone', right_on = 'taxi_zone', how='left')
    data_compare = data_compare.dropna()  #some zone in predictdf are not exist in acs!!!!?????
    data_compare_zone = data_compare['taxi_zone'].unique()
    loss_function_deno = loss_function_deno[loss_function_deno['taxi_zone'].isin(data_compare_zone)]
#     print(data_compare.head())
    
#     loss = 0
    rloss = 0
    rmse = 0
    for i in range(1,totmode+1):
        #define the loss function
#         lossi = sum((data_compare[data_compare.columns[i]] - data_compare[data_compare.columns[i+totmode]])**2/loss_function_deno[loss_function_deno.columns[i]])
#         rlossi = np.sqrt(lossi)
        data_compare_deno = (data_compare[data_compare.columns[i+totmode]].replace(to_replace=0,value = 1)).copy()
        rlossi = (np.sqrt(sum(((data_compare[data_compare.columns[i]] - data_compare[data_compare.columns[i+totmode]])**2)\
                        /(data_compare_deno))))
#         rlossi = (np.sqrt(sum((data_compare[data_compare.columns[i]] - data_compare[data_compare.columns[i+totmode]])**2)))\
#                         /(sum(data_compare[data_compare.columns[i+totmode]]) if sum(data_compare[data_compare.columns[i+totmode]])!=0 else 1)
        rmsei = (np.sqrt(sum((data_compare[data_compare.columns[i]] - data_compare[data_compare.columns[i+totmode]])**2)))#\
#                         /data_compare[data_compare.columns[i+5]]) if data_compare[data_compare.columns[i+5]]!=0 else \
#                  (np.sqrt(sum((data_compare[data_compare.columns[i]])**2) )
        if rlossi == np.nan: print('Nan encountered')
#     loss += lossi
    rloss += rlossi
    rmse += rmsei
    
    tot_mse = np.sqrt(sum((np.asarray(predictdf.sum()[1:]) - np.asarray(truedf.sum()[1:]))**2))
    return data_compare, loss_function_deno, rloss, rmse, tot_mse

In [42]:
#choose taxi zone 3 to test the algrithm whether bug-free:
T1 = 0.01
T2 = 1
T3 = 1
T4 = 1
T = {1:T1, 2:T2, 3:T3, 4:T4} #Tao for each nest
Lambda = .2
totmode = 6
wagelist = ['2500', '7500', '12500', '17500', '22500', '30000', '42500', '62500', '87500', '125000', '225000']
# testdf = mode_data[mode_data.index.isin(list(mode_data.index[:20]))] #test OD pair 3-1,3-2,3-3,3-4
testdf = mode_data[mode_data['PUlocationID']== 3]
# testdf = mode_data[mode_data.index == '3-2']

predict_choice_test, loss_function_denodf_test, pop_wage_test, timeslot_test = apply_model(T, totmode, wagelist, Lambda, testdf)
combine_test, loss_deno_test, rloss_test, rmse_test, tot_mse_test= compare_with_ground_truth(predict_choice_test, loss_function_denodf_test, acs)
print(predict_choice_test.sum()[1:], (T1, Lambda, rmse_test, tot_mse_test))

print('The time used to run the code:', timeslot_test)
# print('The weighted cumulative square error of this model is:', loss_test)
print('The root of weighted cumulative square error of this model is:', rloss_test)
print('The root of mean squared error of this model is:', rmse_test)
print('The root of mean squared error from total population:', tot_mse_test)

100%|██████████| 237/237 [00:29<00:00,  8.15it/s]

P(mode2)     153.577951
P(mode3)       3.735355
P(mode4)    2014.993096
P(mode5)     509.816521
P(mode6)    5922.089916
dtype: float64 (0.01, 0.2, 1605.408152131542, 1944325.6177265276)
The time used to run the code: 29.261811717002274
The root of weighted cumulative square error of this model is: 24.434893340320773
The root of mean squared error of this model is: 1605.408152131542
The root of mean squared error from total population: 1944325.6177265276





In [19]:
pop_wage_test['sum'] = pop_wage_test['P(mode1)'] +  pop_wage_test['P(mode2)'] +\
         pop_wage_test['P(mode3)'] +  pop_wage_test['P(mode4)'] + \
        pop_wage_test['P(mode5)'] +  pop_wage_test['P(mode6)']


In [20]:
pop_wage_test.head()

Unnamed: 0,ODpair,wage,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5),P(mode6),sum
0,3-4,2500,0.0,0.0,0.0,0.077483,0.13129,0.042046,0.250819
1,3-4,7500,0.0,0.0,0.0,0.156155,0.084175,0.119886,0.360216
2,3-4,12500,0.0,0.0,0.0,0.287219,0.049254,0.311978,0.648452
3,3-4,17500,0.0,0.0,0.0,0.108906,0.005941,0.167364,0.282211
4,3-4,22500,0.0,0.0,0.0,0.129952,0.002255,0.282548,0.414755


In [21]:
loss_function_denodf_test.head()

Unnamed: 0,mode1,mode2,mode3,mode4,mode5,mode6
3-4,0.0,0.0,0.0,0.6485,0.181052,0.625651
3-7,0.0,0.0,0.0,4.524081,0.914991,4.470979
3-9,0.0,0.0,0.0,0.121966,0.021919,0.126573
3-10,0.0,0.0,0.0,0.146089,0.037146,0.151259
3-11,0.0,0.0,0.0,0.593889,0.094664,0.515966


In [22]:
pop_wage_test.head()

Unnamed: 0,ODpair,wage,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5),P(mode6),sum
0,3-4,2500,0.0,0.0,0.0,0.077483,0.13129,0.042046,0.250819
1,3-4,7500,0.0,0.0,0.0,0.156155,0.084175,0.119886,0.360216
2,3-4,12500,0.0,0.0,0.0,0.287219,0.049254,0.311978,0.648452
3,3-4,17500,0.0,0.0,0.0,0.108906,0.005941,0.167364,0.282211
4,3-4,22500,0.0,0.0,0.0,0.129952,0.002255,0.282548,0.414755


In [23]:
combine_test

Unnamed: 0,taxi_zone,P(mode1)_x,P(mode2)_x,P(mode3)_x,P(mode4)_x,P(mode5)_x,P(mode6)_x,P(mode1)_y,P(mode2)_y,P(mode3)_y,P(mode4)_y,P(mode5)_y,P(mode6)_y
0,3,101.78716,153.577951,3.735355,2014.993096,509.816521,5922.089916,0.115434,17.843262,7.334361,3885.402712,478.622467,4316.681764


In [24]:
loss_deno_test

Unnamed: 0,taxi_zone,mode1,mode2,mode3,mode4,mode5,mode6
0,3,89.518978,129.249187,3.656177,1168.081308,362.266873,1353.983725


### Tune the parameters

**Pick the taxi zone which sum population accounts for 80% of the total population**

In [25]:
pop_zone = pd.DataFrame(acs.sum(axis=1), columns=['population']).sort_values('population', ascending=False)
pop_zone['cummulative'] = pop_zone['population'].cumsum(axis=None, skipna=True)
pop_zone['cummulative_percent'] = pop_zone['cummulative']/sum(pop_zone['population'])
pop_zone.head()

Unnamed: 0,population,cummulative,cummulative_percent
82,42250.0,42250.0,0.014506
54,41875.0,84125.0,0.028883
4,36695.0,120820.0,0.041482
114,34325.0,155145.0,0.053267
87,34245.0,189390.0,0.065024


In [27]:
topPoP_zone = pop_zone[pop_zone['cummulative_percent']<=0.60].index
len(topPoP_zone)

86

**Use rloss and rmse to tune the parameters:**

In [187]:
#tune the parameters based on large population taxi zone, use rloss
#REMEMEBER: SAVE THE tuning_history_rloss & tuning_history_rmse AS TXT !!!!!!!
rloss_best = 1e10
best_T1_loss = .2
best_Lambda_loss = .001
tuning_history_rloss = []

rmse_best = 1e10 
best_T1_mse = .2
best_Lambda_mse = .005
tuning_history_rmse = []

for Ti in [.001]: #Set your grid search range
    for lambdai in [.0005]:
        T1 = Ti
        T2 = 1
        T3 = 1
        T4 = 1
        T = {1:T1, 2:T2, 3:T3, 4:T4} #Tao for each nest
        Lambda = lambdai 
        totmode = 6
        wagelist = ['2500', '7500', '12500', '17500', '22500', '30000', '42500', '62500', '87500', '125000', '225000']
        testdf = mode_data[mode_data['PUlocationID'].isin(topPoP_zone)]
        
        predict_choice_test, loss_function_denodf_test, pop_wage_test, timeslot_test = apply_model(T, totmode, wagelist, Lambda, testdf)
        combine_test, loss_deno_test, rloss_test, rmse_test = compare_with_ground_truth(predict_choice_test, loss_function_denodf_test, acs)
        print(predict_choice_test.sum()[1:], (Ti, lambdai, rmse_test))
        
        tuning_history_rloss.append((Ti, lambdai, rloss_test))
        tuning_history_rmse.append((Ti, lambdai, rmse_test))
        
        if rloss_test < rloss_best:
            rloss_best = rloss_test
            best_T1_loss = Ti
            best_Lambda_loss = lambdai
            
        if rmse_test < rmse_best:
            rmse_best = rmse_test
            best_T1_mse = Ti
            best_Lambda_mse = lambdai
            
print('The best parameters form weighting loss are:', rloss_best, best_T1_loss, best_Lambda_loss)
print('The best parameters form rmse are:', rmse_best, best_T1_mse, best_Lambda_mse)

The best parameters form weighting loss are: 165.74423392027845 0.001 0.0005
The best parameters form rmse are: 9577.611797342011 0.001 0.0005


In [83]:
print('The best parameters form weighting loss are:', rloss_best, best_T1_loss, best_Lambda_loss)
print('The best parameters form rmse are:', rmse_best, best_T1_mse, best_Lambda_mse)

The best parameters form weighting loss are: 20985 0.2 0.001
The best parameters form rmse are: 21310.847749170036 0.1 0.0005


In [188]:
tuning_history_rloss = tuning_history_rmse[0::2]

In [189]:
tuning_history_rmse = tuning_history_rmse[1::2]

In [190]:
tuning_history_rloss

[(0.001, 0.0005, 9577.611797342011)]

In [191]:
tuning_history_rmse

[]

In [49]:
with open('tuning_rloss.txt_kv3','w') as f:
    f.write("\n".join(F'{x}' for x in tuning_history_rloss))

with open('tuning_rmse.txt_kv3','w') as f:
    f.write("\n".join(F'{x}' for x in tuning_history_rmse))

Unnamed: 0,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5),P(mode6)
4-3,0.0,0.257884,0.25427,0.169952,0.112349,0.205546
4-6,0.0,0.583221,0.0,0.825247,0.550922,1.040611
4-7,6.613683,6.098216,6.086484,2.358611,2.046708,2.796299
4-9,0.0,0.27223,0.270038,0.153745,0.094839,0.209148
4-10,0.247192,0.239381,0.245836,0.096379,0.052529,0.118683


In [192]:
predict_choice_test = predict_choice_test.reset_index()
predict_choice_test.rename_axis({'index' : 'ODpair'},axis=1,inplace=True)
predict_choice_test['origin'] = predict_choice_test.ODpair.apply(lambda x: x.split('-')[0])

In [193]:
o = list(predict_choice_test.origin.unique())

In [194]:
acs_check = acs[acs.taxi_zone.isin(o)]

In [195]:
predict_choice_test.iloc[:,0:7].head()

Unnamed: 0,ODpair,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5),P(mode6)
0,4-3,0.0,0.16247,0.086493,0.252168,0.24425,0.254619
1,4-6,0.0,0.743388,0.0,0.756806,0.734582,0.765224
2,4-7,6.493206,0.002159,0.004037,6.501776,6.439378,6.559444
3,4-9,0.0,0.149446,0.101404,0.252,0.241056,0.256094
4,4-10,0.150825,0.013678,0.087242,0.253199,0.238958,0.256098


In [205]:
predict_choice_test.iloc[:,1:7].sum(axis=0).values/acs_check.iloc[:,1:].sum(axis=0).values

array([94.34391819, 15.14123078, 25.53578099,  0.39711405,  1.7610402 ,
        1.17402005])

In [204]:
predict_choice_test.iloc[:,1:7].sum(axis=0).sum()

0.9997892783005966

In [203]:
acs_check.iloc[:,1:].sum(axis=0).sum()

223042.99999999997

## Apply to Scenario 1

In [None]:
T1 = ... #Input best T1
Lambda = ... # Input best Lambda 
T2 = 1
T3 = 1
T4 = 1
T = {1:T1, 2:T2, 3:T3, 4:T4} #Tao for each nest
totmode = 6
wagelist = ['2500', '7500', '12500', '17500', '22500', '30000', '42500', '62500', '87500', '125000', '225000']

predict_choice, loss_function_denodf, pop_wage, timeslot = apply_model(T, totmode, wagelist, Lambda, mode_data)
combine, loss_deno, rloss, rmse = compare_with_ground_truth(predict_choice, loss_function_denodf, acs)

In [53]:
print('The time used to run the code:', timeslot)
print('The rmse of this model is:', rmse)
print()
print('The predict transportation choice is:')
predict_choice.head(15)

The time used to run the code: 3046.9771932660005
The rmse of this model is: 22363.61354101562

The predict transportation choice is:


Unnamed: 0,P(mode1),P(mode2),P(mode3),P(mode4),P(mode5)
3-4,0.0,0.514361,2.955623e-09,5.216414,0.269225
3-7,1.992068e-07,3.910663,1.179624e-06,30.705659,1.383677
3-9,0.0,0.6564801,4.014125e-07,1.288755,0.054765
3-10,0.0,0.6715375,4.977698e-13,2.220289,0.108173
3-11,0.0,2.1441130000000002e-18,0.0,3.883684,0.116316
3-13,0.0,4.608020000000001e-22,1.111936,11.574317,0.313747
3-14,0.0,2.684983e-07,1.943933e-15,10.311189,0.688811
3-15,0.0,7.586212,1.90104,9.770136,0.742612
3-16,0.0,0.001828701,7.392624,8.17645,0.429098
3-17,0.0,3.330155e-06,1.041209e-11,19.766922,1.233074


In [54]:
combine.head()

Unnamed: 0,taxi_zone,P(mode1)_x,P(mode2)_x,P(mode3)_x,P(mode4)_x,P(mode5)_x,P(mode1)_y,P(mode2)_y,P(mode3)_y,P(mode4)_y,P(mode5)_y
0,10,696.22652,2210.845068,186.978763,6407.563514,1355.386135,11.652287,149.882786,63.664064,9899.616882,736.183981
1,100,131.163394,1.268864,3.497878,560.384303,5.685561,48.244337,13.880226,2.62301,293.067961,344.184466
2,101,784.254402,953.617604,359.841589,3240.818084,798.468322,0.785213,46.909507,7.434348,5191.320614,893.550318
3,102,2813.388459,1512.98024,118.758824,6902.522214,2263.350262,0.3464,66.242517,22.211104,12097.874199,1428.325779
4,106,655.970129,70.909008,16.412553,2468.909214,49.799097,0.76352,15.625191,2.837223,2986.428291,256.345776


In [55]:
# Save the dataframe to your local computer!!!
predict_choice.to_csv('../results/Final_results/results_scenario1.csv')
combine.to_csv('../results/Final_results/results_scenario1_combined.csv')
pop_wage.tocsv('../results/Final_results/results_pop_under_wage.csv')
loss_function_denodf.to_csv('loss_function_denodf.csv')
loss_deno.to_csv('loss_function_deno.csv')

### Apply to scenario 2 and 3
Only use effeccted taxi zone for Scenario2 (+2.75) and scenario 3 (+10):

**For Scenario 2**
Taxi-zones below 96th street: 140,141, 237, 236, 263, 262, 43, 238, 239, 143,142, 12, 88, 261, 13, 87, 209, 231, 45, 232, 148, 144, 211, 125, 158, 249, 114, 113, 79, 4, 224, 107, 234, 90, 68, 246, 186, 164, 100, 170, 137, 233, 162, 161, 230, 48, 50, 163, 229.

**For Scenario 3**
Taxi-zones under 60th street: 12, 88, 261, 13, 87, 209, 231, 45, 232, 148, 144, 211, 125, 158, 249, 114, 113, 79, 4, 224, 107, 234, 90, 68, 246, 186, 164, 100, 170, 137, 233, 162, 161, 230, 48, 50, 163, 229.