# Which correlations do we want?

Simulate a destination and mode choice model with:
 1) MC
 2) EET with a fixed error term per level
 3) EET with the error term fixed only when identical

Does it make a difference in analysis?

In [32]:
import pandas as pd
import numpy as np

from numpy.random import default_rng
from copy import deepcopy

In [96]:
# Single seeded generator for all synthetic inputs. The order of the draws below
# (zone attrs, income, origin, car time, pt time) determines the generated data.
rng = default_rng(999)

### Zones
num_zones = 10
destination = pd.DataFrame(
    {
        'zone': np.arange(0, num_zones),
        'attrs': rng.normal(1000, 10, size=num_zones),
    }
)

### People - assume each person makes one trip from random origin
num_ppl = 10000
# Columns are built from a dict so the builtin `vars` is no longer shadowed by a
# list of column names. Dict values are evaluated in order: income, then origin.
ppl = pd.DataFrame(
    {
        'id': np.arange(0, num_ppl),
        'income': rng.normal(50, 20, size=(num_ppl)).clip(2, 150),
        'orig': rng.choice(np.arange(0, num_zones), num_ppl),
    }
)


### Modes
# `modes` is the single source of truth; the count and index->name map derive from it.
modes = np.array(['car', 'pt', 'walk'])
num_modes = len(modes)
mode_idx_to_str = dict(enumerate(modes.tolist()))
mode_asc = {'car': 2.5, 'pt': 0.0, 'walk': 10.0}

### Cost and time by mode
# All skims are (num_zones, num_zones) matrices indexed as [origin, destination].
skims = {}
skims['time'] = {}
skims['time']['car'] = rng.normal(20, 20, size=(num_zones, num_zones)).clip(4, 60)
skims['time']['pt'] = rng.normal(40, 10, size=(num_zones, num_zones)).clip(4, 80)
skims['time']['walk'] = 3.0 * skims['time']['car']  # walking takes 3x the car time

skims['cost'] = {}
skims['cost']['car'] = skims['time']['car'] / 4.0  # car cost proportional to car time
skims['cost']['pt'] = 2.0 * np.ones_like(skims['time']['pt'])  # flat pt fare
skims['cost']['walk'] = np.zeros_like(skims['time']['walk'])  # walking is free


# Scenario: decrease pt travel time by 70% for all trips *to* zone 0
# (column 0 of the [origin, destination] matrix, which includes the trip within zone 0).
scenario_skims = deepcopy(skims)
# Variant for trips *from* zone 0 (row 0), currently disabled:
#scenario_skims['time']['pt'][0,:] *= 0.3
scenario_skims['time']['pt'][:,0] *= 0.3

In [97]:
def logsum(utilities, nest_scale=1.0):
    """Row-wise log-sum-exp of ``utilities / nest_scale``.

    ``utilities`` is a 2-D array with one row per observation and one column per
    alternative. The row maximum is subtracted before exponentiating and added
    back afterwards, so large utilities do not overflow. Returns a 1-D array
    with one logsum per row.
    """
    scaled = utilities / nest_scale
    row_max = np.max(scaled, axis=1, keepdims=True)
    shifted_total = np.sum(np.exp(scaled - row_max), axis=1)
    return row_max[:, 0] + np.log(shifted_total)

def inverse_ev1_cdf(x, location=0.0, scale=1.0):
    """Map probabilities ``x`` to EV1 quantiles: ``location - scale * ln(-ln(x))``.

    Works element-wise on scalars or arrays; ``x`` is expected to lie in (0, 1).
    """
    neg_log_prob = -np.log(x)
    return location - scale * np.log(neg_log_prob)

### Utility function parameters
# Travel-time coefficient per mode (multiplies the time skim in mode_utility)
beta_time = {'car': -0.06, 'pt': -0.03, 'walk': -0.2}
# Cost coefficient shared by all modes (the cost term is divided by income in mode_utility)
beta_cost = -0.01

def mode_utility(mode, income, o, d, skims):
    """Systematic utility of ``mode`` for a trip from origin ``o`` to destination ``d``.

    utility = beta_time[mode] * time + beta_cost * cost / income + mode_asc[mode]

    ``skims`` holds the 'time' and 'cost' matrices per mode, indexed ``[o, d]``.
    ``d`` may be a single zone or an array of zones (numpy indexing), in which
    case one utility per destination is returned.
    """
    time_term = beta_time[mode] * skims['time'][mode][o,d]
    cost_term = beta_cost * skims['cost'][mode][o,d] / income
    return time_term + cost_term + mode_asc[mode]

def mode_logsum(o: int, d: np.ndarray, income: float, skims) -> np.ndarray:
    """Logsum over all modes for trips from origin ``o`` to each destination in ``d``.

    Parameters
    ----------
    o : origin zone index.
    d : 1-D array of destination zone indices.
    income : income of the traveller (scales the cost term in ``mode_utility``).
    skims : nested dict of 'time'/'cost' matrices per mode, indexed ``[o, d]``.

    Returns
    -------
    1-D array with one logsum (nest scale 1.0) per entry of ``d``.
    """
    # Stack per-mode utility vectors, then transpose to (n_destinations, n_modes)
    # because ``logsum`` reduces over axis 1.
    mode_utils = np.array([mode_utility(m_, income, o, d, skims) for m_ in modes]).T

    return logsum(mode_utils, 1.0)

# Example: mode_logsum(0, np.array([0, 1, 2]), 100, skims)

In [35]:
#inverse_ev1_cdf(default_rng(seed=np.random.SeedSequence([777, 333])).random(100))

In [98]:
# Coefficient on the mode-choice logsum (expected maximum utility), hence positive
beta_logsum = 0.5

# NOTE: the spelling `dest_altneratives` is kept because other cells reference this name.
dest_altneratives = destination['zone'].values
num_dests = len(dest_altneratives)
dest_attrs = np.log(destination['attrs'].values)

# Destination utilities per person (one array entry per zone):
#   beta_logsum * mode logsum(orig -> every zone) + ln(zone attribute)
# computed once with the base skims and once with the scenario skims.
for util_col_, skims_ in (('dest_utils', skims), ('dest_utils_scen', scenario_skims)):
    ppl[util_col_] = ppl.apply(
        # skims_ is bound as a default so each lambda keeps its own skim set
        lambda person, skims_=skims_: (
            beta_logsum * mode_logsum(int(person['orig']), dest_altneratives, person['income'], skims_)
            + dest_attrs
        ),
        axis=1,
    )

In [99]:
### EET
def destination_choice_model(ppl, seed=777, util_column_name="dest_utils"):
    """Destination choice with explicit error terms (EET).

    For every row of ``ppl`` an EV1 error term is drawn per destination, added to
    the utilities stored in ``util_column_name``, and the position of the largest
    total is returned.

    The generator is seeded from ``[seed, person id, 100]``, so a person gets the
    same error terms for a given ``seed`` regardless of which utility column is
    evaluated.

    Returns a Series (indexed like ``ppl``) of positions in the utility array.
    """
    def _choose(person):
        person_rng = default_rng(seed=np.random.SeedSequence([seed, int(person['id']), 100]))
        error_terms = inverse_ev1_cdf(person_rng.random(size=num_dests))
        return np.argmax(person[util_column_name] + error_terms)

    return ppl.apply(_choose, axis=1)

def mode_choice_model(ppl, skims, dest_col_name='dest', seed=777, od_seed_contrib=False):
    """Mode choice with explicit error terms (EET).

    For every row of ``ppl`` the utility of each mode for the trip from ``orig``
    to the destination in ``dest_col_name`` is computed from ``skims``, an EV1
    error term is added per mode, and the best mode is returned by name.

    od_seed_contrib: if True, the seed depends on the destination, i.e. the
    generator is seeded from ``[seed, person id, 200, destination]``; otherwise
    the last entry is 0 and the error terms are the same for every destination.

    Returns a Series (indexed like ``ppl``) of mode names.
    """
    def _choose(person):
        orig = int(person['orig'])
        dest = int(person[dest_col_name])
        dest_entropy = dest if od_seed_contrib else 0
        person_rng = default_rng(
            seed=np.random.SeedSequence([seed, int(person['id']), 200, dest_entropy])
        )
        error_terms = inverse_ev1_cdf(person_rng.random(size=num_modes))
        utilities = [mode_utility(m_, person['income'], orig, dest, skims) for m_ in modes]
        # list + ndarray adds element-wise, giving one total utility per mode
        return np.argmax(utilities + error_terms)

    chosen_idx = ppl.apply(_choose, axis=1)
    return chosen_idx.map(mode_idx_to_str)

### MC
def destination_choice_model_mc(ppl, seed=777, util_column_name="dest_utils"):
    """Monte Carlo destination choice: sample one destination per person.

    The utilities stored in ``util_column_name`` are turned into multinomial
    logit probabilities and one entry of ``dest_altneratives`` is sampled with
    them. The generator is seeded from ``[seed, person id, 100]``.

    Returns a Series (indexed like ``ppl``) of sampled destination zones.
    """
    def _sample(person):
        utils = np.asarray(person[util_column_name], dtype=float)
        # Shift by the maximum before exponentiating: the probabilities are
        # mathematically unchanged, but large utilities can no longer overflow
        # to inf and produce NaN probabilities.
        exp_utils = np.exp(utils - np.max(utils))
        probs = exp_utils / np.sum(exp_utils)
        person_rng = default_rng(seed=np.random.SeedSequence([seed, int(person['id']), 100]))
        return person_rng.choice(dest_altneratives, p=probs, size=1)[0]

    dest_choice = ppl.apply(_sample, axis=1)
    return dest_choice

def mode_choice_model_mc(ppl, skims, dest_col_name='dest', seed=777, od_seed_contrib=False):
    """Monte Carlo mode choice: sample one mode per person.

    For every row of ``ppl`` the mode utilities for the trip from ``orig`` to the
    destination in ``dest_col_name`` are computed from ``skims``, converted to
    multinomial logit probabilities, and one entry of ``modes`` is sampled.

    od_seed_contrib: if True, the seed depends on the destination, i.e. the
    generator is seeded from ``[seed, person id, 200, destination]``; otherwise
    the last entry is 0.

    ``ppl`` is not modified: probabilities are computed per row instead of being
    staged in a temporary ``mode_exp_utils`` column that is added and dropped.

    Returns a Series (indexed like ``ppl``) of sampled mode names.
    """
    def _sample(person):
        orig = int(person['orig'])
        dest = int(person[dest_col_name])
        utils = np.array(
            [mode_utility(m_, person['income'], orig, dest, skims) for m_ in modes],
            dtype=float,
        )
        # Shift by the maximum before exponentiating so large utilities cannot
        # overflow; the resulting probabilities are mathematically unchanged.
        exp_utils = np.exp(utils - np.max(utils))
        probs = exp_utils / np.sum(exp_utils)
        dest_entropy = dest if od_seed_contrib else 0
        person_rng = default_rng(
            seed=np.random.SeedSequence([seed, int(person['id']), 200, dest_entropy])
        )
        return person_rng.choice(modes, p=probs, size=1)[0]

    mode_choice = ppl.apply(_sample, axis=1)
    return mode_choice

In [100]:
#for i in range(0, 10):
#    ppl.dest_utils.apply(lambda x: (np.exp(x) / np.sum(np.exp(x)))[i]).hist(figsize=(3,2))

In [101]:
# Timing for 10k ppl: about 3.5 s per seed in total without the "seedd"
# (od_seed_contrib=True) variants, about 12.4 s with them.
scenario_inputs_ = (
    # (column tag, destination utility column, skims)
    ('', 'dest_utils', skims),
    ('_scen', 'dest_utils_scen', scenario_skims),
)
model_variants_ = (
    # (column tag, destination model, mode model)
    ('', destination_choice_model, mode_choice_model),
    ('_mc', destination_choice_model_mc, mode_choice_model_mc),
)

for s_ in range(11):
    # Destinations: EET base, EET scenario, MC base, MC scenario
    for model_tag_, dest_model_, mode_model_ in model_variants_:
        for scen_tag_, util_col_, skims_ in scenario_inputs_:
            ppl[f'dest{model_tag_}{scen_tag_}_{s_}'] = dest_model_(
                ppl, util_column_name=util_col_, seed=s_
            )

    # Modes, conditional on the matching destination column, in the same order.
    # The "seedd" variants (columns f'mode_choice{tag}_seedd{scen}_{s_}') would pass
    # od_seed_contrib=True to the same calls; they are disabled.
    for model_tag_, dest_model_, mode_model_ in model_variants_:
        for scen_tag_, util_col_, skims_ in scenario_inputs_:
            ppl[f'mode_choice{model_tag_}{scen_tag_}_{s_}'] = mode_model_(
                ppl, skims_, dest_col_name=f'dest{model_tag_}{scen_tag_}_{s_}', seed=s_
            )

In [102]:
def display_metric(metric, ppl, seed):
    """Tabulate the value counts of one choice metric for a single seed.

    Reads the columns ``{metric}_{seed}``, ``{metric}_mc_{seed}``,
    ``{metric}_scen_{seed}`` and ``{metric}_mc_scen_{seed}`` of ``ppl`` and
    returns their value counts side by side as integer columns
    ``eet_base``, ``mc_base``, ``eet_scen``, ``mc_scen`` (each suffixed with the
    seed). Categories absent from a column are reported as 0.
    """
    labelled_columns = (
        (f'eet_base_{seed}', f'{metric}_{seed}'),
        (f'mc_base_{seed}', f'{metric}_mc_{seed}'),
        (f'eet_scen_{seed}', f'{metric}_scen_{seed}'),
        (f'mc_scen_{seed}', f'{metric}_mc_scen_{seed}'),
    )
    # The "seedd" columns (mode_choice_seedd_*, mode_choice_mc_seedd_*, and their
    # *_scen counterparts) could be appended here in the same way once generated.
    count_frames = [
        ppl[column].value_counts().to_frame(label) for label, column in labelled_columns
    ]

    # Left-to-right outer joins so every category seen in any column is kept
    table = count_frames[0]
    for frame in count_frames[1:]:
        table = table.join(frame, how='outer')
    return table.fillna(0).astype(int)

In [103]:
#pd.concat([display_metric('dest', ppl, i) for i in range(0, 3)], axis=1)

In [104]:
# Mode-choice counts (EET vs. MC, base vs. scenario) side by side for seeds 0-2
pd.concat([display_metric('mode_choice', ppl, seed_) for seed_ in range(3)], axis=1)

Unnamed: 0,eet_base_0,mc_base_0,eet_scen_0,mc_scen_0,eet_base_1,mc_base_1,eet_scen_1,mc_scen_1,eet_base_2,mc_base_2,eet_scen_2,mc_scen_2
walk,8817,8843,8806,8831,8750,8794,8733,8785,8824,8846,8815,8839
car,1027,1016,1027,1017,1072,1033,1072,1021,1000,995,988,984
pt,156,141,167,152,178,173,195,194,176,159,197,177


In [105]:
metric = "mode_choice"

# For every (base seed, scenario seed) pair, compute scenario-minus-base counts
# per category, once for the EET runs and once for the MC runs.
dfs_ = []

for seed in range(10):
    for seed_2 in range(10):
        labelled_columns_ = (
            (f'eet_base_{seed}', f'{metric}_{seed}'),
            (f'eet_scen_{seed_2}', f'{metric}_scen_{seed_2}'),
            (f'mc_base_{seed}', f'{metric}_mc_{seed}'),
            (f'mc_scen_{seed_2}', f'{metric}_mc_scen_{seed_2}'),
        )
        count_frames_ = [
            ppl[column_].value_counts().to_frame(label_) for label_, column_ in labelled_columns_
        ]

        # Outer joins keep categories that are missing from some runs (filled with 0)
        df = count_frames_[0]
        for frame_ in count_frames_[1:]:
            df = df.join(frame_, how='outer')
        df = df.fillna(0).astype(int)

        eet_diff_col_ = f'diff_eet_{seed}_{seed_2}'
        mc_diff_col_ = f'diff_mc_{seed}_{seed_2}'
        df[eet_diff_col_] = df[f'eet_scen_{seed_2}'] - df[f'eet_base_{seed}']
        df[mc_diff_col_] = df[f'mc_scen_{seed_2}'] - df[f'mc_base_{seed}']
        dfs_.append(df[[eet_diff_col_, mc_diff_col_]])

diff_mc = pd.concat(dfs_, axis=1)

In [106]:
# Check EET against MC: scenario-minus-base counts per category for every
# (base seed, scenario seed) pair; diff_eet_* are the EET runs, diff_mc_* the MC runs.
diff_mc

Unnamed: 0,diff_eet_0_0,diff_mc_0_0,diff_eet_0_1,diff_mc_0_1,diff_eet_0_2,diff_mc_0_2,diff_eet_0_3,diff_mc_0_3,diff_eet_0_4,diff_mc_0_4,...,diff_eet_9_5,diff_mc_9_5,diff_eet_9_6,diff_mc_9_6,diff_eet_9_7,diff_mc_9_7,diff_eet_9_8,diff_mc_9_8,diff_eet_9_9,diff_mc_9_9
walk,-11,-12,-84,-58,-2,-4,-57,-66,15,-66,...,-7,-60,7,-21,10,-22,81,-45,-6,-14
car,0,1,45,5,-39,-32,13,9,-44,28,...,-18,32,-8,-10,-47,-5,-107,12,-14,1
pt,11,11,39,53,41,36,44,57,29,38,...,25,28,1,31,37,27,26,33,20,13


In [111]:
# People who did not walk in the base run but walk in the scenario (seed 0): (MC, EET)
became_walk_mc_ = (ppl['mode_choice_mc_0'] != "walk") & (ppl['mode_choice_mc_scen_0'] == "walk")
became_walk_eet_ = (ppl['mode_choice_0'] != "walk") & (ppl['mode_choice_scen_0'] == "walk")
len(ppl.loc[became_walk_mc_]), len(ppl.loc[became_walk_eet_])

(6, 0)

In [None]:
# increase in pt trips to/from zone 0


# Increase/decrease in other areas, and why: nest switching? Trips should not switch away from zone 0, right?