# Start work here, clean up as you go

Problem below: scale of error term on lower levels needs to be given by nest, not the case atm

Do I remember this correctly and probabilities are calculated as products of marginal and conditional probabilities?
if so, the corresponding utilities at leaf and node levels would need to be calculated, and I would be able to use
these directly, right? CHECK, would make it much easier!

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

In [4]:
import os
import argparse
from datetime import datetime

import numpy as np
import pandas as pd
from numpy.random import default_rng


from activitysim.cli import run
from activitysim.core import inject
from activitysim.core import tracing
from activitysim.core import config
from activitysim.core import pipeline
from activitysim.core import mem
from activitysim.core import chunk
from activitysim.core import simulate
from activitysim.core import logit
from activitysim.abm.models.util.mode import mode_choice_simulate
from activitysim.abm.models.util import estimation
from activitysim.core import expressions
from activitysim.core.util import assign_in_place

In [5]:
pd.set_option("max_columns", 500)

In [6]:
root_dir = "/mnt/c/Users/jan.zill/code/activitysim"
example_dir = os.path.join(root_dir, "test_example_mtc")

In [7]:
os.chdir(example_dir)

In [8]:
parser = argparse.ArgumentParser()
run.add_run_args(parser)
# args = parser.parse_args()
# parser.parse_args(['--sum', '7', '-1', '42'])
args = parser.parse_args(['-c', 'configs', '-o', 'output', '-d', 'data'])
#run.run(args)  # 2mins full example run


if not inject.is_injectable('preload_injectables'):
    from activitysim import abm  # register abm steps and other abm-specific injectables
run.handle_standard_args(args)  # possibly update injectables

## trip mode choice by hand

In [9]:
#inject.get_table('trips').to_frame()  #pipeline.orca.get_raw_table('trips').to_frame()
#inject.get_table('tours_merged').to_frame()  #pipeline.orca.get_raw_table('tours_merged').to_frame()
#inject.get_injectable('network_los')

In [10]:
def run_trip_mode_choice(do_these_purposes=None, simulate_function=simulate.simple_simulate):
    #do_these_purposes=['escort']
    """open pipeline and load stuff for mode choice dev assuming model has been run and pipeline.h5 exists"""
    resume_after = "trip_scheduling"
    model_name = "trip_mode_choice"
    chunk_size = 0  # test_mtc means no chunking

    pipeline.open_pipeline(resume_after)
    # preload any bulky injectables (e.g. skims) not in pipeline
    inject.get_injectable('preload_injectables', None)
    pipeline._PIPELINE.rng().begin_step(model_name)
    step_name = model_name
    args = {}
    checkpoint = pipeline.intermediate_checkpoint(model_name)
    inject.set_step_args(args)

    trips = inject.get_table('trips')
    tours_merged = inject.get_table('tours_merged')
    network_los = inject.get_injectable('network_los')

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    print("Running with %d trips", trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_specs = config.get_logit_model_settings(model_settings)

    estimator = estimation.manager.begin_estimation('trip_mode_choice')

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        if (do_these_purposes is not None) and (primary_purpose not in do_these_purposes):
            continue

        print("trip_mode_choice tour_type '%s' (%s trips)" %
              (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        ################ Replace wrapper function
        #     choices = mode_choice_simulate(...)
        spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
        nest_spec = simulate.eval_nest_coefficients(nest_specs, coefficients, segment_trace_label)
        choices = simulate_function(
            choosers=trips_segment,
            spec=spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            want_logsums=logsum_column_name is not None,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator,
            trace_column_names=None)
        # for consistency, always return dataframe, whether or not logsums were requested
        if isinstance(choices, pd.Series):
            choices = choices.to_frame('choice')
        choices.rename(columns={'logsum': logsum_column_name,
                                'choice': mode_column_name},
                       inplace=True)
        alts = spec.columns
        choices[mode_column_name] = choices[mode_column_name].map(dict(list(zip(list(range(len(alts))), alts))))
        ################
        choices_list.append(choices)
    choices_df_asim = pd.concat(choices_list)

    # update trips table with choices (and potionally logssums)
    trips_df = trips.to_frame()

    if (do_these_purposes is not None):
        trips_df  = trips_df.loc[trips_df.primary_purpose.isin(do_these_purposes)]

    assign_in_place(trips_df, choices_df_asim)
    assert not trips_df[mode_column_name].isnull().any()

    finalise = True
    if finalise:
        inject.set_step_args(None)
        #
        pipeline._PIPELINE.rng().end_step(model_name)
        pipeline.add_checkpoint(model_name)
        if not pipeline.intermediate_checkpoint():
            pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

        pipeline.close_pipeline()

    print("Done")

    return trips_df

In [11]:
trips_df = run_trip_mode_choice()

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'atwork' (27 trips)
trip_mode_choice tour_type 'eatout' (33 trips)
trip_mode_choice tour_type 'escort' (6 trips)
trip_mode_choice tour_type 'othdiscr' (43 trips)
trip_mode_choice tour_type 'othmaint' (46 trips)
trip_mode_choice tour_type 'school' (37 trips)
trip_mode_choice tour_type 'shopping' (77 trips)
trip_mode_choice tour_type 'social' (19 trips)
trip_mode_choice tour_type 'univ' (26 trips)
trip_mode_choice tour_type 'work' (168 trips)
Done


In [12]:
trips_df

Unnamed: 0_level_0,person_id,household_id,primary_purpose,trip_num,outbound,trip_count,destination,origin,tour_id,purpose,destination_logsum,depart,trip_mode,mode_choice_logsum
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
8684833,26478,26478,eatout,1,True,1,13,8,1085604,eatout,,11.0,WALK,-1.171760
8684837,26478,26478,eatout,1,False,1,8,13,1085604,home,,11.0,WALK,-1.238719
8685009,26478,26478,othmaint,1,True,1,10,8,1085626,othmaint,,12.0,BIKE,6.198626
8685013,26478,26478,othmaint,1,False,1,8,10,1085626,home,,13.0,BIKE,6.175681
8753057,26686,26686,eatout,1,True,1,5,8,1094132,eatout,,19.0,WALK,4.457539
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2472945113,7539466,2848131,shopping,1,True,1,8,3,309118139,shopping,,18.0,WALK_LOC,12.537675
2472945117,7539466,2848131,shopping,1,False,2,25,8,309118139,shopping,56.842247,21.0,WALK_LOC,11.880804
2472945118,7539466,2848131,shopping,2,False,2,3,25,309118139,home,,22.0,WALK,13.710030
2473024473,7539708,2848373,univ,1,True,1,13,18,309128059,univ,,16.0,WALK_LOC,-0.530696


## nested dev

In [13]:
# see fct above - return if necessary
#spec = simulate.eval_coefficients(model_spec, coefficients, estimator)
#nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label)

In [14]:
#print(nest_spec)
#for nest in logit.each_nest(nest_spec):
#    nest.print()

In [15]:
def eval_nl_dev(choosers, spec, nest_spec, locals_d, custom_chooser, estimator,
                log_alt_losers=False,
                want_logsums=False, trace_label=None,
                trace_choice_name=None, trace_column_names=None):

    trace_label = tracing.extend_trace_label(trace_label, 'eval_nl')
    assert trace_label
    have_trace_targets = tracing.has_trace_targets(choosers)

    logit.validate_nest_spec(nest_spec, trace_label)

    if have_trace_targets:
        tracing.trace_df(choosers, '%s.choosers' % trace_label)

    raw_utilities = simulate.eval_utilities(spec, choosers, locals_d,
                                            log_alt_losers=log_alt_losers,
                                            trace_label=trace_label, have_trace_targets=have_trace_targets,
                                            estimator=estimator, trace_column_names=trace_column_names)
    chunk.log_df(trace_label, "raw_utilities", raw_utilities)

    if have_trace_targets:
        tracing.trace_df(raw_utilities, '%s.raw_utilities' % trace_label,
                         column_labels=['alternative', 'utility'])

    # exponentiated utilities of leaves and nests
    nested_exp_utilities = simulate.compute_nested_exp_utilities(raw_utilities, nest_spec)
    chunk.log_df(trace_label, "nested_exp_utilities", nested_exp_utilities)

    del raw_utilities
    chunk.log_df(trace_label, 'raw_utilities', None)

    if have_trace_targets:
        tracing.trace_df(nested_exp_utilities, '%s.nested_exp_utilities' % trace_label,
                         column_labels=['alternative', 'utility'])

    # probabilities of alternatives relative to siblings sharing the same nest
    nested_probabilities = simulate.compute_nested_probabilities(nested_exp_utilities, nest_spec,
                                                                 trace_label=trace_label)
    chunk.log_df(trace_label, "nested_probabilities", nested_probabilities)

    if want_logsums:
        # logsum of nest root
        logsums = pd.Series(np.log(nested_exp_utilities.root), index=choosers.index)
        chunk.log_df(trace_label, "logsums", logsums)

    del nested_exp_utilities
    chunk.log_df(trace_label, 'nested_exp_utilities', None)

    if have_trace_targets:
        tracing.trace_df(nested_probabilities, '%s.nested_probabilities' % trace_label,
                         column_labels=['alternative', 'probability'])

    # global (flattened) leaf probabilities based on relative nest coefficients (in spec order)
    base_probabilities = simulate.compute_base_probabilities(nested_probabilities, nest_spec, spec)
    chunk.log_df(trace_label, "base_probabilities", base_probabilities)

    del nested_probabilities
    chunk.log_df(trace_label, 'nested_probabilities', None)

    if have_trace_targets:
        tracing.trace_df(base_probabilities, '%s.base_probabilities' % trace_label,
                         column_labels=['alternative', 'probability'])

    # note base_probabilities could all be zero since we allowed all probs for nests to be zero
    # check here to print a clear message but make_choices will raise error if probs don't sum to 1
    BAD_PROB_THRESHOLD = 0.001
    no_choices = (base_probabilities.sum(axis=1) - 1).abs() > BAD_PROB_THRESHOLD

    if no_choices.any():

        logit.report_bad_choices(
            no_choices, base_probabilities,
            trace_label=tracing.extend_trace_label(trace_label, 'bad_probs'),
            trace_choosers=choosers,
            msg="base_probabilities do not sum to one")

    if custom_chooser:
        choices, rands = custom_chooser(probs=base_probabilities, choosers=choosers, spec=spec,
                                        trace_label=trace_label)
    else:
        choices, rands = logit.make_choices(base_probabilities, trace_label=trace_label)

    del base_probabilities
    chunk.log_df(trace_label, 'base_probabilities', None)

    if have_trace_targets:
        tracing.trace_df(choices, '%s.choices' % trace_label,
                         columns=[None, trace_choice_name])
        tracing.trace_df(rands, '%s.rands' % trace_label,
                         columns=[None, 'rand'])
        if want_logsums:
            tracing.trace_df(logsums, '%s.logsums' % trace_label,
                             columns=[None, 'logsum'])

    if want_logsums:
        choices = choices.to_frame('choice')
        choices['logsum'] = logsums

    return choices


def simple_simulate_dev(choosers, spec, nest_spec,
                    skims=None, locals_d=None,
                    chunk_size=0, custom_chooser=None,
                    log_alt_losers=False,
                    want_logsums=False,
                    estimator=None,
                    trace_label=None, trace_choice_name=None, trace_column_names=None):
    trace_label = tracing.extend_trace_label(trace_label, 'simple_simulate')
    assert len(choosers) > 0
    result_list = []
    # segment by person type and pick the right spec for each person type
    for i, chooser_chunk, chunk_trace_label \
            in chunk.adaptive_chunked_choosers(choosers, chunk_size, trace_label):
        # the following replaces choices = _simple_simulate(...)
        if skims is not None:
            simulate.set_skim_wrapper_targets(choosers, skims)

        # only do this for nested, logit is straight forward
        assert nest_spec is not None
        choices = eval_nl_dev(choosers, spec, nest_spec, locals_d,  custom_chooser,
                          log_alt_losers=log_alt_losers,
                          want_logsums=want_logsums,
                          estimator=estimator,
                          trace_label=trace_label,
                          trace_choice_name=trace_choice_name, trace_column_names=trace_column_names)


        result_list.append(choices)
        chunk.log_df(trace_label, f'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)
    assert len(choices.index == len(choosers.index))
    return choices


In [16]:
test_trips = run_trip_mode_choice(do_these_purposes=["escort"], simulate_function=simple_simulate_dev)
test_trips

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'escort' (6 trips)
Done


Unnamed: 0_level_0,person_id,household_id,primary_purpose,trip_num,outbound,trip_count,destination,origin,tour_id,purpose,destination_logsum,depart,trip_mode,mode_choice_logsum
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
137248721,418441,304036,escort,1,True,1,7,10,17156090,escort,,7.0,WALK,11.4358
137248725,418441,304036,escort,1,False,1,10,7,17156090,home,,7.0,WALK,11.48044
211388201,644476,386761,escort,1,True,1,11,16,26423525,escort,,5.0,WALK_LOC,4.789158
211388205,644476,386761,escort,1,False,1,16,11,26423525,home,,6.0,WALK_LOC,5.050171
806388401,2458501,1173905,escort,1,True,1,16,8,100798550,escort,,15.0,WALK_LOC,6.451457
806388405,2458501,1173905,escort,1,False,1,8,16,100798550,home,,16.0,WALK_LOC,6.446188


# Get raw utilities, etc

In [61]:
def eval_nl_dev(choosers, spec, nest_spec, locals_d, custom_chooser, estimator,
                log_alt_losers=False,
                want_logsums=False, trace_label=None,
                trace_choice_name=None, trace_column_names=None):

    trace_label = tracing.extend_trace_label(trace_label, 'eval_nl')
    assert trace_label
    have_trace_targets = tracing.has_trace_targets(choosers)

    logit.validate_nest_spec(nest_spec, trace_label)
    raw_utilities = simulate.eval_utilities(spec, choosers, locals_d,
                                            log_alt_losers=log_alt_losers,
                                            trace_label=trace_label, have_trace_targets=have_trace_targets,
                                            estimator=estimator, trace_column_names=trace_column_names)
    # exponentiated utilities of leaves and nests
    nested_exp_utilities = simulate.compute_nested_exp_utilities(raw_utilities, nest_spec)
    nested_utils = simulate.compute_nested_utilities(raw_utilities, nest_spec)
    # probabilities of alternatives relative to siblings sharing the same nest
    nested_probabilities = simulate.compute_nested_probabilities(nested_exp_utilities, nest_spec,
                                                                 trace_label=trace_label)
    if want_logsums:
        # logsum of nest root
        logsums = pd.Series(np.log(nested_exp_utilities.root), index=choosers.index)
    # global (flattened) leaf probabilities based on relative nest coefficients (in spec order)
    base_probabilities = simulate.compute_base_probabilities(nested_probabilities, nest_spec, spec)
    # note base_probabilities could all be zero since we allowed all probs for nests to be zero
    # check here to print a clear message but make_choices will raise error if probs don't sum to 1
    BAD_PROB_THRESHOLD = 0.001
    no_choices = (base_probabilities.sum(axis=1) - 1).abs() > BAD_PROB_THRESHOLD
    if no_choices.any():
        print("BAD")
    choices, rands = logit.make_choices(base_probabilities, trace_label=trace_label)
    if want_logsums:
        choices = choices.to_frame('choice')
        choices['logsum'] = logsums
    return choices, raw_utilities, nested_exp_utilities, nested_utils


def simple_simulate_dev(choosers, spec, nest_spec,
                        skims=None, locals_d=None,
                        chunk_size=0, custom_chooser=None,
                        log_alt_losers=False,
                        want_logsums=False,
                        estimator=None,
                        trace_label=None, trace_choice_name=None, trace_column_names=None):
    trace_label = tracing.extend_trace_label(trace_label, 'simple_simulate')
    assert len(choosers) > 0
    result_list = []
    # segment by person type and pick the right spec for each person type
    for i, chooser_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers(choosers, chunk_size, trace_label):
        # the following replaces choices = _simple_simulate(...)
        if skims is not None:
            simulate.set_skim_wrapper_targets(choosers, skims)

        # only do this for nested, logit is straight forward
        assert nest_spec is not None
        choices, raw_utilities, nested_exp_utilities, nested_utils = eval_nl_dev(choosers, spec, nest_spec, locals_d,
                                                                    custom_chooser,
                              log_alt_losers=log_alt_losers,
                              want_logsums=want_logsums,
                              estimator=estimator,
                              trace_label=trace_label,
                              trace_choice_name=trace_choice_name, trace_column_names=trace_column_names)


        result_list.append(choices)
        chunk.log_df(trace_label, f'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)
    assert len(choices.index == len(choosers.index))
    return choices, raw_utilities, nested_exp_utilities, nested_utils


def get_stuff(do_these_purposes=None):
    #do_these_purposes=['escort']
    """open pipeline and load stuff for mode choice dev assuming model has been run and pipeline.h5 exists"""
    resume_after = "trip_scheduling"
    model_name = "trip_mode_choice"
    chunk_size = 0  # test_mtc means no chunking

    pipeline.open_pipeline(resume_after)
    # preload any bulky injectables (e.g. skims) not in pipeline
    inject.get_injectable('preload_injectables', None)
    pipeline._PIPELINE.rng().begin_step(model_name)
    step_name = model_name
    args = {}
    checkpoint = pipeline.intermediate_checkpoint(model_name)
    inject.set_step_args(args)

    trips = inject.get_table('trips')
    tours_merged = inject.get_table('tours_merged')
    network_los = inject.get_injectable('network_los')

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    print("Running with %d trips", trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_specs = config.get_logit_model_settings(model_settings)

    estimator = estimation.manager.begin_estimation('trip_mode_choice')

    choices_list = []
    raw_util_list = []
    nest_list = []
    nu_list = []
    nest_spec_list = []

    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        if (do_these_purposes is not None) and (primary_purpose not in do_these_purposes):
            continue

        print("trip_mode_choice tour_type '%s' (%s trips)" %
              (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        ################ Replace wrapper function
        #     choices = mode_choice_simulate(...)
        spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
        nest_spec = simulate.eval_nest_coefficients(nest_specs, coefficients, segment_trace_label)
        choices, raw_utilities, nested_exp_utilities, nested_utils = simple_simulate_dev(
            choosers=trips_segment,
            spec=spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            want_logsums=logsum_column_name is not None,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator,
            trace_column_names=None)
        # for consistency, always return dataframe, whether or not logsums were requested
        if isinstance(choices, pd.Series):
            choices = choices.to_frame('choice')
        choices.rename(columns={'logsum': logsum_column_name,
                                'choice': mode_column_name},
                       inplace=True)
        alts = spec.columns
        choices[mode_column_name] = choices[mode_column_name].map(dict(list(zip(list(range(len(alts))), alts))))
        ################
        choices_list.append(choices)
        raw_util_list.append(raw_utilities)
        nest_list.append(nested_exp_utilities)
        nu_list.append(nested_utils)
        nest_spec_list.append(nest_spec)

    choices_df_asim = pd.concat(choices_list)

    # update trips table with choices (and potionally logssums)
    trips_df = trips.to_frame()

    if (do_these_purposes is not None):
        trips_df  = trips_df.loc[trips_df.primary_purpose.isin(do_these_purposes)]

    assign_in_place(trips_df, choices_df_asim)
    assert not trips_df[mode_column_name].isnull().any()

    finalise = True
    if finalise:
        inject.set_step_args(None)
        #
        pipeline._PIPELINE.rng().end_step(model_name)
        pipeline.add_checkpoint(model_name)
        if not pipeline.intermediate_checkpoint():
            pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

        pipeline.close_pipeline()

    print("Done")

    return trips_df, raw_util_list, nest_list, nu_list, nest_spec_list

In [62]:
t, ru, neu, nu, ns = get_stuff(do_these_purposes=["escort"])

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'escort' (6 trips)
Done


In [63]:
display(nu[0])#, ns[0])

Unnamed: 0_level_0,DRIVEALONEFREE,DRIVEALONEPAY,DRIVEALONE,SHARED2FREE,SHARED2PAY,SHAREDRIDE2,SHARED3FREE,SHARED3PAY,SHAREDRIDE3,AUTO,WALK,BIKE,NONMOTORIZED,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,WALKACCESS,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,DRIVEACCESS,TRANSIT,TAXI,TNC_SINGLE,TNC_SHARED,RIDEHAIL,root
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1
137248721,-1.294906,-2855.58062,-0.453217,-0.28052,-2854.566235,-0.098182,-2855.57368,-5709.859395,-inf,0.311848,15.883036,-1368.200102,11.435786,-1970.783011,-3968.783011,-3968.783011,-3968.783011,-3968.783011,-inf,-1970.903118,-3973.401006,-3973.401006,-3974.6695,-3975.688637,-inf,-inf,-24.503675,-25.143041,-24.249471,-8.448473,11.435801
137248725,-1.247739,-2855.533453,-0.436709,-0.233697,-2854.519411,-0.081794,-2855.526994,-5709.812708,-inf,0.323683,15.945036,-1368.169103,11.480426,-1970.850529,-3968.850529,-3968.850529,-3968.850529,-3968.850529,-inf,-1971.203369,-3975.938495,-3975.938495,-3971.574832,-3978.057757,-inf,-inf,-24.341318,-25.104291,-24.140831,-8.40687,11.480441
211388201,-2855.520162,-5709.805876,-inf,-17.366498,-2871.652212,-6.078274,-21.226117,-2875.511831,-7.429141,-4.21052,-4.216264,-1381.397292,-3.03571,9.572919,-1988.028653,-1988.028653,-1988.028653,-1988.028653,4.786459,-1988.387109,-3990.994654,-3990.994654,-1994.178373,-1997.184706,-inf,3.446251,-15.861515,-3.864497,-5.06836,-1.296757,3.45691
211388205,-2856.8102,-5711.095914,-inf,-18.093309,-2872.379023,-6.332658,-21.727638,-2876.013352,-7.604673,-4.381625,-4.216264,-1381.397292,-3.03571,10.096155,-1987.903845,-1987.903845,-1987.903845,-1987.903845,5.048078,-1988.132266,-3993.215462,-3993.215462,-1990.677481,-1997.375747,-inf,3.634616,-15.776424,-3.845077,-5.005482,-1.286094,3.643469
806388401,-2855.874866,-5710.16058,-inf,-9.085232,-2863.370946,-3.179831,-10.734064,-2865.019778,-3.756922,-1.968599,6.192499,-1378.480723,4.458599,12.609633,-1984.209027,-1983.750027,-1984.124827,-1984.171027,6.304817,-1985.442854,-3989.293913,-3989.293913,-3990.84356,-3993.682148,-inf,4.539468,-19.143696,-19.495908,-16.361074,-5.853917,5.193789
806388405,-2858.281791,-5712.567505,-inf,-10.45146,-2864.737174,-3.658011,-11.684013,-2865.969727,-4.089405,-2.273383,5.998749,-1378.519473,4.319099,12.638426,-1984.311174,-1983.852174,-1984.226974,-1984.273174,6.319213,-1985.451447,-3990.724818,-3990.724818,-3987.215444,-3994.211225,-inf,4.549833,-18.886576,-19.495133,-16.300395,-5.82854,5.134877


In [64]:
# next: add error terms to alternatives and nodes - can be done with this right here by iterating over nests and
# adding at each level.
# will probably want to vectorise, but that's for later.
# add_random returns a random number per row - this seems like the right thing to use while iterating over nest nodes
# and leafs (which gives us single columns per alternative)
# for destination choice, we might need to rethink this pattern though, but we'll cross that bridge when we come to it

def inverse_ev1_cdf(x, location=0.0, scale=1.0):
    #quantile function of EV1
    # let's follow https://en.wikipedia.org/wiki/Gumbel_distribution where the scale is proportional to variance (not variance^{-1})
    # this means nested scales are between 0 and 1
    # x can be number or np array or pd df for vecops
    return location - scale * np.log(-np.log(x))

In [65]:
utils_df = nu[0]
nest_spec = ns[0]

In [66]:
# fake random channel for prototyping as per Asim tests
from activitysim.core.random import Random
rng = Random()

In [102]:
nest_utils_for_choice = utils_df.copy()  # we'll add random parts to this such that we can recursively choose from
# the top level
for n in logit.each_nest(nest_spec):
    if n.level == 1:
        assert n.name == "root"  # TODO get this from where ever const is defined in code
        continue
    #n.print()
    #print(nest_utils_for_choice.loc[:,n.name])
    # TODO: check parent nest level scale is what we want this is right
    rands = inverse_ev1_cdf(rng.random_for_df(nest_utils_for_choice, n=1), scale=n.parent_nest_scale)
    #print(rands)
    # this will be cleaner wtith xarrays
    nest_utils_for_choice.loc[:,n.name] += rands[:,0]
    #print(nest_utils_for_choice.loc[:,n.name])

In [103]:
# alts = ["DRIVEALONEFREE", "DRIVEALONEPAY"]
# #print(nest_utils_for_choice[alts])
# t_ = nest_utils_for_choice[alts].idxmax(1)
# t_.apply(is_alternative)

In [104]:
all_alternatives = list(map(lambda x: x.name, filter(lambda x: x.is_leaf, logit.each_nest(nest_spec))))
def is_alternative(name):
    return name in all_alternatives

def group_nests_by_level(nest_spec):
    # group nests by level:
    depth = np.max([x.level for x in logit.each_nest(nest_spec)])
    print(f"Nesting depth is {depth}")
    nest_levels = {x: [] for x in range(1, depth+1)}
    for n in logit.each_nest(nest_spec):
        nest_levels[n.level].append(n.name)
    assert len(nest_levels[1]) == 1
    assert nest_levels[1][0] == 'root'
    return nest_levels

nest_utils_for_choice["choice"] = None

for level, alts in group_nests_by_level(nest_spec).items():
    if level == 1:
        continue
    no_choices_made_yet = nest_utils_for_choice["choice"].isnull()
    choice_this_level = nest_utils_for_choice.loc[no_choices_made_yet][alts].idxmax(1)
    nest_utils_for_choice.loc[no_choices_made_yet, "choice"] = \
        np.where(choice_this_level.apply(is_alternative), choice_this_level, None)

Nesting depth is 4


In [105]:
nest_utils_for_choice

Unnamed: 0_level_0,DRIVEALONEFREE,DRIVEALONEPAY,DRIVEALONE,SHARED2FREE,SHARED2PAY,SHAREDRIDE2,SHARED3FREE,SHARED3PAY,SHAREDRIDE3,AUTO,WALK,BIKE,NONMOTORIZED,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,WALKACCESS,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,DRIVEACCESS,TRANSIT,TAXI,TNC_SINGLE,TNC_SHARED,RIDEHAIL,root,choice
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1
137248721,-1.116115,-2855.401829,-0.085418,-0.101729,-2854.387444,0.269616,-2855.394889,-5709.680604,-inf,0.822679,16.250835,-1367.832304,11.946618,-1970.527596,-3968.527596,-3968.527596,-3968.527596,-3968.527596,-inf,-1970.647702,-3973.145591,-3973.145591,-3974.414085,-3975.433221,-inf,-inf,-24.319775,-24.959142,-24.065572,-7.937642,11.435801,WALK
137248725,-0.865187,-2855.150902,0.350254,0.148855,-2854.136859,0.705169,-2855.144442,-5709.430157,-inf,1.416687,16.732,-1367.382139,12.57343,-1970.304027,-3968.304027,-3968.304027,-3968.304027,-3968.304027,-inf,-1970.656867,-3975.391993,-3975.391993,-3971.02833,-3977.511255,-inf,-inf,-23.947836,-24.710809,-23.747349,-7.313866,11.480441,WALK
211388201,-2855.281895,-5709.567609,-inf,-17.12823,-2871.413945,-5.588125,-20.98785,-2875.273564,-6.938992,-3.529757,-3.726114,-1380.907142,-2.354947,9.9133,-1987.688271,-1987.688271,-1987.688271,-1987.688271,5.276609,-1988.046728,-3990.654273,-3990.654273,-1993.837992,-1996.844325,-inf,4.127014,-15.616441,-3.619423,-4.823285,-0.615994,3.45691,WALK_LOC
211388205,-2856.635577,-5710.921291,-inf,-17.918686,-2872.2044,-5.973433,-21.553014,-2875.838729,-7.245448,-3.882701,-3.857039,-1381.038067,-2.536786,10.345617,-1987.654383,-1987.654383,-1987.654383,-1987.654383,5.407303,-1987.882804,-3992.966,-3992.966,-1990.428019,-1997.126285,-inf,4.13354,-15.596812,-3.665464,-4.825869,-0.787171,3.643469,WALK_LOC
806388401,-2855.821604,-5710.107318,-inf,-9.03197,-2863.317684,-3.070264,-10.680802,-2864.966516,-3.647355,-1.816422,6.302066,-1378.371156,4.610776,12.685721,-1984.132938,-1983.673938,-1984.048738,-1984.094938,6.414384,-1985.366765,-3989.217824,-3989.217824,-3990.767471,-3993.606059,-inf,4.691645,-19.088912,-19.441124,-16.30629,-5.701741,5.193789,WALK_LOC
806388405,-2857.992149,-5712.277863,-inf,-10.161818,-2864.447532,-3.062176,-11.394371,-2865.680085,-3.49357,-1.445835,6.594584,-1377.923639,5.146648,13.0522,-1983.8974,-1983.4384,-1983.8132,-1983.8594,6.915048,-1985.037673,-3990.311044,-3990.311044,-3986.80167,-3993.797451,-inf,5.377382,-18.588659,-19.197216,-16.002477,-5.000992,5.134877,WALK_LOC


# OLD


### make choice at each level

In [315]:
def get_alternatives(nests, name):
    alts = list(filter(lambda x: x.name == name, nests))
    assert len(alts) == 1, f"{len(alts)} not one"
    alts = alts[0].alternatives
    return alts

def recursive_choice(row, columns, nest_levels, nests):
    choices = row[columns].idxmax() #axis=1).values[0]
    next_level_columns = get_alternatives(nests, choices)
    #print(f"{choices} leads to columns {next_level_columns}")
    if next_level_columns is None:
        return choices    
    new_choice = recursive_choice(row, next_level_columns, nest_levels, nests)
    return new_choice

lower_bound = np.finfo(np.float64).eps  # chance is very small but let's make it zero. could also check and replace if it ever happened

def make_choice(utils_df, nests, nest_levels, seed=None):
    rng = default_rng(seed=seed)
    rands = rng.uniform(low=lower_bound, high=1.0, size=utils_df.shape[1])
    probs_arr = utils_df - np.log(-np.log(rands))
    choices = probs_arr.apply(lambda x: recursive_choice(x, nest_levels[1], nest_levels, nests), axis=1)
    return choices

In [None]:
#rands = pipeline.get_rn_generator().random_for_df(utils_df, n=utils_df.shape[1])
seed = 9326543345
make_choice(utils_df, nests_, nest_levels, seed)

In [316]:
# group nests by level:
depth = np.max([x.level for x in nests_])
print(depth)
nest_levels = {x: [] for x in range(1, depth+1)}
for n in nests_:
    nest_levels[n.level].append(n.name)
assert len(nest_levels[1]) == 1
assert nest_levels[1][0] == 'root'

4


In [453]:
def simple_simulate_probabilities(trips_segment, spec, nest_spec, locals_d, estimator, tr_label, log_alt_losers, trace_column_names):
    trace_label = tracing.extend_trace_label(tr_label, 'eval_nl')
    logit.validate_nest_spec(nest_spec, trace_label)
    raw_utilities = simulate.eval_utilities(spec, trips_segment, locals_d,
                                   log_alt_losers=log_alt_losers,
                                   trace_label=trace_label, have_trace_targets=False,
                                   estimator=estimator, trace_column_names=trace_column_names)
    nested_exp_utilities = simulate.compute_nested_exp_utilities(raw_utilities, nest_spec)
    nested_probabilities = \
        simulate.compute_nested_probabilities(nested_exp_utilities, nest_spec, trace_label=trace_label)
    # global (flattened) leaf probabilities based on relative nest coefficients (in spec order)
    base_probabilities = simulate.compute_base_probabilities(nested_probabilities, nest_spec, spec)    
    return base_probabilities
#simple_simulate_probabilities(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names)

In [451]:
def simple_simulate_rum(trips_segment, spec, nest_spec, locals_d, estimator, tr_label, log_alt_losers, trace_column_names, custom_chooser=None, seed=None):
    trace_label = tracing.extend_trace_label(tr_label, 'eval_nl')
    logit.validate_nest_spec(nest_spec, trace_label)
    raw_utilities = simulate.eval_utilities(spec, trips_segment, locals_d,
                                   log_alt_losers=log_alt_losers,
                                   trace_label=trace_label, have_trace_targets=False,
                                   estimator=estimator, trace_column_names=trace_column_names)

    utils_df = compute_nested_utilities(raw_utilities, nest_spec)

    nests_ = list(logit.each_nest(nest_spec))
    # group nests by level:
    depth = np.max([x.level for x in nests_])
    nest_levels = {x: [] for x in range(1, depth+1)}
    for n in nests_:
        nest_levels[n.level].append(n.name)
    assert len(nest_levels[1]) == 1
    assert nest_levels[1][0] == 'root'
    # make choices
    choices = make_choice(utils_df, nests_, nest_levels, seed)

    return choices

simple_simulate_rum(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=1233974)

trip_id
86627409      WALK_LRF
86627413      WALK_LRF
86673657      WALK_LOC
86673658          WALK
86673659          WALK
                ...   
2464446025        WALK
2464446029        WALK
2464449633        WALK
2464449634        WALK
2464449637        WALK
Length: 168, dtype: object

In [393]:
def stuff(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
          trace_label=None, log_alt_losers=None, trace_column_names=None, seed=None):
    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):
        #print("trip_mode_choice tour_type '%s' (%s trips)" %
        #            (primary_purpose, len(trips_segment.index), ))
        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label)
        choices = simple_simulate_rum(trips_segment, spec, nest_spec, locals_dict, estimator, 
                                      segment_trace_label, log_alt_losers=log_alt_losers, 
                                      trace_column_names=trace_column_names, seed=seed)

        # for consistency, always return dataframe, whether or not logsums were requested
        if isinstance(choices, pd.Series):
            choices = choices.to_frame('choice')
        choices.rename(columns={'logsum': logsum_column_name,
                                'choice': mode_column_name},
                       inplace=True)
        choices_list.append(choices)

    choices_df = pd.concat(choices_list)
    return choices_df

In [454]:
## caculate probabilities with Asim methodology, should be correct
def gimme_probabilities(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
          trace_label=None, log_alt_losers=None, trace_column_names=None):
    full_probs = []  # analytical probs

    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):
        #print("trip_mode_choice tour_type '%s' (%s trips)" %
        #            (primary_purpose, len(trips_segment.index), ))
        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label)
        #choices = simple_simulate_rum(trips_segment, spec, nest_spec, locals_dict, estimator, 
        #                              segment_trace_label, log_alt_losers=log_alt_losers, 
        #                              trace_column_names=trace_column_names, seed=seed)
        probs = simple_simulate_probabilities(trips_segment, spec, nest_spec, locals_dict, 
                                              estimator, segment_trace_label, log_alt_losers, 
                                              trace_column_names)
        full_probs.append(probs)
    probs_df = pd.concat(full_probs)
    return probs_df


In [455]:
print(f"{datetime.now()} Start")
pipeline.open_pipeline(resume_after)
inject.get_injectable('preload_injectables', None)
model_name = "trip_mode_choice"
pipeline._PIPELINE.rng().begin_step(model_name)

step_name = model_name
args = {}
checkpoint = pipeline.intermediate_checkpoint(model_name)
inject.set_step_args(args)

trips = inject.get_table('trips')
tours_merged = inject.get_table('tours_merged')
network_los = inject.get_injectable('network_los')
chunk_size = 0

trace_label = 'trip_mode_choice'
model_settings_file_name = 'trip_mode_choice.yaml'
model_settings = config.read_model_settings(model_settings_file_name)

logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
mode_column_name = 'trip_mode'
trips_df = trips.to_frame()
#print("Running with %d trips", trips_df.shape[0])
tours_merged = tours_merged.to_frame()
tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]
# - trips_merged - merge trips and tours_merged
trips_merged = pd.merge(
    trips_df,
    tours_merged,
    left_on='tour_id',
    right_index=True,
    how="left")
assert trips_merged.index.equals(trips.index)

# setup skim keys
assert ('trip_period' not in trips_merged)
trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

orig_col = 'origin'
dest_col = 'destination'

constants = {}
constants.update(config.get_model_constants(model_settings))
constants.update({
    'ORIGIN': orig_col,
    'DESTINATION': dest_col
})

skim_dict = network_los.get_default_skim_dict()

odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                           dim3_key='trip_period')
dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                           dim3_key='trip_period')
od_skim_wrapper = skim_dict.wrap('origin', 'destination')

skims = {
    "odt_skims": odt_skim_stack_wrapper,
    "dot_skims": dot_skim_stack_wrapper,
    "od_skims": od_skim_wrapper,
}

model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
nest_spec = config.get_logit_model_settings(model_settings)

estimator = estimation.manager.begin_estimation('trip_mode_choice')


all_choices = []
for i in range(100):
    if i % 10 == 0:
        print(f"{datetime.now()} iteration {i}")
    choices_df = stuff(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
              trace_label=trace_label, log_alt_losers=None, trace_column_names=None, seed=None)
    all_choices.append(choices_df)
all_choices = pd.concat(all_choices, axis=1)

probs_nl = gimme_probabilities(trips_merged, model_settings, constants, skims, model_spec, nest_spec,
                               estimator, logsum_column_name, mode_column_name,trace_label=trace_label, 
                               log_alt_losers=None, trace_column_names=None)

# update trips table with choices (and potionally logssums)
#trips_df = trips.to_frame()
#
#assign_in_place(trips_df, choices_df)
#assert not trips_df[mode_column_name].isnull().any()


finalise = True
if finalise:
    inject.set_step_args(None)
    #
    pipeline._PIPELINE.rng().end_step(model_name)
    pipeline.add_checkpoint(model_name)
    if not pipeline.intermediate_checkpoint():
        pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

    pipeline.close_pipeline()

print(f"{datetime.now()} End")

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


2021-09-01 14:59:46.353007 Start
2021-09-01 14:59:52.976736 End


In [463]:
#all_choices.merge(choices_df_asim[['trip_mode']].rename(columns={'trip_mode': 'asim'}), left_index=True, right_index=True)
val_counts = all_choices.apply(lambda x: x.value_counts(), axis=1).fillna(0)
val_counts = val_counts / all_choices.shape[1]
#val_counts = val_counts.merge(choices_df_asim[['trip_mode']].rename(columns={'trip_mode': 'asim'}), left_index=True, right_index=True)
#val_counts['prob_of_asim_choice'] = val_counts.apply(lambda x: x[x.asim], axis=1)  # this is what our simulation says w.r.t. to asim choice
# for 100% and many samples should mostly agree

In [464]:
#val_counts['prob_of_asim_choice'].hist(bins=100);

In [490]:
mode_share_analytical = (probs_nl.sum(axis=0) / probs_nl.shape[0]).to_frame('analytical')
assert np.allclose(mode_share_analytical.sum(), 1)
mode_share_rum = (val_counts.sum(axis=0) / val_counts.shape[0]).to_frame('experiment')
assert np.allclose(mode_share_rum.sum(), 1)
full_share = mode_share_analytical.join(mode_share_rum, how='outer').fillna(0)
with pd.option_context("precision", 3):
    display((100.0 * full_share).T)

Unnamed: 0,BIKE,DRIVEALONEFREE,DRIVEALONEPAY,DRIVE_COM,DRIVE_EXP,DRIVE_HVY,DRIVE_LOC,DRIVE_LRF,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,TAXI,TNC_SHARED,TNC_SINGLE,WALK,WALK_COM,WALK_EXP,WALK_HVY,WALK_LOC,WALK_LRF
analytical,3.152,0.852,0.0,0.0,0.0,0.0,0.0,0.0,0.685,0.0,0.129,0.0,0.182,0.26,1.334,63.708,0.0,0.0,0.455,18.355,10.887
experiment,3.243,0.88,0.0,0.0,0.0,0.0,0.0,0.0,0.772,0.0,0.156,0.0,0.174,0.259,1.434,63.243,0.0,0.0,0.394,18.639,10.807


### try zenith normalisation of simple_simulate_rum

In [629]:
# TODO: work out if our formulation and formulation belowi s equivalent.
def compute_nested_utilities_zenith(raw_utilities, nest_spec):
    nested_utilities = pd.DataFrame(index=raw_utilities.index)
    for nest in logit.each_nest(nest_spec, post_order=True):
        name = nest.name
        if nest.is_leaf:
            nested_utilities[name] = \
                raw_utilities[name].astype(float) # / nest.product_of_coefficients  #coefficient
        else:
            with np.errstate(divide='ignore'):
                nested_utilities[name] = \
                    nest.coefficient * np.log(np.exp(nested_utilities[nest.alternatives] / nest.coefficient).sum(axis=1))
                
    # now go over all leaves and correct for scale
    for nest in logit.each_nest(nest_spec, post_order=True):
        name = nest.name
        if nest.is_leaf:
            nested_utilities[name] /= nest.coefficient
    
    return nested_utilities


def simple_simulate_rum_zenith(trips_segment, spec, nest_spec, locals_d, estimator, tr_label, log_alt_losers, trace_column_names, custom_chooser=None, seed=None):
    trace_label = tracing.extend_trace_label(tr_label, 'eval_nl')
    logit.validate_nest_spec(nest_spec, trace_label)
    raw_utilities = simulate.eval_utilities(spec, trips_segment, locals_d,
                                   log_alt_losers=log_alt_losers,
                                   trace_label=trace_label, have_trace_targets=False,
                                   estimator=estimator, trace_column_names=trace_column_names)

    utils_df = compute_nested_utilities_zenith(raw_utilities, nest_spec)

    nests_ = list(logit.each_nest(nest_spec))
    # group nests by level:
    depth = np.max([x.level for x in nests_])
    nest_levels = {x: [] for x in range(1, depth+1)}
    for n in nests_:
        nest_levels[n.level].append(n.name)
    assert len(nest_levels[1]) == 1
    assert nest_levels[1][0] == 'root'
    # make choices
    choices = make_choice(utils_df, nests_, nest_levels, seed)

    return choices

#simple_simulate_rum_zenith(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=1233974)

In [630]:
def stuff_zenith(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
          trace_label=None, log_alt_losers=None, trace_column_names=None, seed=None):
    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):
        #print("trip_mode_choice tour_type '%s' (%s trips)" %
        #            (primary_purpose, len(trips_segment.index), ))
        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label)
        choices = simple_simulate_rum_zenith(trips_segment, spec, nest_spec, locals_dict, estimator, 
                                      segment_trace_label, log_alt_losers=log_alt_losers, 
                                      trace_column_names=trace_column_names, seed=seed)

        # for consistency, always return dataframe, whether or not logsums were requested
        if isinstance(choices, pd.Series):
            choices = choices.to_frame('choice')
        choices.rename(columns={'logsum': logsum_column_name,
                                'choice': mode_column_name},
                       inplace=True)
        choices_list.append(choices)

    choices_df = pd.concat(choices_list)
    return choices_df

In [631]:
num_samples = 10

rng_test = default_rng(23423)

print(f"{datetime.now()} Start")
pipeline.open_pipeline(resume_after)
inject.get_injectable('preload_injectables', None)
model_name = "trip_mode_choice"
pipeline._PIPELINE.rng().begin_step(model_name)

step_name = model_name
args = {}
checkpoint = pipeline.intermediate_checkpoint(model_name)
inject.set_step_args(args)

trips = inject.get_table('trips')
tours_merged = inject.get_table('tours_merged')
network_los = inject.get_injectable('network_los')
chunk_size = 0

trace_label = 'trip_mode_choice'
model_settings_file_name = 'trip_mode_choice.yaml'
model_settings = config.read_model_settings(model_settings_file_name)

logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
mode_column_name = 'trip_mode'
trips_df = trips.to_frame()
#print("Running with %d trips", trips_df.shape[0])
tours_merged = tours_merged.to_frame()
tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]
# - trips_merged - merge trips and tours_merged
trips_merged = pd.merge(
    trips_df,
    tours_merged,
    left_on='tour_id',
    right_index=True,
    how="left")
assert trips_merged.index.equals(trips.index)

# setup skim keys
assert ('trip_period' not in trips_merged)
trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

orig_col = 'origin'
dest_col = 'destination'

constants = {}
constants.update(config.get_model_constants(model_settings))
constants.update({
    'ORIGIN': orig_col,
    'DESTINATION': dest_col
})

skim_dict = network_los.get_default_skim_dict()
odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                           dim3_key='trip_period')
dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                           dim3_key='trip_period')
od_skim_wrapper = skim_dict.wrap('origin', 'destination')
skims = {
    "odt_skims": odt_skim_stack_wrapper,
    "dot_skims": dot_skim_stack_wrapper,
    "od_skims": od_skim_wrapper,
}
model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
nest_spec = config.get_logit_model_settings(model_settings)
estimator = estimation.manager.begin_estimation('trip_mode_choice')

all_choices_zenith = []
all_choices = []
for i in range(num_samples):
    
    seed = rng_test.integers(0, 100000) #int(9.3 * (i+1)**3)  # why not
    
    if i % 50 == 0:
        print(f"{datetime.now()} iteration {i}")
    choices_df_zenith = stuff_zenith(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
              trace_label=trace_label, log_alt_losers=None, trace_column_names=None, seed=seed)
    all_choices_zenith.append(choices_df_zenith)

    choices_df = stuff(trips_merged, model_settings, constants, skims, model_spec, nest_spec, estimator, logsum_column_name, mode_column_name, 
          trace_label=trace_label, log_alt_losers=None, trace_column_names=None, seed=seed)
    all_choices.append(choices_df)
    
    t_ = choices_df_zenith.merge(choices_df, left_index=True, right_index=True, suffixes=['_zenith', '_asim'])
    diffs = t_.loc[t_.trip_mode_zenith != t_.trip_mode_asim]
    
    #print(f"seed {seed} leads to {diffs.shape[0]} differences. tripids {diffs.index}")

all_choices_zenith = pd.concat(all_choices_zenith, axis=1)
all_choices = pd.concat(all_choices, axis=1)

probs_nl = gimme_probabilities(trips_merged, model_settings, constants, skims, model_spec, nest_spec,
                               estimator, logsum_column_name, mode_column_name,trace_label=trace_label, 
                               log_alt_losers=None, trace_column_names=None)

finalise = True
if finalise:
    inject.set_step_args(None)
    #
    pipeline._PIPELINE.rng().end_step(model_name)
    pipeline.add_checkpoint(model_name)
    if not pipeline.intermediate_checkpoint():
        pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

    pipeline.close_pipeline()

print(f"{datetime.now()} End")

In [643]:
print(f"Have {all_choices_zenith.shape[1]} samples")

Have 2038 samples


In [633]:
val_counts_zenith = all_choices_zenith.apply(lambda x: x.value_counts(), axis=1).fillna(0)
val_counts_zenith = val_counts_zenith / all_choices_zenith.shape[1]

val_counts = all_choices.apply(lambda x: x.value_counts(), axis=1).fillna(0)
val_counts = val_counts / all_choices.shape[1]

In [634]:
mode_share_rum_zenith = (val_counts_zenith.sum(axis=0) / val_counts_zenith.shape[0]).to_frame('experiment_zenith')
assert np.allclose(mode_share_rum_zenith.sum(), 1)
full_share_incl_zenith = full_share.merge(mode_share_rum_zenith, left_index=True, right_index=True, how='outer').fillna(0)
with pd.option_context("precision", 3):
    display((100.0 * full_share_incl_zenith).T)

Unnamed: 0,BIKE,DRIVEALONEFREE,DRIVEALONEPAY,DRIVE_COM,DRIVE_EXP,DRIVE_HVY,DRIVE_LOC,DRIVE_LRF,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,TAXI,TNC_SHARED,TNC_SINGLE,WALK,WALK_COM,WALK_EXP,WALK_HVY,WALK_LOC,WALK_LRF
analytical,3.152,0.852,0.0,0.0,0.0,0.0,0.0,0.0,0.685,0.0,0.129,0.0,0.182,0.26,1.334,63.708,0.0,0.0,0.455,18.355,10.887
experiment,3.243,0.88,0.0,0.0,0.0,0.0,0.0,0.0,0.772,0.0,0.156,0.0,0.174,0.259,1.434,63.243,0.0,0.0,0.394,18.639,10.807
experiment_zenith,3.15,0.874,0.0,0.0,0.0,0.0,0.0,0.0,0.68,0.0,0.136,0.0,0.154,0.236,1.285,63.767,0.0,0.0,0.515,18.824,10.379


## investigate diverging seed and look at diff in formulation

In [619]:
# TODO: work out if our formulation and formulation based on asim probabilities is equivalent
def compute_nested_utilities_asim(raw_utilities, nest_spec):
    nested_utilities = pd.DataFrame(index=raw_utilities.index)
    for nest in logit.each_nest(nest_spec, post_order=True):
        name = nest.name
        if nest.is_leaf:
            nested_utilities[name] = \
                raw_utilities[name].astype(float) / nest.product_of_coefficients
        else:
            with np.errstate(divide='ignore'):
                nested_utilities[name] = \
                    nest.coefficient * np.log(np.exp(nested_utilities[nest.alternatives]).sum(axis=1))
    return nested_utilities

def compute_nested_utilities_zenith_check(raw_utilities, nest_spec):
    nested_utilities = pd.DataFrame(index=raw_utilities.index)
    for nest in logit.each_nest(nest_spec, post_order=True):
        name = nest.name
        if nest.is_leaf:
            nested_utilities[name] = \
                raw_utilities[name].astype(float)  # scale correction is below
        else:
            with np.errstate(divide='ignore'):
                nested_utilities[name] = \
                    nest.coefficient * np.log(np.exp(nested_utilities[nest.alternatives] / nest.coefficient).sum(axis=1))
                
    # now go over all leaves and correct for scale
    for nest in logit.each_nest(nest_spec):
        name = nest.name
        if nest.is_leaf:
            nested_utilities[name] /= nest.coefficient
    
    return nested_utilities

In [688]:
def simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_d, estimator, tr_label, log_alt_losers, trace_column_names, custom_chooser=None, seed=None, use_zenith=True, raw_utilities=None):
    trace_label = tracing.extend_trace_label(tr_label, 'eval_nl')
    logit.validate_nest_spec(nest_spec, trace_label)
    
    if raw_utilities is None:
        raw_utilities = simulate.eval_utilities(spec, trips_segment, locals_d,
                                       log_alt_losers=log_alt_losers,
                                       trace_label=trace_label, have_trace_targets=False,
                                       estimator=estimator, trace_column_names=trace_column_names)

    if use_zenith:
        utils_df = compute_nested_utilities_zenith_check(raw_utilities, nest_spec)
    else:
        utils_df = compute_nested_utilities_asim(raw_utilities, nest_spec)

    # test
    #return compute_nested_utilities_zenith_check(raw_utilities, nest_spec), compute_nested_utilities_asim(raw_utilities, nest_spec)
        
    nests_ = list(logit.each_nest(nest_spec))
    # group nests by level:
    depth = np.max([x.level for x in nests_])
    nest_levels = {x: [] for x in range(1, depth+1)}
    for n in nests_:
        nest_levels[n.level].append(n.name)
    assert len(nest_levels[1]) == 1
    assert nest_levels[1][0] == 'root'
    # make choices
    choices = make_choice(utils_df, nests_, nest_levels, seed)

    return choices

In [626]:
# t_z, t_a = simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=seed)
# tr_id = 86673661
# display(t_z.loc[t_z.index==tr_id])
# display(t_a.loc[t_a.index==tr_id])

In [625]:
seed = 51006
x_ = simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=seed, use_zenith=True)
y_ = simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=seed, use_zenith=False)
t_ = x_.to_frame('trip_mode_zenith').merge(y_.to_frame('trip_mode_asim'), left_index=True, right_index=True)
diffs = t_.loc[t_.trip_mode_zenith != t_.trip_mode_asim]
print(f"seed {seed} leads to {diffs.shape[0]} differences out of {t_.shape[0]}. tripids {diffs.index}")

seed 51006 leads to 34 differences out of 168. tripids Int64Index([  86673657,   86673661,  106741681,  106741682,  106741685,
             106741686,  106741687,  106741688,  211327433,  211327437,
             444793574,  484173905,  484173909,  535170694,  535620053,
             708171014,  943749470,  943749471, 1060575853, 1091770617,
            1146472489, 1146472493, 1276281769, 1276281773, 1658748793,
            1658748797, 1767013726, 1767186577, 1767186578, 1768237161,
            1768237165, 1768237166, 2463663417, 2463663421],
           dtype='int64', name='trip_id')


In [None]:
seed 51006 leads to 3 differences. tripids Int64Index([86673661, 535170689, 1060575849], dtype='int64', name='trip_id')


In [753]:
rng_ = default_rng(seed=100)
new_utils = pd.DataFrame(0.1 * -np.log(-np.log(rng_.uniform(0,1,raw_utilities.shape))), columns=raw_utilities.columns)
new_utils.index = raw_utilities.index

In [767]:
nested_exp_utilities_cf = simulate.compute_nested_exp_utilities(new_utils, nest_spec)
nested_probabilities_cf = simulate.compute_nested_probabilities(nested_exp_utilities_cf, nest_spec, trace_label=None)
base_probabilities_cf = simulate.compute_base_probabilities(nested_probabilities_cf, nest_spec, spec)
base_probabilities_cf

Unnamed: 0_level_0,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
137248721,0.056642,0.037305,0.04278,0.029576,0.086154,0.026586,0.088313,0.100331,0.028021,0.018516,0.064391,0.020009,0.026399,0.022622,0.023007,0.021754,0.087126,0.021171,0.036475,0.02765,0.135171
137248725,0.101902,0.022876,0.031227,0.06999,0.056913,0.046685,0.088411,0.087785,0.025414,0.038162,0.023988,0.0288,0.039318,0.02016,0.048364,0.023003,0.033959,0.034597,0.037708,0.099523,0.041213
211388201,0.037562,0.070033,0.055951,0.040389,0.020041,0.095974,0.094678,0.094368,0.036014,0.025051,0.03486,0.034953,0.035499,0.02804,0.04365,0.024713,0.031388,0.025931,0.066999,0.052678,0.051228
211388205,0.03548,0.056256,0.043944,0.048085,0.042078,0.062221,0.106819,0.087265,0.035117,0.030399,0.035154,0.026469,0.050646,0.035727,0.024284,0.032905,0.039427,0.041092,0.054714,0.048459,0.063458
806388401,0.054918,0.040488,0.033149,0.054294,0.043477,0.067982,0.152465,0.074152,0.033404,0.028735,0.059269,0.024339,0.036968,0.02776,0.023522,0.060932,0.023445,0.023931,0.044643,0.033773,0.058353
806388405,0.032764,0.057301,0.018437,0.077099,0.046963,0.063521,0.094951,0.089072,0.030826,0.037896,0.035752,0.019878,0.049536,0.07961,0.026922,0.024833,0.02484,0.026695,0.079936,0.040146,0.043022


In [799]:
%%time

num_samples = 10000 # 7.5s per 100

data_zenith = []
data_asim = []
for i in range(num_samples):
    seed = rng_.integers(0, 100000)
    x_ = simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=seed, use_zenith=True, raw_utilities=new_utils)
    y_ = simple_simulate_rum_zenith_check(trips_segment, spec, nest_spec, locals_dict, estimator, tr_label, log_alt_losers, trace_column_names, seed=seed, use_zenith=False, raw_utilities=new_utils)
    data_zenith.append(x_)
    data_asim.append(y_)
    
data_asim = pd.concat(data_asim, axis=1)
data_zenith = pd.concat(data_zenith, axis=1)
# counts_zenith = data_zenith.apply(lambda x: x.value_counts(), axis=1).fillna(0)
# counts_zenith = counts_zenith / data_zenith.shape[1]
# counts_asim = data_asim.apply(lambda x: x.value_counts(), axis=1).fillna(0)
# counts_asim = counts_asim / data_asim.shape[1]

# mode_share_zenith = (counts_zenith.sum(axis=0) / counts_zenith.shape[0]).to_frame('zenith')
# mode_share_asim = (counts_asim.sum(axis=0) / counts_asim.shape[0]).to_frame('asim')
# mode_share_base_prob = (base_probabilities_cf.sum(axis=0) / base_probabilities_cf.shape[0]).to_frame('probs')
# assert np.allclose(mode_share_zenith.sum(), 1)
# assert np.allclose(mode_share_asim.sum(), 1)
# assert np.allclose(mode_share_base_prob.sum(), 1)
# mode_share_comp = mode_share_zenith.join(mode_share_asim, how='outer').join(mode_share_base_prob, how='outer').fillna(0)

CPU times: user 11min 49s, sys: 766 ms, total: 11min 50s
Wall time: 11min 52s


In [796]:
#temp_z = data_zenith.copy()
#temp_a = data_asim.copy()
#data_asim = data_asim.join(temp_a, lsuffix="_o", rsuffix="_n")
#data_zenith = data_zenith.join(temp_z, lsuffix="_o", rsuffix="_n")
# counts_zenith = data_zenith.apply(lambda x: x.value_counts(), axis=1).fillna(0)
# counts_zenith = counts_zenith / data_zenith.shape[1]
# counts_asim = data_asim.apply(lambda x: x.value_counts(), axis=1).fillna(0)
# counts_asim = counts_asim / data_asim.shape[1]

# mode_share_zenith = (counts_zenith.sum(axis=0) / counts_zenith.shape[0]).to_frame('zenith')
# mode_share_asim = (counts_asim.sum(axis=0) / counts_asim.shape[0]).to_frame('asim')
# mode_share_base_prob = (base_probabilities_cf.sum(axis=0) / base_probabilities_cf.shape[0]).to_frame('probs')
# assert np.allclose(mode_share_zenith.sum(), 1)
# assert np.allclose(mode_share_asim.sum(), 1)
# assert np.allclose(mode_share_base_prob.sum(), 1)
# mode_share_comp = mode_share_zenith.join(mode_share_asim, how='outer').join(mode_share_base_prob, how='outer').fillna(0)

In [805]:
# sample of 20000
with pd.option_context("precision", 3):
    display((100.0 * mode_share_comp).T)

Unnamed: 0,BIKE,DRIVEALONEFREE,DRIVEALONEPAY,DRIVE_COM,DRIVE_EXP,DRIVE_HVY,DRIVE_LOC,DRIVE_LRF,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,TAXI,TNC_SHARED,TNC_SINGLE,WALK,WALK_COM,WALK_EXP,WALK_HVY,WALK_LOC,WALK_LRF
zenith,8.055,5.167,4.631,3.502,3.606,4.266,3.963,3.498,3.895,5.176,4.799,5.851,4.999,6.121,4.668,9.486,4.228,4.125,2.977,3.671,3.317
asim,6.096,6.698,4.977,3.202,3.522,4.334,4.129,3.184,3.224,5.149,6.283,8.296,4.793,6.584,4.532,7.484,4.152,4.273,2.629,3.433,3.023
probs,8.883,5.321,4.738,2.89,3.136,4.003,3.565,3.162,3.758,5.324,4.927,6.049,5.341,6.541,5.037,10.427,3.973,4.224,2.574,3.147,2.979


In [806]:
# TODO: look at probs per trip, i.e. do not sum across trips
trip_id = 137248721

counts_zenith.loc[counts_zenith.index == trip_id].T.merge(
    counts_asim.loc[counts_asim.index == trip_id].T, suffixes=['_z', '_a'], left_index=True, right_index=True, how='outer').merge(
    base_probabilities_cf.loc[base_probabilities_cf.index == trip_id].T, suffixes=['', '_probs'], left_index=True, right_index=True, how='outer').fillna(0)

trip_id,137248721_z,137248721_a,137248721
BIKE,0.0927,0.05435,0.100331
DRIVEALONEFREE,0.05365,0.05005,0.056642
DRIVEALONEPAY,0.0388,0.03155,0.037305
DRIVE_COM,0.0287,0.0247,0.021171
DRIVE_EXP,0.0288,0.02605,0.021754
DRIVE_HVY,0.07815,0.0997,0.087126
DRIVE_LOC,0.02875,0.027,0.022622
DRIVE_LRF,0.0278,0.02445,0.023007
SHARED2FREE,0.04325,0.023,0.04278
SHARED2PAY,0.0336,0.01585,0.029576
