# validate results

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import os
import argparse
from datetime import datetime

import numpy as np
import pandas as pd
from numpy.random import default_rng

from activitysim.cli import run
from activitysim.core import inject
from activitysim.core import tracing
from activitysim.core import config
from activitysim.core import pipeline
from activitysim.core import chunk
from activitysim.core import simulate
from activitysim.core import logit
from activitysim.abm.models.util import estimation
from activitysim.core import expressions
from activitysim.core.util import assign_in_place

In [4]:
pd.set_option("max_columns", 500)

In [5]:
root_dir = "/mnt/c/Users/jan.zill/code/activitysim"
example_dir = os.path.join(root_dir, "test_example_mtc")

In [6]:
os.chdir(example_dir)

In [7]:
parser = argparse.ArgumentParser()
run.add_run_args(parser)
# args = parser.parse_args()
# parser.parse_args(['--sum', '7', '-1', '42'])
args = parser.parse_args(['-c', 'configs', '-o', 'output', '-d', 'data'])
#run.run(args)  # 2mins full example run
if not inject.is_injectable('preload_injectables'):
    from activitysim import abm  # register abm steps and other abm-specific injectables
run.handle_standard_args(args)  # possibly update injectables

In [8]:
from fru_utils import run_trip_mode_choice
trips_df_ru = run_trip_mode_choice()
trips_df = run_trip_mode_choice(choose_individual_max_utility=False)

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'atwork' (27 trips)
trip_mode_choice tour_type 'eatout' (33 trips)
trip_mode_choice tour_type 'escort' (6 trips)
trip_mode_choice tour_type 'othdiscr' (43 trips)
trip_mode_choice tour_type 'othmaint' (46 trips)
trip_mode_choice tour_type 'school' (37 trips)
trip_mode_choice tour_type 'shopping' (77 trips)
trip_mode_choice tour_type 'social' (19 trips)
trip_mode_choice tour_type 'univ' (26 trips)
trip_mode_choice tour_type 'work' (168 trips)


register joint_tour_participants: no rows with household_id in [982875].


Done


estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'atwork' (27 trips)
trip_mode_choice tour_type 'eatout' (33 trips)
trip_mode_choice tour_type 'escort' (6 trips)
trip_mode_choice tour_type 'othdiscr' (43 trips)
trip_mode_choice tour_type 'othmaint' (46 trips)
trip_mode_choice tour_type 'school' (37 trips)
trip_mode_choice tour_type 'shopping' (77 trips)
trip_mode_choice tour_type 'social' (19 trips)
trip_mode_choice tour_type 'univ' (26 trips)
trip_mode_choice tour_type 'work' (168 trips)
Done


In [9]:
c_ = ["trip_mode", "mode_choice_logsum"]
trips = trips_df_ru[c_].merge(trips_df[c_], left_index=True, right_index=True, how="outer", suffixes=["_fru", "_asim"])
pd.merge(trips_df_ru.trip_mode.value_counts(), trips_df.trip_mode.value_counts(), left_index=True, right_index=True,
         suffixes=["_fru", "_asim"])

Unnamed: 0,trip_mode_fru,trip_mode_asim
WALK,299,304
WALK_LOC,94,85
WALK_LRF,57,53
BIKE,17,17
DRIVEALONEFREE,6,4
WALK_HVY,4,4
SHARED2FREE,3,4
TNC_SINGLE,2,4


In [10]:
if np.allclose(trips.mode_choice_logsum_fru, trips.mode_choice_logsum_asim):
    print("Logsums agree")
else:
    ((trips.mode_choice_logsum_fru - trips.mode_choice_logsum_asim) / trips.mode_choice_logsum_fru).hist(bins=50)

Logsums agree


In [12]:
#simulate.compute_nested_utilities(ru[0], ns[0])

# Run MC validation

see logit._each_nest parent_nest_scale for leaf node, and simulate.compute_nested_utilities for discussion of scales
of alternatives

easiest way is to run a simulation

In [26]:
from fru_utils import get_stuff
t, ru, neu, nu, ns, nested_probs, base_probs = get_stuff(do_these_purposes=["social"])

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'social' (19 trips)
Done


In [27]:
nest_spec = ns[0]
for nest in logit.each_nest(nest_spec, type='node', post_order=False):
    nest.print()

Nest name: root level: 1 coefficient: 1.0 product_of_coefficients: 1.0 ancestors: ['root'] parent nest scale: 0
Nest name: AUTO level: 2 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO'] parent nest scale: 1.0
Nest name: NONMOTORIZED level: 2 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'NONMOTORIZED'] parent nest scale: 1.0
Nest name: TRANSIT level: 2 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'TRANSIT'] parent nest scale: 1.0
Nest name: RIDEHAIL level: 2 coefficient: 0.36 product_of_coefficients: 0.36 ancestors: ['root', 'RIDEHAIL'] parent nest scale: 1.0


In [28]:
for nest in logit.each_nest(nest_spec, post_order=True):
    name = nest.name
    if nest.is_leaf:
        print(f"leaf {name} with {nest.product_of_coefficients}, {nest.parent_nest_scale}")
    else:
        print(f"node {name} with {nest.coefficient}, {nest.parent_nest_scale}")

leaf DRIVEALONEFREE with 0.72, 0.72
leaf DRIVEALONEPAY with 0.72, 0.72
leaf SHARED2FREE with 0.72, 0.72
leaf SHARED2PAY with 0.72, 0.72
leaf SHARED3FREE with 0.72, 0.72
leaf SHARED3PAY with 0.72, 0.72
node AUTO with 0.72, 1.0
leaf WALK with 0.72, 0.72
leaf BIKE with 0.72, 0.72
node NONMOTORIZED with 0.72, 1.0
leaf WALK_LOC with 0.72, 0.72
leaf WALK_LRF with 0.72, 0.72
leaf WALK_EXP with 0.72, 0.72
leaf WALK_HVY with 0.72, 0.72
leaf WALK_COM with 0.72, 0.72
leaf DRIVE_LOC with 0.72, 0.72
leaf DRIVE_LRF with 0.72, 0.72
leaf DRIVE_EXP with 0.72, 0.72
leaf DRIVE_HVY with 0.72, 0.72
leaf DRIVE_COM with 0.72, 0.72
node TRANSIT with 0.72, 1.0
leaf TAXI with 0.36, 0.36
leaf TNC_SINGLE with 0.36, 0.36
leaf TNC_SHARED with 0.36, 0.36
node RIDEHAIL with 0.36, 1.0
node root with 1.0, 0


In [39]:
nest_spec = ns[0]
for nest in logit.each_nest(nest_spec, type='leaf'):
    nest.print()

Nest name: DRIVEALONEFREE level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'DRIVEALONEFREE'] parent nest scale: 0.72
Nest name: DRIVEALONEPAY level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'DRIVEALONEPAY'] parent nest scale: 0.72
Nest name: SHARED2FREE level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'SHARED2FREE'] parent nest scale: 0.72
Nest name: SHARED2PAY level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'SHARED2PAY'] parent nest scale: 0.72
Nest name: SHARED3FREE level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'SHARED3FREE'] parent nest scale: 0.72
Nest name: SHARED3PAY level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'AUTO', 'SHARED3PAY'] parent nest scale: 0.72
Nest name: WALK level: 3 coefficient: 0.72 product_of_coefficients: 0.72 ancestors: ['root', 'NONMOTORIZED', 'WA

In [29]:
# nested_probs[0] - these are relative in each nest, so the full probs are potentially just the product of these
base_probs[0]

Unnamed: 0_level_0,DRIVEALONEFREE,DRIVEALONEPAY,SHARED2FREE,SHARED2PAY,SHARED3FREE,SHARED3PAY,WALK,BIKE,WALK_LOC,WALK_LRF,WALK_EXP,WALK_HVY,WALK_COM,DRIVE_LOC,DRIVE_LRF,DRIVE_EXP,DRIVE_HVY,DRIVE_COM,TAXI,TNC_SINGLE,TNC_SHARED
trip_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8805121,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.266799e-08,7.843494e-08,2.071998e-08
8805125,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.057327e-08,7.325707e-08,1.950799e-08
8805126,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.864169e-08,9.376443e-08,2.428075e-08
468119921,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.096638e-08,2.754998e-10,4.051502e-08
468119922,0.0,0.0,0.0,0.0,0.0,0.0,0.999874,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.658716e-05,6.599176e-07,9.832967e-05
468119925,0.0,0.0,0.0,0.0,0.0,0.0,0.999875,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.651144e-05,6.601311e-07,9.83137e-05
468119926,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.568041e-08,2.408144e-10,3.615344e-08
468119927,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.308505e-08,1.315547e-10,2.041405e-08
468119928,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.367465e-09,1.493582e-10,1.939597e-08
642446345,0.0,0.0,0.004262023,0.0,4.103657e-14,0.0,0.039196,0.0,0.19705,0.759044,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.388831e-05,0.0001141304,0.0002502967


In [13]:
# for later: this seems to be a non-driving segment - grab workers or something later?
# for now: implement a run with seeding so I can reproduce these counts

# actually, can I manipulate pipeline OR inject table dynamically? If I reproduce a single observation many times we
# can check mode shares
# 1) check tests
# 2) check code in mode choice, maybe duplicate here, see old code below

In [40]:
def mode_choice_for_trip(choose_individual_max_utility, trip_id_to_check, num_samples):
    """open pipeline and load stuff for mode choice dev assuming model has been run and pipeline.h5 exists"""
    resume_after = "trip_scheduling"
    model_name = "trip_mode_choice"
    chunk_size = 0  # test_mtc means no chunking

    pipeline.open_pipeline(resume_after)
    # preload any bulky injectables (e.g. skims) not in pipeline
    inject.get_injectable('preload_injectables', None)
    pipeline._PIPELINE.rng().begin_step(model_name)
    #step_name = model_name
    args = {}
    #checkpoint = pipeline.intermediate_checkpoint(model_name)
    inject.set_step_args(args)

    trips = inject.get_table('trips')
    tours_merged = inject.get_table('tours_merged')
    network_los = inject.get_injectable('network_los')

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    print("Running with %d trips", trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_specs = config.get_logit_model_settings(model_settings)

    estimator = estimation.manager.begin_estimation('trip_mode_choice')

    #choices_list = []


    # grab one, duplicate num_samples times
    trips_segment = trips_merged.loc[trips_merged.index == trip_id_to_check].copy()
    primary_purpose = trips_segment['primary_purpose'].values[0]
    trips_segment = trips_segment.loc[trips_segment.index.repeat(num_samples)]

    # need to add new row_states for rng here, need to ensure there are no collisions with existing keys
    existing_indexes = pipeline._PIPELINE.rng().get_channel_for_df(trips_merged).row_states.index.values
    num_new_indexes = trips_segment.shape[0]
    new_indexes = np.arange(existing_indexes.max()+1, existing_indexes.max() + num_new_indexes + 1)

    trips_segment.index = new_indexes #+= np.arange(num_samples)
    # name index so tracing knows how to slice
    trips_segment.index.name = 'trip_id'


    # #logger.warning("Change seeding back when done with testing")
    # pipeline._PIPELINE.rng.row_states = pd.DataFrame(columns=['row_seed', 'offset'], index=trips_segment.index)
    # pipeline._PIPELINE.rng.row_states["row_seed"] = trips_segment.index.values
    # pipeline._PIPELINE.rng.row_states["offset"] = 0
    pipeline._PIPELINE.rng().add_channel("trips", trips_segment)


    #for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):
    #if (do_these_purposes is not None) and (primary_purpose not in do_these_purposes):
    #    continue

    print("trip_mode_choice tour_type '%s' (%s trips)" %
          (primary_purpose, len(trips_segment.index), ))

    coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(coefficients)

    segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

    expressions.annotate_preprocessors(
        trips_segment, locals_dict, skims,
        model_settings, segment_trace_label)

    locals_dict.update(skims)

    ################ Replace wrapper function
    #     choices = mode_choice_simulate(...)
    spec=simulate.eval_coefficients(model_spec, coefficients, estimator)
    nest_spec = simulate.eval_nest_coefficients(nest_specs, coefficients, segment_trace_label)
    choices = simulate.simple_simulate(
        choosers=trips_segment,
        spec=spec,
        nest_spec=nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        want_logsums=logsum_column_name is not None,
        trace_label=segment_trace_label,
        trace_choice_name='trip_mode_choice',
        estimator=estimator,
        trace_column_names=None,
        choose_individual_max_utility=choose_individual_max_utility)
    # for consistency, always return dataframe, whether or not logsums were requested
    if isinstance(choices, pd.Series):
        choices = choices.to_frame('choice')
    choices.rename(columns={'logsum': logsum_column_name,
                            'choice': mode_column_name},
                   inplace=True)
    if not choose_individual_max_utility:
        alts = spec.columns
        choices[mode_column_name] = choices[mode_column_name].map(dict(list(zip(list(range(len(alts))), alts))))
    ################
    #choices_list.append(choices)
    #choices_df_asim = pd.concat(choices_list)

    # update trips table with choices (and potionally logssums)
    #trips_df = trips_merged.copy() # trips.to_frame()

    #if (do_these_purposes is not None):
    #    trips_df  = trips_df.loc[trips_df.primary_purpose.isin(do_these_purposes)]

    #assign_in_place(trips_df, choices)
    #assert not trips_df[mode_column_name].isnull().any()

    finalise = True
    if finalise:
        inject.set_step_args(None)
        #
        pipeline._PIPELINE.rng().end_step(model_name)
        pipeline.add_checkpoint(model_name)
        if not pipeline.intermediate_checkpoint():
            pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

        pipeline.close_pipeline()

    print("Done")
    return trips_merged, choices


def comp_mode_shares(base_probs, choose_individual_max_utility, num_samples, trip_id_to_check):
    t_, c_ = mode_choice_for_trip(choose_individual_max_utility=choose_individual_max_utility,
                                  trip_id_to_check=trip_id_to_check, num_samples=num_samples)

    sim_mode_shares = c_.trip_mode.value_counts() / c_.shape[0]
    #sim_mode_shares.columns = ["mode_share_sim"]
    obs_probs = base_probs[0].loc[base_probs[0].index == trip_id_to_check].T
    obs_probs.columns = ["mode_share_obs"]
    ms_comp = obs_probs.merge(sim_mode_shares, left_index=True, right_index=True, how="outer").fillna(0)
    ms_comp["diff"] = ms_comp["trip_mode"] - ms_comp["mode_share_obs"]
    ms_comp["rel_diff"] = ms_comp["diff"] / ms_comp["mode_share_obs"]
    #ms_comp.style.format('{:.2}')
    #with pd.option_context("precision", 3):
    #    display(ms_comp)
    display(ms_comp.loc[((ms_comp.mode_share_obs != 0) | (ms_comp.trip_mode != 0)), ["mode_share_obs", "trip_mode"]].T)
    return ms_comp

In [41]:
choose_individual_max_utility = False
num_samples = 10000
trip_id_to_check = 642446345
ms_comp = comp_mode_shares(base_probs, choose_individual_max_utility, num_samples, trip_id_to_check)

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'social' (10000 trips)
Done


Unnamed: 0,SHARED2FREE,SHARED3FREE,TAXI,TNC_SHARED,TNC_SINGLE,WALK,WALK_LOC,WALK_LRF
mode_share_obs,0.004262,4.103657e-14,8.4e-05,0.00025,0.000114,0.039196,0.19705,0.759044
trip_mode,0.0043,0.0,0.0001,0.0005,0.0001,0.0391,0.2,0.7559


In [None]:
choose_individual_max_utility = True
num_samples = 100000  # 1e5 takes about 25s
trip_id_to_check = 642446345 # 2464104885 # 642446345 # 1767182945
ms_comp = comp_mode_shares(base_probs, choose_individual_max_utility, num_samples, trip_id_to_check)

register joint_tour_participants: no rows with household_id in [982875].
estimation bundle trip_mode_choice not in settings file estimation.yaml


Running with %d trips 482
trip_mode_choice tour_type 'social' (100000 trips)
