In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import os
import argparse
from datetime import datetime

import numpy as np
import pandas as pd

from activitysim.cli import run
from activitysim.core import inject

In [4]:
pd.set_option("display.max_columns", 500)

In [5]:
root_dir = "/home/jan/code/activitysim"
example_dir = os.path.join(root_dir, "test_runs", "seq_asim")

In [6]:
os.chdir(example_dir)

FileNotFoundError: [Errno 2] No such file or directory: '/home/jan/code/activitysim/test_runs/seq_asim'

In [None]:
parser = argparse.ArgumentParser()
run.add_run_args(parser)
args = parser.parse_args(['-c', 'configs_lessmodes', '-o', 'output_fru', '-d', 'data'])
#run.run(args)  # 2mins full example run
if not inject.is_injectable('preload_injectables'):
    from activitysim import abm  # register abm steps and other abm-specific injectables
run.handle_standard_args(args)  # possibly update injectables

In [None]:
from activitysim.core import inject
from activitysim.core import pipeline
from activitysim.core import config
from activitysim.core import simulate
from activitysim.abm.models.util import estimation
from activitysim.abm.tables import shadow_pricing
from activitysim.core import interaction_simulate
from activitysim.core import logit
from activitysim.core.simulate import set_skim_wrapper_targets
from activitysim.core import chunk

In [19]:
from activitysim.core.logit import inverse_ev1_cdf

def hack_make_sample_choices(
    choosers, probs,
    alternatives,
    sample_size, alternative_count, alt_col_name,
    allow_zero_probs,
    trace_label,
    utilities=None,
    choose_individual_max_utility=False
):
    assert isinstance(probs, pd.DataFrame)
    assert probs.shape == (len(choosers), alternative_count)
    assert isinstance(alternatives, pd.DataFrame)
    assert len(alternatives) == alternative_count

    if allow_zero_probs:
        zero_probs = (probs.sum(axis=1) == 0)
        if zero_probs.all():
            return pd.DataFrame(columns=[alt_col_name, 'rand', 'prob', choosers.index.name])
        if zero_probs.any():
            # remove from sample
            probs = probs[~zero_probs]
            choosers = choosers[~zero_probs]
            # TODO [janzill Jun2022]: do we want this for consistency?
            #  might need this in other places too?
            if utilities is not None:
                utilities = utilities[~zero_probs]

    if choose_individual_max_utility:
        assert isinstance(utilities, pd.DataFrame)
        #print(utilities.head(3))
        assert utilities.shape == (len(choosers), alternative_count)
        #print(utilities.shape)

        choice_dimension = (len(choosers), alternative_count, sample_size)
        rands = pipeline.get_rn_generator().random_for_df(utilities, n=alternative_count*sample_size)
        #print(f"after generation rands shape = {rands.shape}", flush=True)
        rands = rands.reshape(choice_dimension)
        #print(f"before inverse_ev1 rands shape = {rands.shape}", flush=True)
        rands = inverse_ev1_cdf(rands)
        #print(f"after inverse_ev1 rands shape = {rands.shape}", flush=True)
        utilities = utilities.to_numpy()  # this should be much cleaner once xarray changes are implemented
        utilities = np.repeat(utilities[:,:,None], sample_size, axis=2)
        #print(f"after utils reshape: {utilities.shape}", flush=True)
        utilities += rands
        # this gives us len(choosers), sample_size dimensions, with values the chosen alternative
        choices_array = np.argmax(utilities, axis=1)
        print(choices_array.shape)

        choosers_index_rep = np.tile(np.arange(0,choices_array.shape[0]), sample_size)
        #np.repeat(np.arange(0,choices_array.shape[0]), sample_size)
        choices_flattened = choices_array.flatten(order='F')
        #print(f"choices flattened shape = {choices_flattened.shape}")

        print(choosers_index_rep.shape, flush=True)
        print(probs.shape, flush=True)
        print(probs.head(3), flush=True)
        probs_look_up = probs.to_numpy()[choosers_index_rep, choices_flattened]
        #print(f"probs_look_up shape = {probs_look_up.shape}", flush=True)

        # choices_flattened are 0-based index into alternatives, need to map to alternative values given by
        #  alternatives.index.values (they are in this order by construction)
        # explode to one row per chooser.index, alt_zone_id
        choices_df = pd.DataFrame({
            alt_col_name: alternatives.index.values[choices_flattened],
            #'rand': rands.flatten(order='F'),
            'rand': np.zeros_like(choosers_index_rep), # TODO: zero out for now
            'prob': probs_look_up.flatten(order='F'),
            # repeat is wrong here - we do not want 1,1,2,2,3,3, etc, but 1,2,3,1,2,3 by construction
            #choosers.index.name: np.repeat(np.asanyarray(choosers.index), sample_size)
            choosers.index.name: np.tile(choosers.index.values, sample_size)
        })

    else:
        cum_probs_array = probs.values.cumsum(axis=1)
        # alt probs in convenient layout to return prob of chose alternative
        # (same layout as cum_probs_arr)
        alt_probs_array = probs.values.flatten()
        # get sample_size rands for each chooser
        rands = pipeline.get_rn_generator().random_for_df(probs, n=sample_size)
        # transform as we iterate over alternatives
        # reshape so rands[i] is in broadcastable (2-D) shape for cum_probs_arr
        # i.e rands[i] is a 2-D array of one alt choice rand for each chooser
        rands = rands.T.reshape(sample_size, -1, 1)
        # the alternative value chosen
        choices_array = np.empty([sample_size, len(choosers)]).astype(alternatives.index.dtype)
        # chunk log these later after we populate them...
        # the probability of the chosen alternative
        choice_probs_array = np.empty([sample_size, len(choosers)])
        # chunk log these later after we populate them...
        alts = np.tile(alternatives.index.values, len(choosers))
        # FIXME - do this all at once rather than iterate?
        for i in range(sample_size):
            # FIXME - do this in numpy, not pandas?
            # rands for this alt in broadcastable shape
            r = rands[i]

            # position of first occurrence of positive value
            positions = np.argmax(cum_probs_array > r, axis=1)

            # FIXME - leave positions as numpy array, not pandas series?
            # positions is series with the chosen alternative represented as a column index in probs
            # which is an integer between zero and num alternatives in the alternative sample
            positions = pd.Series(positions, index=probs.index)

            # need to get from an integer offset into the alternative sample to the alternative index
            # that is, we want the index value of the row that is offset by <position> rows into the
            # tranche of this choosers alternatives created by cross join of alternatives and choosers

            # offsets is the offset into model_design df of first row of chooser alternatives
            offsets = np.arange(len(positions)) * alternative_count

            # choices and choice_probs have one element per chooser and is in same order as choosers
            choices_array[i] = np.take(alts, positions + offsets)
            choice_probs_array[i] = np.take(alt_probs_array, positions + offsets)
            del positions
            del offsets

        del alts
        del cum_probs_array
        del alt_probs_array

        # explode to one row per chooser.index, alt_zone_id
        choices_df = pd.DataFrame(
            {alt_col_name: choices_array.flatten(order='F'),
             'rand': rands.flatten(order='F'),
             'prob': choice_probs_array.flatten(order='F'),
             choosers.index.name: np.repeat(np.asanyarray(choosers.index), sample_size)
             })

    return choices_df

In [7]:
def make_sample_choices_dev(
    choosers, probs,
    alternatives,
    sample_size, alternative_count, alt_col_name,
    allow_zero_probs,
    trace_label,
    utilities=None,
    choose_individual_max_utility=False
):
    assert isinstance(probs, pd.DataFrame)
    assert probs.shape == (len(choosers), alternative_count)
    assert isinstance(alternatives, pd.DataFrame)
    assert len(alternatives) == alternative_count

    if allow_zero_probs:
        zero_probs = (probs.sum(axis=1) == 0)
        if zero_probs.all():
            return pd.DataFrame(columns=[alt_col_name, 'rand', 'prob', choosers.index.name])
        if zero_probs.any():
            # remove from sample
            probs = probs[~zero_probs]
            choosers = choosers[~zero_probs]
            # TODO [janzill Jun2022]: do we want this for consistency?
            #  might need this in other places too?
            if utilities is not None:
                utilities = utilities[~zero_probs]


    if choose_individual_max_utility:
     # Note [janzill Jun2022]: this needs for loop for memory like previous method, an array of dimension
        #   (len(choosers), alternative_count, sample_size) can get very large
        choices_array = np.zeros_like(utilities, dtype=np.uint32)
        zero_dim_index = np.arange(utilities.shape[0])

        utils_array = utilities.to_numpy()  # TODO [janzill Jun2022]: once or for each?
        for i in range(sample_size):
            rands = pipeline.get_rn_generator().random_for_df(utilities, n=alternative_count)
            choices_array[zero_dim_index, np.argmax(utils_array - np.log(-np.log(rands)), axis=1)] += 1

        i, j = np.nonzero(choices_array)
        chunk.log_df(trace_label, 'i', i)
        chunk.log_df(trace_label, 'j', j)

        #probs = logit.utils_to_probs(utilities, allow_zero_probs=allow_zero_probs,
        #                             trace_label=trace_label, trace_choosers=choosers)
        del utilities
        choices_df = pd.DataFrame({
            alt_col_name: alternatives.index.values[j],
            "pick_count": choices_array[i, j],
            "prob": probs.to_numpy()[i, j],
            choosers.index.name: choosers.index.values[i]
        })
        del choices_array
        del i
        del j
        # del probs
    
    else:
        cum_probs_array = probs.values.cumsum(axis=1)
        # alt probs in convenient layout to return prob of chose alternative
        # (same layout as cum_probs_arr)
        alt_probs_array = probs.values.flatten()
        # get sample_size rands for each chooser
        rands = pipeline.get_rn_generator().random_for_df(probs, n=sample_size)
        # transform as we iterate over alternatives
        # reshape so rands[i] is in broadcastable (2-D) shape for cum_probs_arr
        # i.e rands[i] is a 2-D array of one alt choice rand for each chooser
        rands = rands.T.reshape(sample_size, -1, 1)
        # the alternative value chosen
        choices_array = np.empty([sample_size, len(choosers)]).astype(alternatives.index.dtype)
        # chunk log these later after we populate them...
        # the probability of the chosen alternative
        choice_probs_array = np.empty([sample_size, len(choosers)])
        # chunk log these later after we populate them...
        alts = np.tile(alternatives.index.values, len(choosers))
        # FIXME - do this all at once rather than iterate?
        for i in range(sample_size):
            # FIXME - do this in numpy, not pandas?
            # rands for this alt in broadcastable shape
            r = rands[i]

            # position of first occurrence of positive value
            positions = np.argmax(cum_probs_array > r, axis=1)

            # FIXME - leave positions as numpy array, not pandas series?
            # positions is series with the chosen alternative represented as a column index in probs
            # which is an integer between zero and num alternatives in the alternative sample
            positions = pd.Series(positions, index=probs.index)

            # need to get from an integer offset into the alternative sample to the alternative index
            # that is, we want the index value of the row that is offset by <position> rows into the
            # tranche of this choosers alternatives created by cross join of alternatives and choosers

            # offsets is the offset into model_design df of first row of chooser alternatives
            offsets = np.arange(len(positions)) * alternative_count

            # choices and choice_probs have one element per chooser and is in same order as choosers
            choices_array[i] = np.take(alts, positions + offsets)
            choice_probs_array[i] = np.take(alt_probs_array, positions + offsets)
            del positions
            del offsets

        del alts
        del cum_probs_array
        del alt_probs_array

        # explode to one row per chooser.index, alt_zone_id
        choices_df = pd.DataFrame(
            {alt_col_name: choices_array.flatten(order='F'),
             'rand': rands.flatten(order='F'),
             'prob': choice_probs_array.flatten(order='F'),
             choosers.index.name: np.repeat(np.asanyarray(choosers.index), sample_size)
             })

        # pick_count and pick_dup
        # pick_count is number of duplicate picks
        # pick_dup flag is True for all but first of duplicates
        pick_group = choices_df.groupby([choosers.index.name, alt_col_name])
        # number each item in each group from 0 to the length of that group - 1.
        choices_df['pick_count'] = pick_group.cumcount(ascending=True)
        # flag duplicate rows after first
        choices_df['pick_dup'] = choices_df['pick_count'] > 0
        # add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge)
        choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1
        # drop the duplicates
        choices_df = choices_df[~choices_df['pick_dup']]
        del choices_df['pick_dup']
        # set index after groupby so we can trace on it
        choices_df.set_index(choosers.index.name, inplace=True)
        # don't need this after tracing
        del choices_df['rand']
        
    return choices_df

In [8]:
%%time

choose_individual_max_utility = True

resume_after = "work_from_home"
model_name = "school_location"
chunk_size = 0  # no chunking

pipeline.open_pipeline(resume_after)
# preload any bulky injectables (e.g. skims) not in pipeline
inject.get_injectable('preload_injectables', None)
pipeline._PIPELINE.rng().begin_step(model_name)
#step_name = model_name
args = {}
#checkpoint = pipeline.intermediate_checkpoint(model_name)
inject.set_step_args(args)


persons_merged = inject.get_table('persons_merged')
network_los = inject.get_injectable('network_los')
households = inject.get_table('households')
persons = inject.get_table('persons')
locutor = inject.get_injectable('locutor')

trace_label = model_name #'school_location'
model_settings_file_name = f"{model_name}.yaml" #'school_location.yaml'
model_settings = config.read_model_settings(model_settings_file_name)
estimator = estimation.manager.begin_estimation(model_name)
# iterate_location_choice()
chunk_tag = trace_label

# boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']
dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None
persons_merged_df = persons_merged.to_frame()
persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]
persons_merged_df.sort_index(inplace=True)  # interaction_sample expects chooser index to be monotonic increasing

# chooser segmentation allows different sets coefficients for e.g. different income_segments or tour_types
chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']
assert chooser_segment_column in persons_merged_df, f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in " \
                                                    f"persons_merged table."
shadow_price_calculator = shadow_pricing.load_shadow_price_calculator(model_settings)
chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']
# maps segment names to compact (integer) ids
segment_ids = model_settings['SEGMENT_IDS']

sample_list = []
for segment_name, segment_id in segment_ids.items():
    print(f"running {segment_name}, {segment_id}")
    choosers = persons_merged_df[persons_merged_df[chooser_segment_column] == segment_id]
    # size_term and shadow price adjustment - one row per zone
    dest_size_terms = shadow_price_calculator.dest_size_terms(segment_name)
    assert dest_size_terms.index.is_monotonic_increasing, f"shadow_price_calculator.dest_size_terms({segment_name}) " \
                                                         f"not monotonic_increasing"
    if choosers.shape[0] == 0:
        print(f"{trace_label} skipping segment {segment_name}: no choosers")
        continue
    print(f"dropping {(~(dest_size_terms.size_term > 0)).sum()} "
          f"of {len(dest_size_terms)} rows where size_term is zero")
    dest_size_terms = dest_size_terms[dest_size_terms.size_term > 0]
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers_location_sample = choosers[chooser_columns]
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('home_zone_id', 'zone_id')
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    assert not choosers_location_sample.empty
    print("Running %s with %d persons" % (trace_label, len(choosers_location_sample.index)))
    sample_size = model_settings["SAMPLE_SIZE"]
    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    locals_d.update(constants)
    spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                     segment_name=segment_name, estimator=estimator)
    ### choices = interaction_sample()
    alt_col_name=alt_dest_col_name
    allow_zero_probs=False
    log_alt_losers=False
    # we return alternatives ordered in (index, alt_col_name)
    # if choosers index is not ordered, it is probably a mistake, since the alts wont line up
    assert alt_col_name is not None
    assert choosers.index.is_monotonic_increasing

    # FIXME - legacy logic - not sure this is needed or even correct?
    sample_size = min(sample_size, len(dest_size_terms.index))

    result_list = []
    for i, chooser_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers(choosers_location_sample, chunk_size, trace_label,
                                                                               chunk_tag):

        ### choices = hack_interaction_sample
        # chooser = chooser_chunk
        #alternatives = dest_size_terms
        #trace_label=chunk_trace_label

        num_choosers = len(chooser_chunk.index)
        assert num_choosers > 0
        if len(spec.columns) > 1:
            raise RuntimeError('spec must have only one column')
        # if using skims, copy index into the dataframe, so it will be
        # available as the "destination" for set_skim_wrapper_targets
        if skims is not None and dest_size_terms.index.name not in dest_size_terms:
            dest_size_terms = dest_size_terms.copy()
            dest_size_terms[dest_size_terms.index.name] = dest_size_terms.index

        chooser_index_id = interaction_simulate.ALT_CHOOSER_ID if log_alt_losers else None

        # - cross join choosers and alternatives (cartesian product)
        # for every chooser, there will be a row for each alternative
        # index values (non-unique) are from alternatives df
        alternative_count = dest_size_terms.shape[0]
        interaction_df =\
            logit.interaction_dataset(chooser_chunk, dest_size_terms, sample_size=alternative_count,
                                      chooser_index_id=chooser_index_id)

        assert alternative_count == len(interaction_df.index) / len(chooser_chunk.index)

        if skims is not None:
            set_skim_wrapper_targets(interaction_df, skims)

        # evaluate expressions from the spec multiply by coefficients and sum
        # spec is df with one row per spec expression and one col with utility coefficient
        # column names of interaction_df match spec index values
        # utilities has utility value for element in the cross product of choosers and alternatives
        # interaction_utilities is a df with one utility column and one row per row in interaction_df
        trace_rows = trace_ids = None

        # interaction_utilities is a df with one utility column and one row per interaction_df row
        interaction_utilities, trace_eval_results = interaction_simulate.eval_interaction_utilities(
            spec, interaction_df, locals_d, chunk_trace_label, trace_rows, estimator=None,
            log_alt_losers=log_alt_losers
        )
        # ########### HWM - high water mark (point of max observed memory usage)
        #del interaction_df

        # reshape utilities (one utility column and one row per row in interaction_utilities)
        # to a dataframe with one row per chooser and one column per alternative
        utilities = pd.DataFrame(
            interaction_utilities.values.reshape(len(chooser_chunk), alternative_count),
            index=chooser_chunk.index)
        #del interaction_utilities

        # convert to probabilities (utilities exponentiated and normalized to probs)
        # probs is same shape as utilities, one row per chooser and one column for alternative
        probs = logit.utils_to_probs(utilities, allow_zero_probs=allow_zero_probs,
                                     trace_label=chunk_trace_label, trace_choosers=chooser_chunk)
        #del utilities

        choices_df = make_sample_choices_dev(
            chooser_chunk, probs, dest_size_terms,
            sample_size, alternative_count, alt_col_name,
            allow_zero_probs=allow_zero_probs,
            trace_label=chunk_trace_label,
            utilities=utilities,
            choose_individual_max_utility=choose_individual_max_utility
        )
        # - NARROW
        choices_df['prob'] = choices_df['prob'].astype(np.float32)
        assert (choices_df['pick_count'].max() < 4294967295) or (choices_df.empty)
        choices_df['pick_count'] = choices_df['pick_count'].astype(np.uint32)

        if choices_df.shape[0] > 0:
            result_list.append(choices_df)

    if len(result_list) > 1:
        choices_df = pd.concat(result_list)
    
    # TODO: why does this fail
    #assert allow_zero_probs or (len(choosers_location_sample.index) == len(np.unique(choices_df.index.values))), \
    #    "what is this"
    
    # keep alts in canonical order so choices based on their probs are stable across runs
    choices_df = choices_df.sort_values(by=alt_col_name).sort_index(kind='mergesort')

    sample_list.append(choices_df)

finalise = True
if finalise:
    inject.set_step_args(None)
    #
    pipeline._PIPELINE.rng().end_step(model_name)
    pipeline.add_checkpoint(model_name)
    if not pipeline.intermediate_checkpoint():
        pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

    pipeline.close_pipeline()

NameError: name 'pipeline' is not defined

In [37]:
def run_fru():
    test_df = make_sample_choices_dev(
        chooser_chunk, probs, dest_size_terms,
        sample_size, alternative_count, alt_col_name,
        allow_zero_probs=allow_zero_probs,
        trace_label=chunk_trace_label,
        utilities=utilities,
        choose_individual_max_utility=True
    )
    

def run_previous():
    test_df = make_sample_choices_dev(
        chooser_chunk, probs, dest_size_terms,
        sample_size, alternative_count, alt_col_name,
        allow_zero_probs=allow_zero_probs,
        trace_label=chunk_trace_label,
        utilities=None,
        choose_individual_max_utility=False
    )

In [17]:
%prun -s cumulative run_previous()

 

         37642 function calls (37233 primitive calls) in 3.187 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    3.187    3.187 {built-in method builtins.exec}
        1    0.003    0.003    3.187    3.187 <string>:1(<module>)
        1    0.000    0.000    3.185    3.185 2292568830.py:12(run_previous)
        1    2.165    2.165    3.185    3.185 3493973527.py:3(make_sample_choices_dev)
  170/148    0.006    0.000    0.344    0.002 {built-in method numpy.core._multiarray_umath.implement_array_function}
        3    0.332    0.111    0.332    0.111 {method 'cumsum' of 'numpy.ndarray' objects}
        4    0.241    0.060    0.241    0.060 {method 'repeat' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.236    0.236 <__array_function__ internals>:177(tile)
        1    0.000    0.000    0.236    0.236 shape_base.py:1171(tile)
        4    0.221    0.055    0.221    0.055 {method 'fl

In [None]:
37642 function calls (37233 primitive calls) in 3.187 seconds
406159 function calls (406079 primitive calls) in 60.697 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   60.697   60.697 {built-in method builtins.exec}
        1    0.000    0.000   60.697   60.697 <string>:1(<module>)
        1    0.033    0.033   60.697   60.697 2292568830.py:1(run_fru)
        1   37.147   37.147   60.664   60.664 3636660809.py:1(make_sample_choices_dev)
       30    0.745    0.025   22.155    0.738 random.py:562(random_for_df)
       30    0.192    0.006   13.381    0.446 random.py:598(<listcomp>)
   404130   13.189    0.000   13.189    0.000 {method 'rand' of 'numpy.random.mtrand.RandomState' objects}
       32    8.025    0.251    8.025    0.251 {built-in method numpy.asanyarray}
    70/66    0.128    0.002    1.361    0.021 {built-in method numpy.core._multiarray_umath.implement_array_function}
       32    0.000    0.000    1.232    0.039 fromnumeric.py:51(_wrapfunc)
       30    0.000    0.000    1.104    0.037 <__array_function__ internals>:177(argmax)
       30    0.000    0.000    1.103    0.037 fromnumeric.py:1127(argmax)
       30    1.103    0.037    1.103    0.037 {method 'argmax' of 'numpy.ndarray' objects}
    
w/o random calls (test where we simply take max(obs_utlils) but everything else is identical)
     1039 function calls (1019 primitive calls) in 1.403 seconds
   Ordered by: cumulative time
   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    1.403    1.403 {built-in method builtins.exec}
        1    0.000    0.000    1.403    1.403 <string>:1(<module>)
        1    0.000    0.000    1.403    1.403 2292568830.py:1(run_fru)
        1    0.021    0.021    1.403    1.403 691721383.py:1(make_sample_choices_dev)
    40/36    0.120    0.003    1.380    0.038 {built-in method numpy.core._multiarray_umath.implement_array_function}
       32    0.000    0.000    1.260    0.039 fromnumeric.py:51(_wrapfunc)
       30    0.000    0.000    1.134    0.038 <__array_function__ internals>:177(argmax)
       30    0.000    0.000    1.133    0.038 fromnumeric.py:1127(argmax)
       30    1.133    0.038    1.133    0.038 {method 'argmax' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.127    0.127 <__array_function__ internals>:177(nonzero)
        1    0.000    0.000    0.127    0.127 fromnumeric.py:1866(nonzero)
        1    0.127    0.127    0.127    0.127 {method 'nonzero' of 'numpy.ndarray' objects}

In [38]:
%prun -s cumulative run_fru()

 

         406159 function calls (406079 primitive calls) in 61.192 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000   61.192   61.192 {built-in method builtins.exec}
        1    0.000    0.000   61.192   61.192 <string>:1(<module>)
        1    0.027    0.027   61.192   61.192 2292568830.py:1(run_fru)
        1   37.470   37.470   61.165   61.165 1111942506.py:1(make_sample_choices_dev)
       30    0.778    0.026   22.305    0.744 random.py:562(random_for_df)
       30    0.195    0.006   13.419    0.447 random.py:598(<listcomp>)
   404130   13.225    0.000   13.225    0.000 {method 'rand' of 'numpy.random.mtrand.RandomState' objects}
       32    8.104    0.253    8.104    0.253 {built-in method numpy.asanyarray}
    70/66    0.122    0.002    1.388    0.021 {built-in method numpy.core._multiarray_umath.implement_array_function}
       32    0.000    0.000    1.265    0.040 fromnumeric.py:51(_