In [1]:
%load_ext autoreload

In [2]:
%autoreload 2

In [3]:
import os
import argparse
from datetime import datetime

import numpy as np
import pandas as pd

from activitysim.cli import run
from activitysim.core import inject

In [4]:
pd.set_option("max_columns", 500)

In [5]:
utils = np.array([[1,2,3,4,5,6],[4,6,5,9,9,6],[7,8,9,1,2,3]]).reshape((3,3,2))
print(utils.shape)
print(np.argmax(utils, axis=1))
np.argmax(utils, axis=1).flatten(order="F")

(3, 3, 2)
[[2 2]
 [2 1]
 [1 0]]


array([2, 2, 1, 2, 1, 0])

In [124]:
ch_array = np.argmax(utils, axis=1)
ch_array

array([[2, 2],
       [2, 1],
       [1, 0]])

In [191]:
probs = np.array([[0.1,0.7,0.1,0.1], [0.0,0.5,0.25,0.25], [0.3,0.3,0.2,0.2]])
print(probs.shape)
probs

(3, 4)


array([[0.1 , 0.7 , 0.1 , 0.1 ],
       [0.  , 0.5 , 0.25, 0.25],
       [0.3 , 0.3 , 0.2 , 0.2 ]])

In [174]:
np.repeat(probs[:,:,None], 3, axis=2).shape

(3, 4, 3)

In [198]:
np.repeat(np.arange(0,probs.shape[0]), 2)

array([0, 0, 1, 1, 2, 2])

In [202]:
np.tile(np.arange(0,probs.shape[0]), 3)

array([0, 1, 2, 0, 1, 2, 0, 1, 2])

In [6]:
root_dir = "/mnt/c/Users/jan.zill/code/activitysim"
example_dir = os.path.join(root_dir, "test_example_mtc_frozen_rand")

In [7]:
os.chdir(example_dir)

In [8]:
parser = argparse.ArgumentParser()
run.add_run_args(parser)
args = parser.parse_args(['-c', 'configs', '-o', 'output_without_before', '-d', 'data'])
#run.run(args)  # 2mins full example run
if not inject.is_injectable('preload_injectables'):
    from activitysim import abm  # register abm steps and other abm-specific injectables
run.handle_standard_args(args)  # possibly update injectables

In [9]:
from activitysim.core import inject
from activitysim.core import pipeline
from activitysim.core import config
from activitysim.core import simulate
from activitysim.abm.models.util import estimation
from activitysim.abm.tables import shadow_pricing
from activitysim.core import interaction_simulate
from activitysim.core import logit
from activitysim.core.simulate import set_skim_wrapper_targets
from activitysim.core import chunk

In [10]:
from activitysim.core.logit import inverse_ev1_cdf

def hack_make_sample_choices(
    choosers, probs,
    alternatives,
    sample_size, alternative_count, alt_col_name,
    allow_zero_probs,
    trace_label,
    utilities=None,
    choose_individual_max_utility=False
):
    assert isinstance(probs, pd.DataFrame)
    assert probs.shape == (len(choosers), alternative_count)
    assert isinstance(alternatives, pd.DataFrame)
    assert len(alternatives) == alternative_count

    if allow_zero_probs:
        zero_probs = (probs.sum(axis=1) == 0)
        if zero_probs.all():
            return pd.DataFrame(columns=[alt_col_name, 'rand', 'prob', choosers.index.name])
        if zero_probs.any():
            # remove from sample
            probs = probs[~zero_probs]
            choosers = choosers[~zero_probs]
            # TODO [janzill Jun2022]: do we want this for consistency?
            #  might need this in other places too?
            if utilities is not None:
                utilities = utilities[~zero_probs]

    if choose_individual_max_utility:
        assert isinstance(utilities, pd.DataFrame)
        #print(utilities.head(3))
        assert utilities.shape == (len(choosers), alternative_count)
        #print(utilities.shape)

        choice_dimension = (len(choosers), alternative_count, sample_size)
        rands = pipeline.get_rn_generator().random_for_df(utilities, n=alternative_count*sample_size)
        #print(f"after generation rands shape = {rands.shape}", flush=True)
        rands = rands.reshape(choice_dimension)
        #print(f"before inverse_ev1 rands shape = {rands.shape}", flush=True)
        rands = inverse_ev1_cdf(rands)
        #print(f"after inverse_ev1 rands shape = {rands.shape}", flush=True)
        utilities = utilities.to_numpy()  # this should be much cleaner once xarray changes are implemented
        utilities = np.repeat(utilities[:,:,None], sample_size, axis=2)
        #print(f"after utils reshape: {utilities.shape}", flush=True)
        utilities += rands
        # this gives us len(choosers), sample_size dimensions, with values the chosen alternative
        choices_array = np.argmax(utilities, axis=1)
        print(choices_array.shape)

        choosers_index_rep = np.tile(np.arange(0,choices_array.shape[0]), sample_size)
        #np.repeat(np.arange(0,choices_array.shape[0]), sample_size)
        choices_flattened = choices_array.flatten(order='F')
        #print(f"choices flattened shape = {choices_flattened.shape}")

        print(choosers_index_rep.shape, flush=True)
        print(probs.shape, flush=True)
        print(probs.head(3), flush=True)
        probs_look_up = probs.to_numpy()[choosers_index_rep, choices_flattened]
        #print(f"probs_look_up shape = {probs_look_up.shape}", flush=True)

        # choices_flattened are 0-based index into alternatives, need to map to alternative values given by
        #  alternatives.index.values (they are in this order by construction)
        # explode to one row per chooser.index, alt_zone_id
        choices_df = pd.DataFrame({
            alt_col_name: alternatives.index.values[choices_flattened],
            #'rand': rands.flatten(order='F'),
            'rand': np.zeros_like(choosers_index_rep), # TODO: zero out for now
            'prob': probs_look_up.flatten(order='F'),
            # repeat is wrong here - we do not want 1,1,2,2,3,3, etc, but 1,2,3,1,2,3 by construction
            #choosers.index.name: np.repeat(np.asanyarray(choosers.index), sample_size)
            choosers.index.name: np.tile(choosers.index.values, sample_size)
        })

    else:
        cum_probs_array = probs.values.cumsum(axis=1)
        # alt probs in convenient layout to return prob of chose alternative
        # (same layout as cum_probs_arr)
        alt_probs_array = probs.values.flatten()
        # get sample_size rands for each chooser
        rands = pipeline.get_rn_generator().random_for_df(probs, n=sample_size)
        # transform as we iterate over alternatives
        # reshape so rands[i] is in broadcastable (2-D) shape for cum_probs_arr
        # i.e rands[i] is a 2-D array of one alt choice rand for each chooser
        rands = rands.T.reshape(sample_size, -1, 1)
        # the alternative value chosen
        choices_array = np.empty([sample_size, len(choosers)]).astype(alternatives.index.dtype)
        # chunk log these later after we populate them...
        # the probability of the chosen alternative
        choice_probs_array = np.empty([sample_size, len(choosers)])
        # chunk log these later after we populate them...
        alts = np.tile(alternatives.index.values, len(choosers))
        # FIXME - do this all at once rather than iterate?
        for i in range(sample_size):
            # FIXME - do this in numpy, not pandas?
            # rands for this alt in broadcastable shape
            r = rands[i]

            # position of first occurrence of positive value
            positions = np.argmax(cum_probs_array > r, axis=1)

            # FIXME - leave positions as numpy array, not pandas series?
            # positions is series with the chosen alternative represented as a column index in probs
            # which is an integer between zero and num alternatives in the alternative sample
            positions = pd.Series(positions, index=probs.index)

            # need to get from an integer offset into the alternative sample to the alternative index
            # that is, we want the index value of the row that is offset by <position> rows into the
            # tranche of this choosers alternatives created by cross join of alternatives and choosers

            # offsets is the offset into model_design df of first row of chooser alternatives
            offsets = np.arange(len(positions)) * alternative_count

            # choices and choice_probs have one element per chooser and is in same order as choosers
            choices_array[i] = np.take(alts, positions + offsets)
            choice_probs_array[i] = np.take(alt_probs_array, positions + offsets)
            del positions
            del offsets

        del alts
        del cum_probs_array
        del alt_probs_array

        # explode to one row per chooser.index, alt_zone_id
        choices_df = pd.DataFrame(
            {alt_col_name: choices_array.flatten(order='F'),
             'rand': rands.flatten(order='F'),
             'prob': choice_probs_array.flatten(order='F'),
             choosers.index.name: np.repeat(np.asanyarray(choosers.index), sample_size)
             })

    return choices_df

In [11]:
%%time

choose_individual_max_utility = True

resume_after = "compute_accessibility"
model_name = "school_location"
chunk_size = 0  # test_mtc means no chunking

pipeline.open_pipeline(resume_after)
# preload any bulky injectables (e.g. skims) not in pipeline
inject.get_injectable('preload_injectables', None)
pipeline._PIPELINE.rng().begin_step(model_name)
#step_name = model_name
args = {}
#checkpoint = pipeline.intermediate_checkpoint(model_name)
inject.set_step_args(args)


persons_merged = inject.get_table('persons_merged')
network_los = inject.get_injectable('network_los')
households = inject.get_table('households')
persons = inject.get_table('persons')
locutor = inject.get_injectable('locutor')

trace_label = model_name #'school_location'
model_settings_file_name = f"{model_name}.yaml" #'school_location.yaml'
model_settings = config.read_model_settings(model_settings_file_name)
estimator = estimation.manager.begin_estimation(model_name)
# iterate_location_choice()
chunk_tag = trace_label

# boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']
dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None
persons_merged_df = persons_merged.to_frame()
persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]
persons_merged_df.sort_index(inplace=True)  # interaction_sample expects chooser index to be monotonic increasing

# chooser segmentation allows different sets coefficients for e.g. different income_segments or tour_types
chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']
assert chooser_segment_column in persons_merged_df, f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in " \
                                                    f"persons_merged table."
shadow_price_calculator = shadow_pricing.load_shadow_price_calculator(model_settings)
chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']
# maps segment names to compact (integer) ids
segment_ids = model_settings['SEGMENT_IDS']

sample_list = []
for segment_name, segment_id in segment_ids.items():
    print(f"running {segment_name}, {segment_id}")
    choosers = persons_merged_df[persons_merged_df[chooser_segment_column] == segment_id]
    # size_term and shadow price adjustment - one row per zone
    dest_size_terms = shadow_price_calculator.dest_size_terms(segment_name)
    assert dest_size_terms.index.is_monotonic_increasing, f"shadow_price_calculator.dest_size_terms({segment_name}) " \
                                                         f"not monotonic_increasing"
    if choosers.shape[0] == 0:
        print(f"{trace_label} skipping segment {segment_name}: no choosers")
        continue
    print(f"dropping {(~(dest_size_terms.size_term > 0)).sum()} "
          f"of {len(dest_size_terms)} rows where size_term is zero")
    dest_size_terms = dest_size_terms[dest_size_terms.size_term > 0]
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers_location_sample = choosers[chooser_columns]
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('home_zone_id', 'zone_id')
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    assert not choosers_location_sample.empty
    print("Running %s with %d persons" % (trace_label, len(choosers_location_sample.index)))
    sample_size = model_settings["SAMPLE_SIZE"]
    locals_d = {
        'skims': skims,
        'segment_size': segment_name
    }
    constants = config.get_model_constants(model_settings)
    locals_d.update(constants)
    spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                     segment_name=segment_name, estimator=estimator)
    ### choices = interaction_sample()
    alt_col_name=alt_dest_col_name
    allow_zero_probs=False
    log_alt_losers=False
    # we return alternatives ordered in (index, alt_col_name)
    # if choosers index is not ordered, it is probably a mistake, since the alts wont line up
    assert alt_col_name is not None
    assert choosers.index.is_monotonic_increasing

    # FIXME - legacy logic - not sure this is needed or even correct?
    sample_size = min(sample_size, len(dest_size_terms.index))

    result_list = []
    for i, chooser_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers(choosers_location_sample, chunk_size, trace_label,
                                                                               chunk_tag):

        ### choices = hack_interaction_sample
        # chooser = chooser_chunk
        #alternatives = dest_size_terms
        #trace_label=chunk_trace_label

        num_choosers = len(chooser_chunk.index)
        assert num_choosers > 0
        if len(spec.columns) > 1:
            raise RuntimeError('spec must have only one column')
        # if using skims, copy index into the dataframe, so it will be
        # available as the "destination" for set_skim_wrapper_targets
        if skims is not None and dest_size_terms.index.name not in dest_size_terms:
            dest_size_terms = dest_size_terms.copy()
            dest_size_terms[dest_size_terms.index.name] = dest_size_terms.index

        chooser_index_id = interaction_simulate.ALT_CHOOSER_ID if log_alt_losers else None

        # - cross join choosers and alternatives (cartesian product)
        # for every chooser, there will be a row for each alternative
        # index values (non-unique) are from alternatives df
        alternative_count = dest_size_terms.shape[0]
        interaction_df =\
            logit.interaction_dataset(chooser_chunk, dest_size_terms, sample_size=alternative_count,
                                      chooser_index_id=chooser_index_id)

        assert alternative_count == len(interaction_df.index) / len(chooser_chunk.index)

        if skims is not None:
            set_skim_wrapper_targets(interaction_df, skims)

        # evaluate expressions from the spec multiply by coefficients and sum
        # spec is df with one row per spec expression and one col with utility coefficient
        # column names of interaction_df match spec index values
        # utilities has utility value for element in the cross product of choosers and alternatives
        # interaction_utilities is a df with one utility column and one row per row in interaction_df
        trace_rows = trace_ids = None

        # interaction_utilities is a df with one utility column and one row per interaction_df row
        interaction_utilities, trace_eval_results = interaction_simulate.eval_interaction_utilities(
            spec, interaction_df, locals_d, chunk_trace_label, trace_rows, estimator=None,
            log_alt_losers=log_alt_losers
        )
        # ########### HWM - high water mark (point of max observed memory usage)
        #del interaction_df

        # reshape utilities (one utility column and one row per row in interaction_utilities)
        # to a dataframe with one row per chooser and one column per alternative
        utilities = pd.DataFrame(
            interaction_utilities.values.reshape(len(chooser_chunk), alternative_count),
            index=chooser_chunk.index)
        #del interaction_utilities

        # convert to probabilities (utilities exponentiated and normalized to probs)
        # probs is same shape as utilities, one row per chooser and one column for alternative
        probs = logit.utils_to_probs(utilities, allow_zero_probs=allow_zero_probs,
                                     trace_label=chunk_trace_label, trace_choosers=chooser_chunk)
        #del utilities

        temp_choices = hack_make_sample_choices(
            chooser_chunk, probs, dest_size_terms,
            sample_size, alternative_count, alt_col_name,
            allow_zero_probs=allow_zero_probs,
            trace_label=chunk_trace_label,
            utilities=utilities,
            choose_individual_max_utility=choose_individual_max_utility
        )

        choices_df = temp_choices.copy()

        # pick_count and pick_dup
        # pick_count is number of duplicate picks
        # pick_dup flag is True for all but first of duplicates
        pick_group = choices_df.groupby([choosers.index.name, alt_col_name])
        # number each item in each group from 0 to the length of that group - 1.
        choices_df['pick_count'] = pick_group.cumcount(ascending=True)
        # flag duplicate rows after first
        choices_df['pick_dup'] = choices_df['pick_count'] > 0
        # add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge)
        choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1
        # drop the duplicates
        choices_df = choices_df[~choices_df['pick_dup']]
        del choices_df['pick_dup']
        # set index after groupby so we can trace on it
        choices_df.set_index(choosers.index.name, inplace=True)
        # don't need this after tracing
        del choices_df['rand']
        # - NARROW
        choices_df['prob'] = choices_df['prob'].astype(np.float32)
        assert (choices_df['pick_count'].max() < 4294967295) or (choices_df.empty)
        choices_df['pick_count'] = choices_df['pick_count'].astype(np.uint32)


        if choices_df.shape[0] > 0:
            result_list.append(choices_df)

    if len(result_list) > 1:
        choices_df = pd.concat(result_list)
    assert allow_zero_probs or (len(choosers_location_sample.index) == len(np.unique(choices_df.index.values))), \
        "what is this"
    # keep alts in canonical order so choices based on their probs are stable across runs
    choices_df = choices_df.sort_values(by=alt_col_name).sort_index(kind='mergesort')

    sample_list.append(choices_df)

finalise = True
if finalise:
    inject.set_step_args(None)
    #
    pipeline._PIPELINE.rng().end_step(model_name)
    pipeline.add_checkpoint(model_name)
    if not pipeline.intermediate_checkpoint():
        pipeline.add_checkpoint(pipeline.FINAL_CHECKPOINT_NAME)

    pipeline.close_pipeline()

estimation bundle school_location not in settings file estimation.yaml


running university, 3
dropping 19 of 25 rows where size_term is zero
Running school_location with 17 persons
(17, 6)
(102,)
(17, 6)
                  0         1         2         3         4         5
person_id                                                            
325623     0.001874  0.005184  0.002695  0.299427  0.508256  0.182563
386007     0.001584  0.724879  0.145533  0.080880  0.041571  0.005553
1774265    0.019349  0.146800  0.038209  0.486232  0.273819  0.035591
running highschool, 2
dropping 23 of 25 rows where size_term is zero
Running school_location with 5 persons
(5, 2)
(10,)
(5, 2)
                  0         1
person_id                    
386062     0.176063  0.823937
595685     0.057237  0.942763
2877285    0.126995  0.873005
running gradeschool, 1
dropping 0 of 25 rows where size_term is zero
Running school_location with 17 persons
(17, 25)
(425,)
(17, 25)
                 0         1         2         3         4         5   \
person_id                        

In [12]:
temp_choices #.sort_values(by=["person_id"])

Unnamed: 0,alt_dest,rand,prob,person_id
0,10,0,0.205884,386008
1,10,0,0.288773,418442
2,21,0,0.147571,595686
3,3,0,0.005248,644292
4,8,0,0.117232,644478
...,...,...,...,...
420,25,0,0.265744,2566702
421,10,0,0.288773,2877287
422,9,0,0.336708,3596365
423,6,0,0.032124,3891104


In [28]:
test_df = temp_choices.copy()
# pick_count and pick_dup
# pick_count is number of duplicate picks
# pick_dup flag is True for all but first of duplicates
pick_group = test_df.groupby([choosers.index.name, alt_col_name])
# number each item in each group from 0 to the length of that group - 1.
test_df['pick_count'] = pick_group.cumcount(ascending=True)
# flag duplicate rows after first
test_df['pick_dup'] = test_df['pick_count'] > 0
# add reverse cumcount to get total pick_count (conveniently faster than groupby.count + merge)
test_df['pick_count'] += pick_group.cumcount(ascending=False) + 1
# drop the duplicates
test_df = test_df[~test_df['pick_dup']]
del test_df['pick_dup']
# set index after groupby so we can trace on it
test_df.set_index(choosers.index.name, inplace=True)
del test_df['rand']

In [29]:
test_df

Unnamed: 0_level_0,alt_dest,prob,pick_count
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
386008,10,0.205884,4
418442,10,0.288773,9
595686,21,0.147571,4
644292,3,0.005248,1
644478,8,0.117232,4
...,...,...,...
644478,6,0.032124,1
2458502,25,0.032621,1
2458503,11,0.078812,1
2566700,16,0.036706,1


In [39]:
test_df = temp_choices.copy()
del test_df['rand']
test_df.value_counts().to_frame("pick_count").reset_index(["prob", "alt_dest"])

Unnamed: 0_level_0,alt_dest,prob,pick_count
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3596365,9,0.336708,13
386008,9,0.336708,12
2877287,10,0.288773,10
1958678,9,0.336708,10
2458502,8,0.301414,9
...,...,...,...
2458503,11,0.078812,1
644292,3,0.005248,1
2458502,10,0.103895,1
3891104,10,0.097048,1


In [31]:
test_df

Unnamed: 0,alt_dest,rand,prob,person_id
0,10,0,0.205884,386008
1,10,0,0.288773,418442
2,21,0,0.147571,595686
3,3,0,0.005248,644292
4,8,0,0.117232,644478
...,...,...,...,...
420,25,0,0.265744,2566702
421,10,0,0.288773,2877287
422,9,0,0.336708,3596365
423,6,0,0.032124,3891104


In [40]:
utilities

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
person_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
386008,0.158242,1.108732,1.816462,1.271522,2.801208,3.980078,4.872271,5.9063,6.663034,6.171129,5.090286,1.045189,-0.737088,0.241825,-0.12349,3.57109,3.078758,3.017454,2.996729,4.33388,4.196068,1.575772,0.641332,1.847401,4.099442
418442,0.112642,1.063132,1.770862,1.220222,2.744208,3.70325,4.445377,5.495825,6.137626,6.368157,5.320152,1.124989,-0.657288,0.321625,-0.04369,3.65089,3.152858,3.097254,3.244376,4.66226,4.372339,1.530172,0.595732,1.796101,4.042442
595686,0.363442,1.313932,2.021662,1.599651,3.201645,3.72605,4.56031,5.610758,5.254657,5.202408,5.385828,1.836706,-0.149232,0.604582,0.20711,4.469041,3.740891,3.991371,3.145862,4.0055,5.488831,1.780972,0.846532,2.046901,4.184942
644292,0.426142,1.419508,2.267947,1.796678,3.710634,4.948799,5.808154,5.939138,5.530123,5.16957,5.057449,1.508326,-0.379098,0.498325,0.13301,3.81049,3.312458,3.137154,2.751629,3.584558,4.421596,1.843672,0.909232,2.115301,4.430042
644478,1.242073,1.912078,2.267947,1.928031,2.938941,3.61775,4.287139,4.912307,4.861357,4.723356,4.408683,1.672516,0.343338,1.655398,1.174217,5.355667,4.364813,3.245454,2.688929,3.430658,4.290244,2.065972,1.234132,2.393491,4.276142
1958678,0.158242,1.108732,1.816462,1.271522,2.801208,3.980078,4.872271,5.9063,6.663034,6.171129,5.090286,1.045189,-0.737088,0.241825,-0.12349,3.57109,3.078758,3.017454,2.996729,4.33388,4.196068,1.575772,0.641332,1.847401,4.099442
2159059,0.044242,0.994732,1.702462,1.151822,2.664408,3.61775,4.338439,5.24954,5.661475,5.941263,5.073867,1.056589,-0.719988,0.258925,-0.10639,3.84469,3.346658,3.301773,3.572756,4.974221,4.914166,1.461772,0.527332,1.733401,3.962642
2219998,1.242073,1.912078,2.267947,1.928031,2.938941,3.61775,4.287139,4.912307,4.861357,4.723356,4.408683,1.672516,0.343338,1.655398,1.174217,5.355667,4.364813,3.245454,2.688929,3.430658,4.290244,2.065972,1.234132,2.393491,4.276142
2458502,0.409042,1.370251,2.153014,1.731002,3.480768,4.718933,5.643964,6.579479,6.07195,5.514369,5.238057,1.590421,-0.395517,0.486925,0.11591,3.79909,3.306758,3.165654,2.814329,3.698558,4.569367,1.826572,0.892132,2.092501,4.355942
2458503,0.409042,1.370251,2.153014,1.731002,3.480768,4.718933,5.643964,6.579479,6.07195,5.514369,5.238057,1.590421,-0.395517,0.486925,0.11591,3.79909,3.306758,3.165654,2.814329,3.698558,4.569367,1.826572,0.892132,2.092501,4.355942


In [None]:
display(sample_size)
sample_list[2].groupby('person_id').pick_count.sum()

In [46]:
utils = np.array([[1,2,3,4,5,6],[4,6,5,9,9,6]])
print(utils.shape)
print(utils)
print(np.argmax(utils, axis=1))

(2, 6)
[[1 2 3 4 5 6]
 [4 6 5 9 9 6]]
[5 3]


In [49]:
utils[np.argmax(utils, axis=1, keepdim=True)]

TypeError: _argmax_dispatcher() got an unexpected keyword argument 'keepdim'

In [71]:
utils[np.arange(2), np.argmax(utils, axis=1)] += 1

In [72]:
utils

array([[ 1,  2,  3,  4,  5,  7],
       [ 4,  6,  5, 10,  9,  6]])

In [74]:
dest_size_terms.index.values

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25])