-
Notifications
You must be signed in to change notification settings - Fork 96
/
households.py
118 lines (81 loc) · 4.21 KB
/
households.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# ActivitySim
# See full license in LICENSE.txt.
from __future__ import (absolute_import, division, print_function, )
from future.standard_library import install_aliases
install_aliases() # noqa: E402
from builtins import range
import logging
import pandas as pd
import numpy as np
from activitysim.core import tracing
from activitysim.core import pipeline
from activitysim.core import inject
from .input_store import read_input_table
logger = logging.getLogger(__name__)
@inject.table()
def households(households_sample_size, override_hh_ids, trace_hh_id):
    """
    Load the households input table and register it as the 'households' table.

    The full table read from the input store is either used as-is or reduced,
    with the first matching rule winning:

    - override_hh_ids is not None: keep only households whose index value
      is in override_hh_ids
    - trace_hh_id is truthy and households_sample_size == 1: keep only what
      tracing.slice_ids returns for trace_hh_id
    - 0 < households_sample_size < number of households: draw a repeatable
      random sample without replacement, swapping the traced household in
      for the first sampled row if the sample missed it
    - otherwise: keep every household

    Whether one of the first three rules applied is published as the
    'households_sliced' injectable.

    Parameters
    ----------
    households_sample_size : int
        requested number of households
    override_hh_ids : list-like or None
        explicit household ids to load
    trace_hh_id : household id, or a falsy value when not tracing

    Returns
    -------
    pandas.DataFrame
        households with index named 'household_id' and an added 'chunk_id'
        column numbering the rows 0..n-1

    Raises
    ------
    RuntimeError
        if override_hh_ids is given but none of its ids is in the input table
    """
    all_households = read_input_table("households")
    full_count = all_households.shape[0]

    logger.info("full household list contains %s households" % full_count)

    if override_hh_ids is not None:
        # only the households listed in override_hh_ids are used;
        # trace_hh_id will not be used if it is not in that list
        override_count = len(override_hh_ids)
        logger.info("override household list containing %s households" % override_count)

        in_override = all_households.index.isin(override_hh_ids)
        df = all_households[in_override]
        households_sliced = True

        found_count = df.shape[0]
        if found_count < override_count:
            logger.info("found %s of %s households in override household list" %
                        (found_count, override_count))

        if found_count == 0:
            raise RuntimeError('No override households found in store')

    elif trace_hh_id and households_sample_size == 1:
        # tracing one household exclusively: df contains only trace_hh
        # (or is empty if trace_hh is not in the full store)
        df = tracing.slice_ids(all_households, trace_hh_id)
        households_sliced = True

    elif 0 < households_sample_size < full_count:
        # a proper subset of the full store was requested
        logger.info("sampling %s of %s households" % (households_sample_size, full_count))

        # Because the random seed is set differently for each step, sampling
        # households with Random.global_rng would sample differently depending
        # upon which step it was called from. A one-off rng seeded with the
        # pseudo step name 'sample_households' provides repeatable sampling no
        # matter when the table is loaded.
        # Note that the external rng is also seeded with base_seed, so the
        # sample will (rightly) change if the pipeline rng's base_seed changes.
        sample_rng = pipeline.get_rn_generator().get_external_rng('sample_households')
        sampled_rows = sample_rng.choice(full_count, size=households_sample_size, replace=False)
        df = all_households.take(sampled_rows)
        households_sliced = True

        # tracing, and the sample missed trace_hh although the full store has it
        trace_hh_missed = (
            trace_hh_id
            and trace_hh_id not in df.index
            and trace_hh_id in all_households.index
        )
        if trace_hh_missed:
            # the first sampled household gives up its slot to trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            trace_hh_row = all_households.loc[[trace_hh_id]]
            df = pd.concat([trace_hh_row, df[1:]])

    else:
        # no reduction requested (or possible): use the full store
        df = all_households
        households_sliced = False

    # publish whether the table was reduced
    # NOTE(review): the original comment here reads "persons table"; presumably
    # the persons loader consults this flag - confirm against that code
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (df.shape,))

    df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column
    # used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in df.columns
    row_numbers = list(range(len(df)))
    df['chunk_id'] = pd.Series(row_numbers, index=df.index)

    # replace table function with dataframe
    inject.add_table('households', df)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel(df, 'households')

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
        tracing.trace_df(df, "raw.households", warn_if_empty=True)

    return df
# this is a common merge so might as well define it once here and use it
@inject.table()
def households_merged(households, land_use, accessibility):
    """
    Return the result of inject.merge_tables for households, land_use and
    accessibility, using the households table's name as the merge target.
    """
    merge_sources = [households, land_use, accessibility]
    return inject.merge_tables(households.name, tables=merge_sources)
# NOTE(review): the argument names suggest the households index is matched
# against the persons 'household_id' column - confirm against inject.broadcast
inject.broadcast(
    'households', 'persons',
    cast_index=True, onto_on='household_id')

# this would be accessibility around the household location - be careful with
# this one as accessibility at some other location can also matter
inject.broadcast(
    'accessibility', 'households',
    cast_index=True, onto_on='TAZ')