Skip to content

Commit

Permalink
lognormal_for_df: add broadcast support for DataFrames with duplicate index values
Browse files Browse the repository at this point in the history
  • Loading branch information
toliwaga committed Feb 13, 2020
1 parent 5163385 commit 7b0c332
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 18 deletions.
2 changes: 1 addition & 1 deletion activitysim/abm/models/atwork_subtour_destination.py
Expand Up @@ -131,7 +131,7 @@ def atwork_subtour_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label)

destination_sample['mode_choice_logsum'] = logsums
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/joint_tour_destination.py
Expand Up @@ -234,7 +234,7 @@ def joint_tour_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label=tracing.extend_trace_label(trace_label, tour_type))

logsums_list.append(logsums)
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/location_choice.py
Expand Up @@ -222,7 +222,7 @@ def run_location_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label)

# "add_column series should have an index matching the table to which it is being added"
Expand Down
4 changes: 2 additions & 2 deletions activitysim/abm/models/trip_destination.py
Expand Up @@ -155,7 +155,7 @@ def compute_logsums(
tours_merged,
model_settings,
skims,
chunk_size, trace_hh_id,
chunk_size,
trace_label):
"""
Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
Expand Down Expand Up @@ -325,7 +325,7 @@ def choose_trip_destination(
tours_merged=tours_merged,
model_settings=model_settings,
skims=skims,
chunk_size=chunk_size, trace_hh_id=trace_hh_id,
chunk_size=chunk_size,
trace_label=trace_label)

t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0)
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/logsums.py
Expand Up @@ -53,7 +53,7 @@ def compute_logsums(choosers,
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id, trace_label):
chunk_size, trace_label):
"""
Parameters
Expand Down
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/tour_destination.py
Expand Up @@ -154,7 +154,7 @@ def run_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label)

destination_sample['mode_choice_logsum'] = logsums
Expand Down
3 changes: 3 additions & 0 deletions activitysim/core/input.py
Expand Up @@ -92,6 +92,9 @@ def read_from_table_info(table_info):
logger.info('writing %s to %s' % (h5_tablename, h5_filepath))
df.to_hdf(h5_filepath, key=h5_tablename, mode='a')

#bug
#df.to_csv(config.output_file_path('input_data/%s.csv' % tablename), index=True)

if drop_columns:
for c in drop_columns:
logger.info("dropping column '%s'" % c)
Expand Down
84 changes: 74 additions & 10 deletions activitysim/core/random.py
Expand Up @@ -12,6 +12,7 @@

import numpy as np
import pandas as pd
from activitysim.core.util import reindex

from .tracing import print_elapsed_time

Expand Down Expand Up @@ -194,6 +195,11 @@ def _generators_for_df(self, df):
"""

# Generators are matched one-to-one with rows, so df must not contain
# duplicate index values. (Callers that need per-alternative rands for a
# duplicated index should use the broadcast option on normal_for_df /
# lognormal_for_df instead — TODO confirm against callers.)
# Replaces leftover debug code (a bare `bug` name that raised NameError)
# with a single assertion carrying a diagnostic message.
assert len(df.index.unique()) == len(df.index), \
    "duplicate index values in df passed to _generators_for_df"

df_row_states = self.row_states.loc[df.index]
Expand Down Expand Up @@ -250,10 +256,10 @@ def random_for_df(self, df, step_name, n=1):
self.row_states.loc[df.index, 'offset'] += n
return rands

def lognormal_for_df(self, df, step_name, mu, sigma):
def normal_for_df(self, df, step_name, mu, sigma, lognormal=False):
"""
Return a floating point random number in lognormal distribution for each row in df
using the appropriate random channel for each row.
Return a floating point random number in normal (or lognormal) distribution
for each row in df using the appropriate random channel for each row.
Subsequent calls (in the same step) will return the next rand for each df row
Expand Down Expand Up @@ -296,9 +302,14 @@ def to_series(x):
mu = to_series(mu)
sigma = to_series(sigma)

rands = \
np.asanyarray([prng.lognormal(mean=mu[i], sigma=sigma[i])
for i, prng in enumerate(generators)])
if lognormal:
rands = \
np.asanyarray([prng.lognormal(mean=mu[i], sigma=sigma[i])
for i, prng in enumerate(generators)])
else:
rands = \
np.asanyarray([prng.normal(loc=mu[i], scale=sigma[i])
for i, prng in enumerate(generators)])

# update offset for rows we handled
self.row_states.loc[df.index, 'offset'] += 1
Expand Down Expand Up @@ -599,9 +610,9 @@ def random_for_df(self, df, n=1):
rands = channel.random_for_df(df, self.step_name, n)
return rands

def lognormal_for_df(self, df, mu, sigma):
def normal_for_df(self, df, mu=0, sigma=1, broadcast=False):
"""
Return a single floating point random number in range [0, 1) for each row in df
Return a single floating point normal random number in range (-inf, inf) for each row in df
using the appropriate random channel for each row.
Subsequent calls (in the same step) will return the next rand for each df row
Expand All @@ -628,12 +639,65 @@ def lognormal_for_df(self, df, mu, sigma):
Returns
-------
rands : 1-D ndarray the same length as df
rands : 1-D ndarray the same length as df (or Series with same index as df)
a single float in lognormal distribution for each row in df
"""

channel = self.get_channel_for_df(df)
rands = channel.lognormal_for_df(df, self.step_name, mu, sigma)

if broadcast:
alts_df = df
df = df.index.unique().to_series()
rands = channel.normal_for_df(df, self.step_name, mu=0, sigma=1, lognormal=False)
rands = reindex(pd.Series(rands, index=df.index), alts_df.index)
rands = rands*sigma + mu
else:
rands = channel.normal_for_df(df, self.step_name, mu, sigma, lognormal=False)

return rands

def lognormal_for_df(self, df, mu, sigma, broadcast=False):
    """
    Return a single floating point lognormal random number in range (0, inf) for each row in df
    using the appropriate random channel for each row.

    Subsequent calls (in the same step) will return the next rand for each df row

    The resulting array will be the same length (and order) as df
    This method is designed to support alternative selection from a probability array

    The columns in df are ignored; the index name and values are used to determine
    which random number sequence to use.

    We assume that we can identify the channel to use based on the name of df.index
    This channel should have already been registered by a call to add_channel (q.v.)

    If "true pseudo random" behavior is desired (i.e. NOT repeatable) the set_base_seed
    method (q.v.) may be used to globally reseed all random streams.

    Parameters
    ----------
    df : pandas.DataFrame, Series, or Index
        df with index name and values corresponding to a registered channel
    mu : float or array of floats with one value per df row
        mean of the underlying normal distribution (not of the lognormal itself)
    sigma : float or array of floats with one value per df row
        standard deviation of the underlying normal distribution
    broadcast : bool, default False
        if True, df may contain duplicate index values; one standard-normal draw
        is made per unique index value, broadcast to all rows sharing that index,
        then scaled by sigma/mu and exponentiated

    Returns
    -------
    rands : 1-D ndarray the same length as df (or Series with same index as df)
        a single float in lognormal distribution for each row in df
    """

    if broadcast:
        # mu and sigma parameterize the underlying normal distribution the
        # lognormal is derived from, so exponentiating a scaled normal draw
        # yields the desired lognormal variate.
        rands = self.normal_for_df(df, mu=mu, sigma=sigma, broadcast=True)
        rands = np.exp(rands)
    else:
        channel = self.get_channel_for_df(df)
        rands = channel.normal_for_df(df, self.step_name, mu=mu, sigma=sigma, lognormal=True)

    return rands

def choice_for_df(self, df, a, size, replace):
Expand Down
2 changes: 1 addition & 1 deletion docs/howitworks.rst
Expand Up @@ -390,7 +390,7 @@ logsums settings and expression files. The resulting logsums are added to the c
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label)

location_sample_df['mode_choice_logsum'] = logsums
Expand Down

0 comments on commit 7b0c332

Please sign in to comment.