Skip to content

Commit

Permalink
correct lognormal calculations and add functions to calculation logno…
Browse files Browse the repository at this point in the history
…rmal location and scale
  • Loading branch information
bstabler committed Feb 14, 2020
1 parent 7b0c332 commit 4cf3dab
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 55 deletions.
4 changes: 2 additions & 2 deletions activitysim/core/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ def read_from_table_info(table_info):
logger.info('writing %s to %s' % (h5_tablename, h5_filepath))
df.to_hdf(h5_filepath, key=h5_tablename, mode='a')

#bug
#df.to_csv(config.output_file_path('input_data/%s.csv' % tablename), index=True)
# bug
# df.to_csv(config.output_file_path('input_data/%s.csv' % tablename), index=True)

if drop_columns:
for c in drop_columns:
Expand Down
46 changes: 42 additions & 4 deletions activitysim/core/random.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,9 @@ def _generators_for_df(self, df):
"""

# assert no dupes
#bug
# bug
if len(df.index.unique()) < len(df.index):
#print(df)
# print(df)
bug

assert len(df.index.unique()) == len(df.index)
Expand Down Expand Up @@ -690,8 +690,8 @@ def lognormal_for_df(self, df, mu, sigma, broadcast=False):
"""

if broadcast:
# Note that the mean and standard deviation are not the values for the distribution itself,
# but of the underlying normal distribution it is derived from.
# Note that the mean and standard deviation are not the values for the distribution
# itself, but of the underlying normal distribution it is derived from.
rands = self.normal_for_df(df, mu=mu, sigma=sigma, broadcast=True)
rands = np.exp(rands)
else:
Expand Down Expand Up @@ -747,3 +747,41 @@ def choice_for_df(self, df, a, size, replace):
choices = channel.choice_for_df(df, self.step_name, a, size, replace)
t0 = print_elapsed_time("choice_for_df for %s rows" % len(df.index), t0, debug=True)
return choices

def calculate_location(self, mean, std_dev):
    """
    Calculate the lognormal distribution location given the mean and standard
    deviation of the distribution according to the formula

        location = ln(mean / sqrt(1 + std_dev^2 / mean^2))

    The location is the mean of the underlying normal distribution (the
    ``mu`` argument of lognormal_for_df), not the mean of the lognormal
    distribution itself.

    Parameters
    ----------
    mean : float
        mean of the lognormal distribution; must be positive for the
        logarithm to be defined
    std_dev : float
        standard deviation of the lognormal distribution

    Returns
    -------
    location : lognormal distribution location
    """
    # squared coefficient of variation of the lognormal distribution
    variance_ratio = (std_dev * std_dev) / (mean * mean)
    location = np.log(mean / np.sqrt(1 + variance_ratio))
    return location

def calculate_scale(self, mean, std_dev):
    """
    Calculate the lognormal distribution scale given the mean and standard
    deviation of the distribution according to the formula

        scale = sqrt(ln(1 + std_dev^2 / mean^2))

    The scale is the standard deviation of the underlying normal distribution
    (the ``sigma`` argument of lognormal_for_df), not the standard deviation
    of the lognormal distribution itself.

    Parameters
    ----------
    mean : float
        mean of the lognormal distribution; must be non-zero
    std_dev : float
        standard deviation of the lognormal distribution

    Returns
    -------
    scale : lognormal distribution scale
    """
    # squared coefficient of variation of the lognormal distribution
    variance_ratio = (std_dev * std_dev) / (mean * mean)
    scale = np.sqrt(np.log(1 + variance_ratio))
    return scale
26 changes: 14 additions & 12 deletions example/configs/tour_mode_choice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,11 @@ CONSTANTS:
Taxi_costPerMile: 2.30
Taxi_costPerMinute: 0.10
Taxi_waitTime_mean:
1: 26.5
2: 17.3
1: 5.5
2: 9.5
3: 13.3
4: 9.5
5: 5.5
4: 17.3
5: 26.5
Taxi_waitTime_sd:
1: 6.4
2: 6.4
Expand All @@ -96,11 +96,11 @@ CONSTANTS:
TNC_single_costPerMinute: 0.24
TNC_single_costMinimum: 7.20
TNC_single_waitTime_mean:
1: 10.3
2: 8.5
1: 4.7
2: 6.3
3: 8.4
4: 6.3
5: 4.7
4: 8.5
5: 10.3
TNC_single_waitTime_sd:
1: 4.1
2: 4.1
Expand All @@ -113,17 +113,19 @@ CONSTANTS:
TNC_shared_costMinimum: 3.00
TNC_shared_IVTFactor: 1.5
TNC_shared_waitTime_mean:
1: 15.0
2: 15.0
1: 7.0
2: 8.0
3: 11.0
4: 8.0
5: 7.0
4: 15.0
5: 15.0
TNC_shared_waitTime_sd:
1: 4.1
2: 4.1
3: 4.1
4: 4.1
5: 4.1
min_waitTime: 0
max_waitTime: 50

# so far, we can use the same spec as for non-joint tours
preprocessor:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,36 +31,27 @@ local,_DF_IS_TOUR,'tour_type' in df.columns
# MAAS,,
,origin_density_measure,"(reindex(land_use.TOTPOP, df[orig_col_name]) + reindex(land_use.TOTEMP, df[orig_col_name])) / (reindex(land_use.TOTACRE, df[orig_col_name]) / 640)"
,dest_density_measure,"(reindex(land_use.TOTPOP, df[dest_col_name]) + reindex(land_use.TOTEMP, df[dest_col_name])) / (reindex(land_use.TOTACRE, df[dest_col_name]) / 640)"
,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
,dest_density,"pd.cut(dest_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
,dest_density,"pd.cut(dest_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
,origin_zone_taxi_wait_time_mean,"origin_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
,origin_zone_taxi_wait_time_sd,"origin_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
,dest_zone_taxi_wait_time_mean,"dest_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
,dest_zone_taxi_wait_time_sd,"dest_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
#
# ,, Note that the mean and standard deviation are not the values for the distribution itself, but of the underlying normal distribution it is derived from.
,origTaxiWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_taxi_wait_time_mean, sigma=origin_zone_taxi_wait_time_sd, broadcast=True)"
# ,, This is equivalent to:
#,origTaxiWaitTime,"np.exp(rng.normal_for_df(df, broadcast=True)*origin_zone_taxi_wait_time_sd + origin_zone_taxi_wait_time_mean)"
#
,destTaxiWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_taxi_wait_time_mean, sigma=dest_zone_taxi_wait_time_sd, broadcast=True)"
#
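# ,, Note that lognormal_for_df expects the location and scale of the underlying normal distribution, not the mean and standard deviation of the lognormal distribution itself, so the configured wait time mean and sd are converted with rng.calculate_location and rng.calculate_scale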
,origTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), rng.calculate_scale(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,destTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_taxi_wait_time_mean, dest_zone_taxi_wait_time_sd), rng.calculate_scale(dest_zone_taxi_wait_time_mean, dest_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,origin_zone_singleTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
,origin_zone_singleTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
,dest_zone_singleTNC_wait_time_mean,"dest_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
,dest_zone_singleTNC_wait_time_sd,"dest_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
#,origSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_singleTNC_wait_time_mean, sigma=origin_zone_singleTNC_wait_time_sd)"
,origSingleTNCWaitTime,5
#,destSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_singleTNC_wait_time_mean, sigma=dest_zone_singleTNC_wait_time_sd)"
,destSingleTNCWaitTime,5
,origSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), rng.calculate_scale(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,destSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_singleTNC_wait_time_mean, dest_zone_singleTNC_wait_time_sd), rng.calculate_scale(dest_zone_singleTNC_wait_time_mean, dest_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,origin_zone_sharedTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
,origin_zone_sharedTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
,dest_zone_sharedTNC_wait_time_mean,"dest_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
,dest_zone_sharedTNC_wait_time_sd,"dest_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
#,origSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_sharedTNC_wait_time_mean, sigma=origin_zone_sharedTNC_wait_time_sd)"
,origSharedTNCWaitTime,5
#,destSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_sharedTNC_wait_time_mean, sigma=dest_zone_sharedTNC_wait_time_sd)"
,destSharedTNCWaitTime,5
,origSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), rng.calculate_scale(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,destSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_sharedTNC_wait_time_mean, dest_zone_sharedTNC_wait_time_sd), rng.calculate_scale(dest_zone_sharedTNC_wait_time_mean, dest_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,totalWaitTaxi,origTaxiWaitTime + destTaxiWaitTime
,totalWaitSingleTNC,origSingleTNCWaitTime + destSingleTNCWaitTime
,totalWaitSharedTNC,origSharedTNCWaitTime + destSharedTNCWaitTime
Expand Down
26 changes: 14 additions & 12 deletions example/configs/trip_mode_choice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,11 +111,11 @@ CONSTANTS:
Taxi_costPerMile: 2.30
Taxi_costPerMinute: 0.10
Taxi_waitTime_mean:
1: 26.5
2: 17.3
1: 5.5
2: 9.5
3: 13.3
4: 9.5
5: 5.5
4: 17.3
5: 26.5
Taxi_waitTime_sd:
1: 6.4
2: 6.4
Expand All @@ -127,11 +127,11 @@ CONSTANTS:
TNC_single_costPerMinute: 0.24
TNC_single_costMinimum: 7.20
TNC_single_waitTime_mean:
1: 10.3
2: 8.5
1: 4.7
2: 6.3
3: 8.4
4: 6.3
5: 4.7
4: 8.5
5: 10.3
TNC_single_waitTime_sd:
1: 4.1
2: 4.1
Expand All @@ -144,17 +144,19 @@ CONSTANTS:
TNC_shared_costMinimum: 3.00
TNC_shared_IVTFactor: 1.5
TNC_shared_waitTime_mean:
1: 15.0
2: 15.0
1: 7.0
2: 8.0
3: 11.0
4: 8.0
5: 7.0
4: 15.0
5: 15.0
TNC_shared_waitTime_sd:
1: 4.1
2: 4.1
3: 4.1
4: 4.1
5: 4.1
min_waitTime: 0
max_waitTime: 50

# so far, we can use the same spec as for non-joint tours
preprocessor:
Expand Down
12 changes: 5 additions & 7 deletions example/configs/trip_mode_choice_annotate_trips_preprocessor.csv
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,17 @@ dest terminal time not counted at home,_dest_terminal_time,"np.where(inbound & l
,destination_walk_time,shortWalk*60/walkSpeed
# MAAS,,
,origin_density_measure,"(reindex(land_use.TOTPOP, df[orig_col_name]) + reindex(land_use.TOTEMP, df[orig_col_name])) / (reindex(land_use.TOTACRE, df[orig_col_name]) / 640)"
,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
,origin_zone_taxi_wait_time_mean,"origin_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
,origin_zone_taxi_wait_time_sd,"origin_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
#,origTaxiWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_taxi_wait_time_mean, sigma=origin_zone_taxi_wait_time_sd)"
,origTaxiWaitTime,5
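# ,, Note that lognormal_for_df expects the location and scale of the underlying normal distribution, not the mean and standard deviation of the lognormal distribution itself, so the configured wait time mean and sd are converted with rng.calculate_location and rng.calculate_scale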
,origTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), rng.calculate_scale(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,origin_zone_singleTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
,origin_zone_singleTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
#,origSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_singleTNC_wait_time_mean, sigma=origin_zone_singleTNC_wait_time_sd)"
,origSingleTNCWaitTime,5
,origSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), rng.calculate_scale(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
,origin_zone_sharedTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
,origin_zone_sharedTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
#,origSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_sharedTNC_wait_time_mean, sigma=origin_zone_sharedTNC_wait_time_sd)"
,origSharedTNCWaitTime,5
,origSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), rng.calculate_scale(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
#,,
,sov_available,odt_skims['SOV_TIME']>0
,hov2_available,odt_skims['HOV2_TIME']>0
Expand Down

0 comments on commit 4cf3dab

Please sign in to comment.