correct lognormal calculations and add functions to calculate lognormal location and scale
RSGInc · Feb 14, 2020 · 4cf3dab · 4cf3dab
1 parent 7b0c332
commit 4cf3dab
Show file tree

Hide file tree

Showing 6 changed files with 86 additions and 55 deletions.
diff --git a/activitysim/core/input.py b/activitysim/core/input.py
@@ -92,8 +92,8 @@ def read_from_table_info(table_info):
         logger.info('writing %s to %s' % (h5_tablename, h5_filepath))
         df.to_hdf(h5_filepath, key=h5_tablename, mode='a')
 
-        #bug
-        #df.to_csv(config.output_file_path('input_data/%s.csv' % tablename), index=True)
+        # bug
+        # df.to_csv(config.output_file_path('input_data/%s.csv' % tablename), index=True)
 
     if drop_columns:
         for c in drop_columns:

diff --git a/activitysim/core/random.py b/activitysim/core/random.py
@@ -195,9 +195,9 @@ def _generators_for_df(self, df):
         """
 
         # assert no dupes
-        #bug
+        # bug
         if len(df.index.unique()) < len(df.index):
-            #print(df)
+            # print(df)
             bug
 
         assert len(df.index.unique()) == len(df.index)
@@ -690,8 +690,8 @@ def lognormal_for_df(self, df, mu, sigma, broadcast=False):
         """
 
         if broadcast:
-            # Note that the mean and standard deviation are not the values for the distribution itself,
-            # but of the underlying normal distribution it is derived from.
+            # Note that the mean and standard deviation are not the values for the distribution
+            # itself, but of the underlying normal distribution it is derived from.
             rands = self.normal_for_df(df, mu=mu, sigma=sigma, broadcast=True)
             rands = np.exp(rands)
         else:
@@ -747,3 +747,41 @@ def choice_for_df(self, df, a, size, replace):
         choices = channel.choice_for_df(df, self.step_name, a, size, replace)
         t0 = print_elapsed_time("choice_for_df for %s rows" % len(df.index), t0, debug=True)
         return choices
+
+    def calculate_location(self, mean, std_dev):
+        """
+        Calculate the lognormal distribution location given the mean and standard
+        deviation of the distribution according to the formula
+
+        location = ln(mean/sqrt(1 + std_dev^2/mean^2))
+
+        Parameters
+        ----------
+        mean : float
+        std_dev : float
+
+        Returns
+        -------
+        location : lognormal distribution location
+        """
+        location = np.log(mean / (np.sqrt(1 + ((std_dev * std_dev) / (mean * mean)))))
+        return(location)
+
+    def calculate_scale(self, mean, std_dev):
+        """
+        Calculate the lognormal distribution scale given the mean and standard
+        deviation of the distribution according to the formula
+
+        scale = sqrt(ln(1 + std_dev^2/mean^2))
+
+        Parameters
+        ----------
+        mean : float
+        std_dev : float
+
+        Returns
+        -------
+        scale : lognormal distribution scale
+        """
+        scale = np.sqrt(np.log(1 + ((std_dev * std_dev) / (mean * mean))))
+        return(scale)
diff --git a/example/configs/tour_mode_choice.yaml b/example/configs/tour_mode_choice.yaml
@@ -80,11 +80,11 @@ CONSTANTS:
     Taxi_costPerMile: 2.30
     Taxi_costPerMinute: 0.10
     Taxi_waitTime_mean:
-      1: 26.5
-      2: 17.3
+      1: 5.5
+      2: 9.5
       3: 13.3
-      4: 9.5
-      5: 5.5
+      4: 17.3
+      5: 26.5
     Taxi_waitTime_sd: 
       1: 6.4
       2: 6.4
@@ -96,11 +96,11 @@ CONSTANTS:
     TNC_single_costPerMinute: 0.24
     TNC_single_costMinimum: 7.20
     TNC_single_waitTime_mean: 
-      1: 10.3
-      2: 8.5
+      1: 4.7
+      2: 6.3
       3: 8.4
-      4: 6.3
-      5: 4.7
+      4: 8.5
+      5: 10.3
     TNC_single_waitTime_sd: 
       1: 4.1
       2: 4.1
@@ -113,17 +113,19 @@ CONSTANTS:
     TNC_shared_costMinimum: 3.00
     TNC_shared_IVTFactor: 1.5
     TNC_shared_waitTime_mean: 
-      1: 15.0
-      2: 15.0
+      1: 7.0
+      2: 8.0
       3: 11.0
-      4: 8.0
-      5: 7.0
+      4: 15.0
+      5: 15.0
     TNC_shared_waitTime_sd: 
       1: 4.1
       2: 4.1
       3: 4.1
       4: 4.1
       5: 4.1
+    min_waitTime: 0
+    max_waitTime: 50
 
 # so far, we can use the same spec as for non-joint tours
 preprocessor:

diff --git a/example/configs/tour_mode_choice_annotate_choosers_preprocessor.csv b/example/configs/tour_mode_choice_annotate_choosers_preprocessor.csv
@@ -31,36 +31,27 @@ local,_DF_IS_TOUR,'tour_type' in df.columns
 # MAAS,,
 ,origin_density_measure,"(reindex(land_use.TOTPOP, df[orig_col_name]) + reindex(land_use.TOTEMP, df[orig_col_name])) / (reindex(land_use.TOTACRE, df[orig_col_name]) / 640)"
 ,dest_density_measure,"(reindex(land_use.TOTPOP, df[dest_col_name]) + reindex(land_use.TOTEMP, df[dest_col_name])) / (reindex(land_use.TOTACRE, df[dest_col_name]) / 640)"
-,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
-,dest_density,"pd.cut(dest_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
+,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
+,dest_density,"pd.cut(dest_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
 ,origin_zone_taxi_wait_time_mean,"origin_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
 ,origin_zone_taxi_wait_time_sd,"origin_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
 ,dest_zone_taxi_wait_time_mean,"dest_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
 ,dest_zone_taxi_wait_time_sd,"dest_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
-#
-# ,, Note that the mean and standard deviation are not the values for the distribution itself, but of the underlying normal distribution it is derived from.
-,origTaxiWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_taxi_wait_time_mean, sigma=origin_zone_taxi_wait_time_sd, broadcast=True)"
-# ,, This is equivalent to:
-#,origTaxiWaitTime,"np.exp(rng.normal_for_df(df, broadcast=True)*origin_zone_taxi_wait_time_sd + origin_zone_taxi_wait_time_mean)"
-#
-,destTaxiWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_taxi_wait_time_mean, sigma=dest_zone_taxi_wait_time_sd, broadcast=True)"
-#
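+# ,, Note that lognormal_for_df takes the location and scale of the underlying normal distribution, not the mean and standard deviation of the lognormal itself, so the configured wait time mean and sd are converted with rng.calculate_location and rng.calculate_scale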
+,origTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), rng.calculate_scale(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
+,destTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_taxi_wait_time_mean, dest_zone_taxi_wait_time_sd), rng.calculate_scale(dest_zone_taxi_wait_time_mean, dest_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 ,origin_zone_singleTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
 ,origin_zone_singleTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
 ,dest_zone_singleTNC_wait_time_mean,"dest_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
 ,dest_zone_singleTNC_wait_time_sd,"dest_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
-#,origSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_singleTNC_wait_time_mean, sigma=origin_zone_singleTNC_wait_time_sd)"
-,origSingleTNCWaitTime,5
-#,destSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_singleTNC_wait_time_mean, sigma=dest_zone_singleTNC_wait_time_sd)"
-,destSingleTNCWaitTime,5
+,origSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), rng.calculate_scale(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
+,destSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_singleTNC_wait_time_mean, dest_zone_singleTNC_wait_time_sd), rng.calculate_scale(dest_zone_singleTNC_wait_time_mean, dest_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 ,origin_zone_sharedTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
 ,origin_zone_sharedTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
 ,dest_zone_sharedTNC_wait_time_mean,"dest_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
 ,dest_zone_sharedTNC_wait_time_sd,"dest_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
-#,origSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_sharedTNC_wait_time_mean, sigma=origin_zone_sharedTNC_wait_time_sd)"
-,origSharedTNCWaitTime,5
-#,destSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=dest_zone_sharedTNC_wait_time_mean, sigma=dest_zone_sharedTNC_wait_time_sd)"
-,destSharedTNCWaitTime,5
+,origSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), rng.calculate_scale(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
+,destSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(dest_zone_sharedTNC_wait_time_mean, dest_zone_sharedTNC_wait_time_sd), rng.calculate_scale(dest_zone_sharedTNC_wait_time_mean, dest_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 ,totalWaitTaxi,origTaxiWaitTime + destTaxiWaitTime
 ,totalWaitSingleTNC,origSingleTNCWaitTime + destSingleTNCWaitTime
 ,totalWaitSharedTNC,origSharedTNCWaitTime + destSharedTNCWaitTime

diff --git a/example/configs/trip_mode_choice.yaml b/example/configs/trip_mode_choice.yaml
@@ -111,11 +111,11 @@ CONSTANTS:
   Taxi_costPerMile: 2.30
   Taxi_costPerMinute: 0.10
   Taxi_waitTime_mean:
-    1: 26.5
-    2: 17.3
+    1: 5.5
+    2: 9.5
     3: 13.3
-    4: 9.5
-    5: 5.5
+    4: 17.3
+    5: 26.5
   Taxi_waitTime_sd: 
     1: 6.4
     2: 6.4
@@ -127,11 +127,11 @@ CONSTANTS:
   TNC_single_costPerMinute: 0.24
   TNC_single_costMinimum: 7.20
   TNC_single_waitTime_mean: 
-    1: 10.3
-    2: 8.5
+    1: 4.7
+    2: 6.3
     3: 8.4
-    4: 6.3
-    5: 4.7
+    4: 8.5
+    5: 10.3
   TNC_single_waitTime_sd: 
     1: 4.1
     2: 4.1
@@ -144,17 +144,19 @@ CONSTANTS:
   TNC_shared_costMinimum: 3.00
   TNC_shared_IVTFactor: 1.5
   TNC_shared_waitTime_mean: 
-    1: 15.0
-    2: 15.0
+    1: 7.0
+    2: 8.0
     3: 11.0
-    4: 8.0
-    5: 7.0
+    4: 15.0
+    5: 15.0
   TNC_shared_waitTime_sd: 
     1: 4.1
     2: 4.1
     3: 4.1
     4: 4.1
     5: 4.1
+  min_waitTime: 0
+  max_waitTime: 50
 
 # so far, we can use the same spec as for non-joint tours
 preprocessor:

diff --git a/example/configs/trip_mode_choice_annotate_trips_preprocessor.csv b/example/configs/trip_mode_choice_annotate_trips_preprocessor.csv
@@ -46,19 +46,17 @@ dest terminal time not counted at home,_dest_terminal_time,"np.where(inbound & l
 ,destination_walk_time,shortWalk*60/walkSpeed
 # MAAS,,
 ,origin_density_measure,"(reindex(land_use.TOTPOP, df[orig_col_name]) + reindex(land_use.TOTEMP, df[orig_col_name])) / (reindex(land_use.TOTACRE, df[orig_col_name]) / 640)"
-,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[1, 2, 3, 4, 5]).astype(int)"
+,origin_density,"pd.cut(origin_density_measure, bins=[-np.inf, 500, 2000, 5000, 15000, np.inf], labels=[5, 4, 3, 2, 1]).astype(int)"
 ,origin_zone_taxi_wait_time_mean,"origin_density.map({k: v for k, v in Taxi_waitTime_mean.items()})"
 ,origin_zone_taxi_wait_time_sd,"origin_density.map({k: v for k, v in Taxi_waitTime_sd.items()})"
-#,origTaxiWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_taxi_wait_time_mean, sigma=origin_zone_taxi_wait_time_sd)"
-,origTaxiWaitTime,5
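+# ,, Note that lognormal_for_df takes the location and scale of the underlying normal distribution, not the mean and standard deviation of the lognormal itself, so the configured wait time mean and sd are converted with rng.calculate_location and rng.calculate_scale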
+,origTaxiWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), rng.calculate_scale(origin_zone_taxi_wait_time_mean, origin_zone_taxi_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 ,origin_zone_singleTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_single_waitTime_mean.items()})"
 ,origin_zone_singleTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_single_waitTime_sd.items()})"
-#,origSingleTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_singleTNC_wait_time_mean, sigma=origin_zone_singleTNC_wait_time_sd)"
-,origSingleTNCWaitTime,5
+,origSingleTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), rng.calculate_scale(origin_zone_singleTNC_wait_time_mean, origin_zone_singleTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 ,origin_zone_sharedTNC_wait_time_mean,"origin_density.map({k: v for k, v in TNC_shared_waitTime_mean.items()})"
 ,origin_zone_sharedTNC_wait_time_sd,"origin_density.map({k: v for k, v in TNC_shared_waitTime_sd.items()})"
-#,origSharedTNCWaitTime,"rng.lognormal_for_df(df, mu=origin_zone_sharedTNC_wait_time_mean, sigma=origin_zone_sharedTNC_wait_time_sd)"
-,origSharedTNCWaitTime,5
+,origSharedTNCWaitTime,"np.log(rng.lognormal_for_df(df, rng.calculate_location(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), rng.calculate_scale(origin_zone_sharedTNC_wait_time_mean, origin_zone_sharedTNC_wait_time_sd), broadcast=True)).clip(min_waitTime, max_waitTime)"
 #,,
 ,sov_available,odt_skims['SOV_TIME']>0
 ,hov2_available,odt_skims['HOV2_TIME']>0