Tncs (#14)
* add tncs

* lognormal_for_df broadcasts duplicate indexes

* lognormal_for_df broadcasts duplicate indexes code

* correct lognormal calculations and add functions to calculate lognormal location and scale

* clean-up tnc wait time expressions

* set taxi, tnc wait time standard deviations to zero until we have better data

* add scaled option to random.lognormal_for_df

* add mobility-as-a-service modes to docs

* updated tnc mode constants

* rename maas to ridehail

Co-authored-by: Jeff Doyle <toliwaga@gmail.com>
Co-authored-by: Blake Rosenthal <blake.rosenthal@rsginc.com>
3 people authored and Blake committed Mar 25, 2020
1 parent f3955ec commit 4255f03
Showing 23 changed files with 1,065 additions and 716 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -1,5 +1,4 @@
sandbox/
example/data/*
.idea
.ipynb_checkpoints
.coverage*
11 changes: 6 additions & 5 deletions .travis.yml
@@ -5,16 +5,15 @@ python:
- '3.7'
- '3.8'
install:
- - wget http://repo.continuum.io/miniconda/Miniconda-3.7.0-Linux-x86_64.sh -O miniconda.sh
+ - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- source "$HOME/miniconda/etc/profile.d/conda.sh"
- hash -r
- conda config --set always_yes yes --set changeps1 no
- conda update -q conda
- conda info -a
- - |
-   conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
- - source activate test-environment
+ - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
+ - conda activate test-environment
- conda install pytest pytest-cov coveralls pycodestyle
- pip install .
- pip freeze
@@ -41,6 +40,8 @@ deploy:

notifications:
slack:
+ on_success: never # default: change
+ on_failure: always # default: always
secure: Dpp+zBrnPGBHXrYWjwHy/bnHvhINfepSIiViwKfBZizBvTDvzSJfu6gCH+/lQ3squF3D4qTWwxB+LQ9V6KTYhuma8vQVisyneI6ARjUI/qgX6aJjuvmDDGPk6DVeDow7+aCLZ8VEHRhSjwy+dv0Ij0rxI6I94xPVwXUkk7ZjcK0=
env:
global:
2 changes: 1 addition & 1 deletion activitysim/abm/models/atwork_subtour_destination.py
@@ -123,7 +123,7 @@ def atwork_subtour_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
- chunk_size, trace_hh_id,
+ chunk_size,
trace_label)

destination_sample['mode_choice_logsum'] = logsums
2 changes: 1 addition & 1 deletion activitysim/abm/models/joint_tour_destination.py
@@ -224,7 +224,7 @@ def joint_tour_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
- chunk_size, trace_hh_id,
+ chunk_size,
trace_label=tracing.extend_trace_label(trace_label, tour_type))

logsums_list.append(logsums)
2 changes: 1 addition & 1 deletion activitysim/abm/models/location_choice.py
@@ -224,7 +224,7 @@ def run_location_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
- chunk_size, trace_hh_id,
+ chunk_size,
trace_label)

# "add_column series should have an index matching the table to which it is being added"
4 changes: 2 additions & 2 deletions activitysim/abm/models/trip_destination.py
@@ -151,7 +151,7 @@ def compute_logsums(
tours_merged,
model_settings,
skims,
- chunk_size, trace_hh_id,
+ chunk_size,
trace_label):
"""
Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
@@ -332,7 +332,7 @@ def choose_trip_destination(
tours_merged=tours_merged,
model_settings=model_settings,
skims=skims,
- chunk_size=chunk_size, trace_hh_id=trace_hh_id,
+ chunk_size=chunk_size,
trace_label=trace_label)

t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0)
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/logsums.py
@@ -49,7 +49,7 @@ def compute_logsums(choosers,
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
- chunk_size, trace_hh_id, trace_label):
+ chunk_size, trace_label):
"""
Parameters
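For reference, a sketch (not part of the diff) of a call site under the updated compute_logsums signature; it is identical to before except that trace_hh_id is no longer passed:

# hypothetical caller; argument names are those shown in the signature above
logsums = compute_logsums(choosers,
                          tour_purpose,
                          logsum_settings, model_settings,
                          skim_dict, skim_stack,
                          chunk_size, trace_label)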
2 changes: 1 addition & 1 deletion activitysim/abm/models/util/tour_destination.py
@@ -149,7 +149,7 @@ def run_destination_logsums(
tour_purpose,
logsum_settings, model_settings,
skim_dict, skim_stack,
chunk_size, trace_hh_id,
chunk_size,
trace_label)

destination_sample['mode_choice_logsum'] = logsums
1 change: 0 additions & 1 deletion activitysim/abm/tables/table_dict.py
@@ -20,7 +20,6 @@
@inject.injectable()
def rng_channels():

- # bug
return RANDOM_CHANNELS


16 changes: 8 additions & 8 deletions activitysim/abm/test/test_pipeline.py
@@ -335,7 +335,7 @@ def get_trace_csv(file_name):
return df


- EXPECT_TOUR_COUNT = 205
+ EXPECT_TOUR_COUNT = 201


def regress_tour_modes(tours_df):
@@ -372,19 +372,19 @@ def regress_tour_modes(tours_df):
'othdiscr',
'work',
'work',
'maint',
'business',
'work',
'eatout',
]
'othmaint'
]

EXPECT_MODES = [
'SHARED3FREE',
'WALK',
'DRIVEALONEFREE',
'WALK',
'WALK',
'SHARED3FREE',
'WALK',
]
'WALK_LOC',
'WALK'
]

assert len(tours_df) == len(EXPECT_PERSON_IDS)
assert (tours_df.person_id.values == EXPECT_PERSON_IDS).all()
94 changes: 84 additions & 10 deletions activitysim/core/random.py
@@ -9,6 +9,7 @@

import numpy as np
import pandas as pd
+ from activitysim.core.util import reindex

from .tracing import print_elapsed_time

@@ -247,10 +248,10 @@ def random_for_df(self, df, step_name, n=1):
self.row_states.loc[df.index, 'offset'] += n
return rands

- def lognormal_for_df(self, df, step_name, mu, sigma):
+ def normal_for_df(self, df, step_name, mu, sigma, lognormal=False):
"""
- Return a floating point random number in lognormal distribution for each row in df
- using the appropriate random channel for each row.
+ Return a floating point random number in normal (or lognormal) distribution
+ for each row in df using the appropriate random channel for each row.
Subsequent calls (in the same step) will return the next rand for each df row
@@ -293,9 +294,14 @@ def to_series(x):
mu = to_series(mu)
sigma = to_series(sigma)

- rands = \
- np.asanyarray([prng.lognormal(mean=mu[i], sigma=sigma[i])
- for i, prng in enumerate(generators)])
+ if lognormal:
+ rands = \
+ np.asanyarray([prng.lognormal(mean=mu[i], sigma=sigma[i])
+ for i, prng in enumerate(generators)])
+ else:
+ rands = \
+ np.asanyarray([prng.normal(loc=mu[i], scale=sigma[i])
+ for i, prng in enumerate(generators)])

# update offset for rows we handled
self.row_states.loc[df.index, 'offset'] += 1
@@ -596,9 +602,9 @@ def random_for_df(self, df, n=1):
rands = channel.random_for_df(df, self.step_name, n)
return rands

- def lognormal_for_df(self, df, mu, sigma):
+ def normal_for_df(self, df, mu=0, sigma=1, broadcast=False):
"""
- Return a single floating point random number in range [0, 1) for each row in df
+ Return a single floating point normal random number in range (-inf, inf) for each row in df
using the appropriate random channel for each row.
Subsequent calls (in the same step) will return the next rand for each df row
@@ -625,12 +631,80 @@ Returns
Returns
-------
- rands : 1-D ndarray the same length as df
+ rands : 1-D ndarray the same length as df (or Series with same index as df)
a single float in normal distribution for each row in df
"""

channel = self.get_channel_for_df(df)
- rands = channel.lognormal_for_df(df, self.step_name, mu, sigma)

+ if broadcast:
+ alts_df = df
+ df = df.index.unique().to_series()
+ rands = channel.normal_for_df(df, self.step_name, mu=0, sigma=1, lognormal=False)
+ rands = reindex(pd.Series(rands, index=df.index), alts_df.index)
+ rands = rands*sigma + mu
+ else:
+ rands = channel.normal_for_df(df, self.step_name, mu, sigma, lognormal=False)

return rands

def lognormal_for_df(self, df, mu, sigma, broadcast=False, scale=False):
"""
Return a single floating point lognormal random number in range (0, inf) for each row in df
using the appropriate random channel for each row.
Note that by default (scale=False), mu and sigma are not the mean and standard deviation
of the lognormal distribution itself, but of the underlying normal distribution it is
derived from. This is perhaps counter-intuitive, but it is how the numpy standard works,
and so we conform to it here.
If scale=True, mu and sigma are instead the desired mean and standard deviation of the
lognormal distribution itself; they are converted internally to the location and scale
parameters of the underlying normal distribution.
Subsequent calls (in the same step) will return the next rand for each df row
The resulting array will be the same length (and order) as df
This method is designed to support alternative selection from a probability array
The columns in df are ignored; the index name and values are used to determine
which random number sequence to use.
We assume that we can identify the channel to use based on the name of df.index
This channel should have already been registered by a call to add_channel (q.v.)
If "true pseudo random" behavior is desired (i.e. NOT repeatable) the set_base_seed
method (q.v.) may be used to globally reseed all random streams.
Parameters
----------
df : pandas.DataFrame, Series, or Index
df with index name and values corresponding to a registered channel
mu : float or array of floats with one value per df row
sigma : float or array of floats with one value per df row
Returns
-------
rands : 1-D ndarray the same length as df (or Series with same index as df)
a single float in lognormal distribution for each row in df
"""

if scale:
# location = ln(mean/sqrt(1 + std_dev^2/mean^2))
# scale = sqrt(ln(1 + std_dev^2/mean^2))
x = 1 + ((sigma * sigma) / (mu * mu))
mu = np.log(mu / (np.sqrt(x)))
sigma = np.sqrt(np.log(x))

if broadcast:
rands = self.normal_for_df(df, mu=mu, sigma=sigma, broadcast=True)
rands = np.exp(rands)
else:
channel = self.get_channel_for_df(df)
rands = channel.normal_for_df(df, self.step_name, mu=mu, sigma=sigma, lognormal=True)

return rands

def choice_for_df(self, df, a, size, replace):
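To make the two new behaviors above concrete, here is a standalone sketch using plain numpy/pandas rather than ActivitySim's per-row seeded channels (the rng, variable names, and toy trip_id index are illustrative, not from the commit):

import numpy as np
import pandas as pd

# scale=True: convert a desired lognormal mean/std into the location and
# scale parameters of the underlying normal (same formulas as the diff above)
mean, std = 10.0, 5.0
x = 1 + (std * std) / (mean * mean)
mu = np.log(mean / np.sqrt(x))   # location = ln(mean/sqrt(1 + std_dev^2/mean^2))
sigma = np.sqrt(np.log(x))       # scale = sqrt(ln(1 + std_dev^2/mean^2))

rng = np.random.default_rng(0)
draws = rng.lognormal(mean=mu, sigma=sigma, size=1_000_000)
assert abs(draws.mean() - mean) < 0.05   # recovers the requested mean
assert abs(draws.std() - std) < 0.05     # recovers the requested std dev

# broadcast=True: draw one rand per unique index value, then reindex onto a
# frame whose index repeats those values, so duplicate rows share a draw
alts = pd.DataFrame({'alt': [0, 1, 0, 1, 0]},
                    index=pd.Index([101, 101, 102, 102, 103], name='trip_id'))
uniq = alts.index.unique()
base = pd.Series(rng.normal(size=len(uniq)), index=uniq)
rands = base.reindex(alts.index) * sigma + mu   # rows of trip 101 share a rand

The committed code performs the duplicate-index broadcast with activitysim.core.util.reindex rather than pandas' own reindex, but the effect shown here is the same.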
1 change: 0 additions & 1 deletion activitysim/core/simulate.py
@@ -44,7 +44,6 @@ def uniquify_spec_index(spec):
for expr in spec.index:
dict[assign.uniquify_key(dict, expr, template="{} # ({})")] = expr

- # bug
prev_index_name = spec.index.name
spec.index = list(dict.keys())
spec.index.name = prev_index_name
4 changes: 3 additions & 1 deletion activitysim/examples/example_mtc/configs/settings.yaml
@@ -22,6 +22,8 @@ skims_file: skims.omx
# convert input CSVs to HDF5 format and save to outputs directory
# create_input_store: True

+ #input_store: ../output/input_data.h5

# number of households to simulate
households_sample_size: 100
# simulate all households
@@ -45,7 +47,7 @@ use_shadow_pricing: False
# trace household id; comment out or leave empty for no trace
# households with all tour types
# [ 728370 1234067 1402924 1594625 1595333 1747572 1896849 1931818 2222690 2344951 2677154]
- trace_hh_id: 701664
+ trace_hh_id: 2223759

# trace origin, destination in accessibility calculation; comment out or leave empty for no trace
# trace_od: [5, 11]