From b339016d6f63cedb8e76bb53ab7b42392472c34d Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 14:40:29 +1000 Subject: [PATCH 01/52] Scratch working for link loading --- aequilibrae/paths/route_choice.pxd | 9 + aequilibrae/paths/route_choice.pyx | 242 ++++++++++++++++--- tests/aequilibrae/paths/test_route_choice.py | 9 +- 3 files changed, 221 insertions(+), 39 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index bf4671d47..e173e0cc7 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -140,6 +140,13 @@ cdef class RouteChoiceSet: bint block_flows_through_centroids bint a_star + vector[pair[long long, long long]] *ods + vector[RouteSet_t *] *results + vector[vector[long long] *] *link_union_set + vector[vector[double] *] *cost_set + vector[vector[double] *] *gamma_set + vector[vector[double] *] *prob_set + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -202,6 +209,8 @@ cdef class RouteChoiceSet: double theta ) noexcept nogil + # cdef void link_loading(self, double[:, :] matrix_view) nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( vector[pair[long long, long long]] &ods, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index b22dd0c08..211d292af 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -114,7 +114,12 @@ cdef class RouteChoiceSet: def __cinit__(self): """C level init. For C memory allocation and initialisation. 
Called exactly once per object.""" - pass + results = nullptr + link_union_set = nullptr + cost_set = nullptr + gamma_set = nullptr + prob_set = nullptr + ods = nullptr def __init__(self, graph: Graph): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" @@ -134,13 +139,51 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a partially deallocated state already. """ - pass + self.deallocate_results() + + def deallocate_results(self): + """ + Deallocate stored results, existing extracted results are not invalidated. + """ + cdef: + RouteSet_t *route_set + vector[long long] *link_vec + vector[double] *double_vec + + if self.results != nullptr: + for route_set in deref(self.results): + for link_vec in deref(route_set): + del link_vec + del route_set + del self.results + + if self.link_union_set != nullptr: + for link_vec in deref(self.link_union_set): + del link_vec + del self.link_union_vec + + if self.cost_set != nullptr: + for double_vec in deref(self.cost_set): + del double_vec + del self.cost_vec + + if self.gamma_set != nullptr: + for double_vec in deref(self.gamma_set): + del double_vec + del self.gamma_vec + + if self.prob_set != nullptr: + for double_vec in deref(self.prob_set): + del double_vec + del self.prob_vec + + if self.ods != nullptr: + del self.ods @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): @@ -160,7 +203,8 @@ cdef class RouteChoiceSet: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. Represents paths from ``origin`` to ``destination``. 
""" - return [tuple(x) for x in self.batched([(origin, destination)], *args, **kwargs).column("route set").to_pylist()] + self.batched([(origin, destination)], *args, **kwargs) + return [tuple(x) for x in self.get_results().column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -199,10 +243,6 @@ cdef class RouteChoiceSet: **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. - - :Returns: - **route sets** (:obj:`dict[tuple[int, int], list[tuple[int, ...]]]`): Returns a list of unique tuples of compact link IDs for - each OD pair provided (as keys). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
""" cdef: long long o, d @@ -274,26 +314,35 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set pair[vector[long long] *, vector[long long] *] freq_pair - vector[long long] *link_union = nullptr + vector[long long] *link_union_scratch = nullptr + vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] *cost_set = nullptr vector[vector[double] *] *gamma_set = nullptr - vector[vector[double] *] *prob_set= nullptr + vector[vector[double] *] *prob_set = nullptr if path_size_logit: + link_union_set = new vector[vector[long long] *](max_results_len) cost_set = new vector[vector[double] *](max_results_len) gamma_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) + self.deallocate_results() # We have be storing results from a previous run + for batch in batches: c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() results.resize(batch_len) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed if path_size_logit: + # we may clear these objects because it's either: + # - the first iteration and they contain no elements, thus no memory to leak + # - the internal objects were freed by the previous iteration + link_union_set.clear() cost_set.clear() gamma_set.clear() prob_set.clear() + link_union_set.resize(batch_len) cost_set.resize(batch_len) gamma_set.resize(batch_len) prob_set.resize(batch_len) @@ -302,7 +351,7 @@ cdef class RouteChoiceSet: # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
# Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch if path_size_logit: - link_union = new vector[long long]() + link_union_scratch = new vector[long long]() for i in prange(batch_len): origin_index = self.nodes_to_indices_view[c_ods[i].first] @@ -349,13 +398,13 @@ cdef class RouteChoiceSet: ) if path_size_logit: - link_union.clear() - freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union)) + link_union_scratch.clear() + freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) + deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) - del freq_pair.first - del freq_pair.second + del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) deref(results)[i] = route_set @@ -370,38 +419,50 @@ cdef class RouteChoiceSet: ) if path_size_logit: - del link_union + del link_union_scratch + + if where is not None: + table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) - table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures + if path_size_logit: + for j in range(batch_len): + del deref(link_union_set)[j] + del deref(cost_set)[j] + del deref(gamma_set)[j] + del deref(prob_set)[j] - # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures - if path_size_logit: for j in range(batch_len): - 
del deref(cost_set)[j] - del deref(gamma_set)[j] - del deref(prob_set)[j] + for route in deref(deref(results)[j]): + del route + del deref(results)[j] - for j in range(batch_len): - for route in deref(deref(results)[j]): - del route - - if where is not None: checkpoint.write(table) del table else: - break # There was only one batch anyway + pass # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in memory - # We're done with everything now, we can free the outer internal structures - if path_size_logit: - del cost_set - del gamma_set - del prob_set + # Here we decide if we wish to preserve our results for later saving/link loading + if where is not None: + # We're done with everything now, we can free the outer internal structures del results - - if where is None: - return table + if path_size_logit: + del link_union_set + del cost_set + del gamma_set + del prob_set else: - return + self.results = results + self.link_union_set = link_union_set + self.cost_set = cost_set + self.gamma_set = gamma_set + self.prob_set = prob_set + + # Copy the c_ods vector, it was provided by the auto Cython conversion and is allocated on the stack, + # we should copy it to keep it around + self.ods = new vector[pair[long long, long long]](c_ods) + + # self.link_union ?? 
This could be saved as a partial results from the computation above, although it isn't easy to get out rn @cython.initializedcheck(False) cdef void path_find( @@ -744,6 +805,92 @@ cdef class RouteChoiceSet: return prob_vec + def link_loading(self, double[:, :] matrix_view): + if self.ods == nullptr \ + or self.link_union_set == nullptr \ + or self.prob_set == nullptr: + raise ValueError("link loading requires Route Choice path_size_logit results") + + cdef: + vector[double] *loads + vector[double] *route_set_prob + + vector[long long] *link_union + vector[long long].const_iterator link_union_iter + + vector[long long] *links + vector[long long].const_iterator link_iter + + vector[double].const_iterator prob_iter + + RouteSet_t *route_set + double demand, load, prob + size_t length + long origin_index, dest_index + int i + + fprintf(stderr, "starting link loading\n") + with nogil, parallel(num_threads=1): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + fprintf(stderr, "core: %d\n", threadid()) + + for i in prange(self.ods.size()): + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) + + route_set = deref(self.results)[i] + fprintf(stderr, "got route set\n") + link_union = deref(self.link_union_set)[i] + fprintf(stderr, "got link union\n") + route_set_prob = deref(self.prob_set)[i] + fprintf(stderr, "got route set probsk\n") + + fprintf(stderr, "making new loads vector\n") + loads = new vector[double](link_union.size(), 0.0) + + fprintf(stderr, "starting route iteration\n") + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + load = demand * deref(route_prob_iter) + inc(route_prob_iter) + + if load == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
+ links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + # fprintf(stderr, "starting link iteration\n") + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + + fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + load + + inc(link_iter) + + with gil: + print(origin_index, dest_index, deref(loads)) + + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @@ -837,6 +984,27 @@ cdef class RouteChoiceSet: return table + def get_results(self): # Cython doesn't like this type annotation... -> pa.Table: + """ + :Returns: + **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of compact link IDs for + each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
+ """ + if self.results == nullptr or self.ods == nullptr: + raise ValueError("Route Choice results not computed yet") + + table = libpa.pyarrow_wrap_table( + RouteChoiceSet.make_table_from_results( + deref(self.ods), + deref(self.results), + self.cost_set, + self.gamma_set, + self.prob_set + ) + ) + + return table + @cython.embedsignature(True) cdef class Checkpoint: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index b68f64d97..c02c4ad1c 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -30,6 +30,9 @@ def setUp(self) -> None: self.graph.set_graph("distance") self.graph.set_blocked_centroid_flows(False) + self.mat = self.project.matrices.get_matrix("demand_omx") + self.mat.computational_view() + def tearDown(self) -> None: self.project.close() @@ -169,8 +172,10 @@ def test_cost_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + table = rc.get_results().to_pandas() + breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From dbe42bc609e96201a7581f3f1a3d9f1b57f7d422 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 16:02:57 +1000 Subject: [PATCH 02/52] Rudimentary link loading and path file generation --- aequilibrae/paths/route_choice.pxd | 1 + aequilibrae/paths/route_choice.pyx | 123 ++++++++++++++++------------- 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index e173e0cc7..a12fd6c72 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -136,6 +136,7 @@ cdef class RouteChoiceSet: long 
long [:] ids_graph_view long long [:] compressed_link_ids long long num_nodes + long long num_links long long zones bint block_flows_through_centroids bint a_star diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 211d292af..d1b67f8a5 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -136,6 +136,7 @@ cdef class RouteChoiceSet: self.ids_graph_view = graph.compact_graph.id.values self.num_nodes = graph.compact_num_nodes + self.num_links = graph.compact_num_links self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows @@ -814,6 +815,8 @@ cdef class RouteChoiceSet: cdef: vector[double] *loads vector[double] *route_set_prob + vector[double] *collective_link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[vector[double] *] *link_loads = new vector[vector[double] *](self.ods.size()) # FIXME FREE ME vector[long long] *link_union vector[long long].const_iterator link_union_iter @@ -830,65 +833,77 @@ cdef class RouteChoiceSet: int i fprintf(stderr, "starting link loading\n") - with nogil, parallel(num_threads=1): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. - # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - fprintf(stderr, "core: %d\n", threadid()) + with nogil: + with parallel(num_threads=1): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + fprintf(stderr, "core: %d\n", threadid()) + + for i in prange(self.ods.size()): + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) + + route_set = deref(self.results)[i] + fprintf(stderr, "got route set\n") + link_union = deref(self.link_union_set)[i] + fprintf(stderr, "got link union\n") + route_set_prob = deref(self.prob_set)[i] + fprintf(stderr, "got route set probsk\n") + + fprintf(stderr, "making new loads vector\n") + loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + + fprintf(stderr, "starting route iteration\n") + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + if prob == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
+ links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + # fprintf(stderr, "starting link iteration\n") + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + + fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob + + inc(link_iter) + + deref(link_loads)[i] = loads + with gil: + print("path file:", origin_index, dest_index, deref(loads)) + + for i in range(self.ods.size()): + loads = deref(link_loads)[i] + link_union = deref(self.link_union_set)[i] - for i in prange(self.ods.size()): origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] demand = matrix_view[origin_index, dest_index] - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) - route_set = deref(self.results)[i] - fprintf(stderr, "got route set\n") - link_union = deref(self.link_union_set)[i] - fprintf(stderr, "got link union\n") - route_set_prob = deref(self.prob_set)[i] - fprintf(stderr, "got route set probsk\n") - - fprintf(stderr, "making new loads vector\n") - loads = new vector[double](link_union.size(), 0.0) - - fprintf(stderr, "starting route iteration\n") - # We now iterate over all routes in the route_set, each route has an associated probability - route_prob_iter = route_set_prob.cbegin() - for route in deref(route_set): - load = demand * deref(route_prob_iter) - inc(route_prob_iter) - - if load == 0.0: - continue - - # For each link in 
the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. - links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved - sort(links.begin(), links.end()) - - # links and link_union are sorted, and links is a subset of link_union - link_union_iter = link_union.cbegin() - link_iter = links.cbegin() - - # fprintf(stderr, "starting link iteration\n") - while link_iter != links.cend(): - # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): - inc(link_union_iter) - - fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) - deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + load - - inc(link_iter) - - with gil: - print(origin_index, dest_index, deref(loads)) + for j in range(link_union.size()): + deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] + with gil: + print("link loads:", deref(collective_link_loads)) @cython.wraparound(False) From b12b2a910f2239fe9f63ade7410fd23b1869d9d7 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 
2024 17:08:28 +1000 Subject: [PATCH 03/52] Fix tests and segfaults --- aequilibrae/paths/route_choice.pyx | 15 ++++++++---- tests/aequilibrae/paths/test_route_choice.py | 24 ++++++++++++-------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index d1b67f8a5..ae648ec5c 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -162,29 +162,36 @@ cdef class RouteChoiceSet: del link_vec del route_set del self.results + self.results = nullptr if self.link_union_set != nullptr: for link_vec in deref(self.link_union_set): del link_vec - del self.link_union_vec + del self.link_union_set + self.link_union_set = nullptr if self.cost_set != nullptr: for double_vec in deref(self.cost_set): del double_vec - del self.cost_vec + del self.cost_set + self.cost_set = nullptr if self.gamma_set != nullptr: for double_vec in deref(self.gamma_set): del double_vec - del self.gamma_vec + del self.gamma_set + self.gamma_set = nullptr if self.prob_set != nullptr: for double_vec in deref(self.prob_set): del double_vec - del self.prob_vec + del self.prob_set + self.prob_set = nullptr if self.ods != nullptr: del self.ods + self.ods = prob_set = nullptr + @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index c02c4ad1c..2e632ac05 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -75,8 +75,9 @@ def test_route_choice_empty_path(self): rc = RouteChoiceSet(self.graph) a = 1 + rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs) self.assertFalse( - rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), + rc.get_results(), "Route set from self to self should be empty", ) @@ -103,7 +104,8 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in 
np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), len(nodes), "Requested number of route sets not returned") @@ -121,7 +123,8 @@ def test_route_choice_duplicates_batched(self): max_routes = 20 with self.assertWarns(UserWarning): - results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), 1, "Duplicates not dropped") @@ -153,7 +156,8 @@ def test_round_trip(self): max_routes = 20 path = join(self.project.project_base_path, "batched results") - table = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + table = rc.get_results().to_pandas() rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) @@ -164,7 +168,7 @@ def test_round_trip(self): .reset_index(drop=True) ) - table = table.to_pandas().sort_values(by=["origin id", "destination id"]).reset_index(drop=True) + table = table.sort_values(by=["origin id", "destination id"]).reset_index(drop=True) pd.testing.assert_frame_equal(table, new_table) @@ -175,7 +179,7 @@ def test_cost_results(self): rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = rc.get_results().to_pandas() - breakpoint() + # breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: @@ -186,8 +190,8 @@ def test_gamma_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, 
max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = rc.get_results().to_pandas() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: @@ -197,8 +201,8 @@ def test_prob_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = rc.get_results().to_pandas() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From 79ac2eabd27d06056938a9703a958eaa892eb286 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 17:09:05 +1000 Subject: [PATCH 04/52] Scratch comments --- aequilibrae/paths/route_choice.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index ae648ec5c..908f85ddf 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -847,7 +847,6 @@ cdef class RouteChoiceSet: fprintf(stderr, "core: %d\n", threadid()) for i in prange(self.ods.size()): - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) route_set = deref(self.results)[i] fprintf(stderr, "got route set\n") @@ -890,14 +889,14 @@ cdef class RouteChoiceSet: while deref(link_iter) != deref(link_union_iter): inc(link_union_iter) - fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + fprintf(stderr, "adding prob of %f to link %d because link %d is in route\n", prob, deref(link_union_iter), deref(link_iter)) deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob inc(link_iter) 
deref(link_loads)[i] = loads with gil: - print("path file:", origin_index, dest_index, deref(loads)) + print("path file:", deref(loads)) for i in range(self.ods.size()): loads = deref(link_loads)[i] @@ -906,11 +905,13 @@ cdef class RouteChoiceSet: origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] demand = matrix_view[origin_index, dest_index] + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) for j in range(link_union.size()): deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] with gil: print("link loads:", deref(collective_link_loads)) + return deref(collective_link_loads) @cython.wraparound(False) From 3bf495bc78f81375a3840a590df0e45d61c4e5c3 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 09:10:07 +1000 Subject: [PATCH 05/52] Separate path file generation and link loading Add method to map compressed link IDs to network IDs --- aequilibrae/paths/graph.py | 46 ++++ aequilibrae/paths/route_choice.pxd | 14 ++ aequilibrae/paths/route_choice.pyx | 239 ++++++++++++------- tests/aequilibrae/paths/test_route_choice.py | 12 + 4 files changed, 227 insertions(+), 84 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 2a75572c1..02c2e826d 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,6 +4,7 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional +import functools import numpy as np import pandas as pd @@ -167,6 +168,12 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: self.__build_compressed_graph() self.compact_num_links = self.compact_graph.shape[0] + # The cache property should be recalculated when the graph has been reprepared + try: + del self.compressed_link_network_mapping + except AttributeError: + pass + def 
__build_compressed_graph(self): build_compressed_graph(self) @@ -505,6 +512,45 @@ def save_compressed_correspondence(self, path, mode_name, mode_id): node_path = join(path, f"nodes_to_indices_c{mode_name}_{mode_id}.feather") pd.DataFrame(self.nodes_to_indices, columns=["node_index"]).to_feather(node_path) + @functools.cached_property + def compressed_link_network_mapping(self): + """ + Two arrays providing a mapping of compressed id to link id. + + Uses sparse compression. Index ``idx`` by the by compressed id and compressed id + 1, the + network IDs are then in the range ``idx[id]:idx[id + 1]``. + + .. code-block:: python + + >>> idx, data = graph.compressed_link_network_mapping + >>> data[idx[id]:idx[id + 1]] # ==> Slice of network ID's corresponding to the compressed ID + + Links not in the compressed graph are not contained within the ``data`` array. + + :Returns: + **idx** (:obj:`np.array`): index array for ``data`` + **data** (:obj:`np.array`): array of link ids + """ + + # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, + # we skip them. 
+ filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] + gb = filtered.groupby(by="__compressed_id__", sort=True) + idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) + data = np.zeros(len(filtered), dtype=np.uint32) + + i = 0 + for compressed_id, df in gb: + idx[compressed_id] = i + values = df.link_id.values + for j in range(len(values)): + data[i + j] = values[j] + + i += len(values) + + idx[-1] = i + return idx, data + class Graph(GraphBase): def __init__(self, *args, **kwargs): diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index a12fd6c72..32c5f077c 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -148,6 +148,9 @@ cdef class RouteChoiceSet: vector[vector[double] *] *gamma_set vector[vector[double] *] *prob_set + unsigned int [:] mapping_idx + unsigned int [:] mapping_data + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -212,6 +215,17 @@ cdef class RouteChoiceSet: # cdef void link_loading(self, double[:, :] matrix_view) nogil + @staticmethod + cdef vector[vector[double] *] *compute_path_files( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &results, + vector[vector[long long] *] &link_union_set, + vector[vector[double] *] &prob_set + ) noexcept nogil + + cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil + cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( vector[pair[long long, long long]] &ods, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 908f85ddf..f186f0715 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -75,6 +75,7 @@ import itertools import pathlib import logging import warnings +from 
aequilibrae.matrix import AequilibraeMatrix cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. cimport pyarrow as pa @@ -140,6 +141,8 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows + self.mapping_idx, self.mapping_data = graph.compressed_link_network_mapping + def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a @@ -813,105 +816,173 @@ cdef class RouteChoiceSet: return prob_vec - def link_loading(self, double[:, :] matrix_view): + def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False): if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: raise ValueError("link loading requires Route Choice path_size_logit results") + if not isinstance(matrix, AequilibraeMatrix): + raise ValueError("`matrix` is not an AequilibraE matrix") + cdef: - vector[double] *loads - vector[double] *route_set_prob - vector[double] *collective_link_loads = new vector[double](self.num_links) # FIXME FREE ME - vector[vector[double] *] *link_loads = new vector[vector[double] *](self.ods.size()) # FIXME FREE ME + vector[vector[double] *] *path_files = nullptr + vector[double] *ll + + if generate_path_files: + path_files = RouteChoiceSet.compute_path_files( + deref(self.ods), + deref(self.results), + deref(self.link_union_set), + deref(self.prob_set) + ) + tmp = [] + for vec in deref(path_files): + tmp.append(deref(vec)) + print(tmp) + + def apply_link_loading_func(m): + if generate_path_files: + ll = self.apply_link_loading_from_path_files( + m, + deref(path_files), + ) + else: + ll = self.apply_link_loading(m) + return deref(ll) + + if len(matrix.view_names) == 1: + link_loads = apply_link_loading_func(matrix.matrix_view) + else: + link_loads = { + name: apply_link_loading_func(matrix.matrix_view[:, :, i]) + for i, name in 
enumerate(matrix.names) + } + return link_loads + + + @staticmethod + cdef vector[vector[double] *] *compute_path_files( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &results, + vector[vector[long long] *] &link_union_set, + vector[vector[double] *] &prob_set + ) noexcept nogil: + cdef: + vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME vector[long long] *link_union - vector[long long].const_iterator link_union_iter + vector[double] *loads + vector[double] *link - vector[long long] *links + vector[long long].const_iterator link_union_iter vector[long long].const_iterator link_iter - vector[double].const_iterator prob_iter + size_t link_loc + double prob + int i + + with parallel(num_threads=6): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. + # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + + for i in prange(ods.size()): + link_union = link_union_set[i] + loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = prob_set[i].cbegin() + for route in deref(results[i]): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + if prob == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. 
NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. + links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + link_loc = link_union_iter - link_union.cbegin() + deref(loads)[link_loc] = deref(loads)[link_loc] + prob # += here results in all zeros? Odd + + inc(link_iter) + + deref(link_loads)[i] = loads + + return link_loads + + cdef vector[double] *apply_link_loading_from_path_files( + RouteChoiceSet self, + double[:, :] matrix_view, + vector[vector[double] *] &path_files + ) noexcept nogil: + cdef: + vector[double] *loads + vector[long long] *link_union + long origin_index, dest_index + double demand + + vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + + for i in range(self.ods.size()): + loads = path_files[i] + link_union = deref(self.link_union_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + for j in range(link_union.size()): + link = deref(link_union)[j] + deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] # += here results in all zeros? 
Odd + + return link_loads + + cdef vector[double] *apply_link_loading(self, double[:, :] matrix_view) noexcept nogil: + cdef: RouteSet_t *route_set - double demand, load, prob - size_t length + vector[double] *route_set_prob long origin_index, dest_index - int i + double demand, prob, load + + vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + + for i in range(self.ods.size()): + route_set = deref(self.results)[i] + route_set_prob = deref(self.prob_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + load = prob * demand + for link in deref(route): + deref(link_loads)[link] = deref(link_loads)[link] + load # += here results in all zeros? Odd + + return link_loads - fprintf(stderr, "starting link loading\n") - with nogil: - with parallel(num_threads=1): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - fprintf(stderr, "core: %d\n", threadid()) - - for i in prange(self.ods.size()): - - route_set = deref(self.results)[i] - fprintf(stderr, "got route set\n") - link_union = deref(self.link_union_set)[i] - fprintf(stderr, "got link union\n") - route_set_prob = deref(self.prob_set)[i] - fprintf(stderr, "got route set probsk\n") - - fprintf(stderr, "making new loads vector\n") - loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME - - fprintf(stderr, "starting route iteration\n") - # We now iterate over all routes in the route_set, each route has an associated probability - route_prob_iter = route_set_prob.cbegin() - for route in deref(route_set): - prob = deref(route_prob_iter) - inc(route_prob_iter) - - if prob == 0.0: - continue - - # For each link in the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
- links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME - sort(links.begin(), links.end()) - - # links and link_union are sorted, and links is a subset of link_union - link_union_iter = link_union.cbegin() - link_iter = links.cbegin() - - # fprintf(stderr, "starting link iteration\n") - while link_iter != links.cend(): - # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): - inc(link_union_iter) - - fprintf(stderr, "adding prob of %f to link %d because link %d is in route\n", prob, deref(link_union_iter), deref(link_iter)) - deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob - - inc(link_iter) - - deref(link_loads)[i] = loads - with gil: - print("path file:", deref(loads)) - - for i in range(self.ods.size()): - loads = deref(link_loads)[i] - link_union = deref(self.link_union_set)[i] - - origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] - dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] - demand = matrix_view[origin_index, dest_index] - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) - - for j in range(link_union.size()): - deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] - with gil: - print("link loads:", deref(collective_link_loads)) - return deref(collective_link_loads) @cython.wraparound(False) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 2e632ac05..a63ef6ba5 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -208,6 +208,18 @@ def test_prob_results(self): for od, df in gb: self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + def test_link_loading(self): + + 
np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + link_loads = rc.link_loading(self.mat) + link_loads2 = rc.link_loading(self.mat, generate_path_files=True) + + np.testing.assert_array_almost_equal(link_loads, link_loads2) + def generate_line_strings(project, graph, results): """Debug method""" From 61edffedc1bf5274a033333f814c3ee1d087737f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 10:41:07 +1000 Subject: [PATCH 06/52] Fix link ID ordering in compressed -> network mapping --- aequilibrae/paths/graph.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 02c2e826d..6ddf56a3f 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -532,6 +532,9 @@ def compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't + # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute + # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, # we skip them. filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] @@ -543,13 +546,25 @@ def compressed_link_network_mapping(self): for compressed_id, df in gb: idx[compressed_id] = i values = df.link_id.values - for j in range(len(values)): - data[i + j] = values[j] + a = df.a_node.values + b = df.b_node.values + + # In order to ensure that the link IDs come out in the correct order we must walk the links + # we do this assuming the `a` array is sorted. 
+ j = 0 + x = self.compact_graph.a_node.iat[compressed_id] + while True: + tmp = a.searchsorted(x) + if tmp < len(a) and a[tmp] == x: + x = b[tmp] + data[i + j] = values[tmp] + else: + break + j += 1 i += len(values) idx[-1] = i - return idx, data class Graph(GraphBase): From d1c494ed88a4aabc9916dd2cb56cd11ff0032fe1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:08:18 +1000 Subject: [PATCH 07/52] We don't need functools for this --- aequilibrae/paths/graph.py | 27 ++++++++++++++++++--------- aequilibrae/paths/route_choice.pyx | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 6ddf56a3f..a9cfead21 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,7 +4,6 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional -import functools import numpy as np import pandas as pd @@ -96,6 +95,9 @@ def __init__(self, logger=None): self.dead_end_links = np.array([]) + self.compressed_link_network_mapping_idx = None + self.compressed_link_network_mapping_data = None + # Randomly generate a unique Graph ID randomly self._id = uuid.uuid4().hex @@ -168,11 +170,9 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: self.__build_compressed_graph() self.compact_num_links = self.compact_graph.shape[0] - # The cache property should be recalculated when the graph has been reprepared - try: - del self.compressed_link_network_mapping - except AttributeError: - pass + # The cache property should be recalculated when the graph has been re-prepared + self.compressed_link_network_mapping_idx = None + self.compressed_link_network_mapping_data = None def __build_compressed_graph(self): build_compressed_graph(self) @@ -512,10 +512,9 @@ def save_compressed_correspondence(self, path, mode_name, mode_id): node_path = join(path, f"nodes_to_indices_c{mode_name}_{mode_id}.feather") pd.DataFrame(self.nodes_to_indices, 
columns=["node_index"]).to_feather(node_path) - @functools.cached_property - def compressed_link_network_mapping(self): + def create_compressed_link_network_mapping(self): """ - Two arrays providing a mapping of compressed id to link id. + Create two arrays providing a mapping of compressed id to link id. Uses sparse compression. Index ``idx`` by the by compressed id and compressed id + 1, the network IDs are then in the range ``idx[id]:idx[id + 1]``. @@ -532,6 +531,11 @@ def compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ + # Cache the result, this isn't a huge computation but isn't worth doing twice + if self.compressed_link_network_mapping_idx is not None \ + and self.compressed_link_network_mapping_data is not None: + return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute @@ -566,6 +570,11 @@ def compressed_link_network_mapping(self): idx[-1] = i + self.compressed_link_network_mapping_idx = idx + self.compressed_link_network_mapping_data = data + + return idx, data + class Graph(GraphBase): def __init__(self, *args, **kwargs): diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index f186f0715..0d0f4529f 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -141,7 +141,7 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.mapping_idx, self.mapping_data = graph.compressed_link_network_mapping + self.mapping_idx, self.mapping_data = graph.create_compressed_link_network_mapping() def __dealloc__(self): """ From fbc04b784ca0a5f128421036d23306333c1aa97c Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:09:17 +1000 
Subject: [PATCH 08/52] Reverse routes during computation, map link IDs during output --- aequilibrae/paths/route_choice.pxd | 3 ++- aequilibrae/paths/route_choice.pyx | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 32c5f077c..a36bb6f1a 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -118,6 +118,7 @@ cdef extern from "arrow/builder.h" namespace "arrow" nogil: libpa.CStatus Append(const uint32_t value) libpa.CStatus AppendValues(const vector[uint32_t] &values) libpa.CStatus AppendValues(vector[uint32_t].const_reverse_iterator values_begin, vector[uint32_t].const_reverse_iterator values_end) + libpa.CStatus AppendValues(const uint32_t *values, int64_t length, const uint8_t *valid_bytes = nullptr) cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(libpa.CArrayBuilder): CDoubleBuilder(libpa.CMemoryPool* pool) @@ -226,8 +227,8 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil - @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( + RouteChoiceSet self, vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 0d0f4529f..01e108e79 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -63,7 +63,7 @@ from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from libcpp.algorithm cimport sort, lower_bound +from libcpp.algorithm cimport sort, lower_bound, reverse from 
cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid cimport openmp @@ -433,7 +433,7 @@ cdef class RouteChoiceSet: del link_union_scratch if where is not None: - table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures if path_size_logit: @@ -584,6 +584,8 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) + reverse(vec.begin(), vec.end()) + for connector in deref(vec): # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements # then add a single element. An incremental set hash function could be of use. However, the since of this set is @@ -661,6 +663,8 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) + reverse(vec.begin(), vec.end()) + for connector in deref(vec): thread_cost[connector] *= penatly @@ -989,8 +993,8 @@ cdef class RouteChoiceSet: @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) - @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( + RouteChoiceSet self, vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, @@ -1018,6 +1022,7 @@ cdef class RouteChoiceSet: libpa.CResult[shared_ptr[libpa.CArray]] route_set_results int offset = 0 + size_t network_link_begin, network_link_end, link bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] @@ -1043,9 +1048,17 @@ cdef class RouteChoiceSet: d_col.Append(ods[i].second) 
offset_builder.Append(offset) - path_builder.AppendValues(route.crbegin(), route.crend()) - offset += route.size() + for link in deref(route): + # Translate the compressed link IDs in route to network link IDs, this is a 1:n mapping + network_link_begin = self.mapping_idx[link] + network_link_end = self.mapping_idx[link + 1] + path_builder.AppendValues( + &self.mapping_data[network_link_begin], + network_link_end - network_link_begin + ) + + offset += network_link_end - network_link_begin path_builder.Finish(&paths) @@ -1088,7 +1101,7 @@ cdef class RouteChoiceSet: raise ValueError("Route Choice results not computed yet") table = libpa.pyarrow_wrap_table( - RouteChoiceSet.make_table_from_results( + self.make_table_from_results( deref(self.ods), deref(self.results), self.cost_set, From 9a7ee100510bcf190dbb5cffeb7b914552353ce4 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:09:55 +1000 Subject: [PATCH 09/52] Fix tests --- tests/aequilibrae/paths/test_route_choice.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a63ef6ba5..a33086732 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -34,6 +34,7 @@ def setUp(self) -> None: self.mat.computational_view() def tearDown(self) -> None: + self.mat.close() self.project.close() def test_route_choice(self): @@ -50,7 +51,7 @@ def test_route_choice(self): results = rc.run(a, b, max_routes=0, max_depth=1) self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") self.assertListEqual( - results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route" + results, [(2, 6, 9, 13, 25, 30, 53, 59)], "Initial route isn't the shortest A* route" ) # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths @@ -184,7 +185,11 @@ def 
test_cost_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: for route, cost in zip(df["route set"].values, df["cost"].values): - np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + np.testing.assert_almost_equal( + self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), + cost, + err_msg=f", cost differs for OD {od}" + ) def test_gamma_results(self): np.random.seed(0) @@ -206,7 +211,7 @@ def test_prob_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: - self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): From ea0853d6bda845ff0a0eda31bae288eb27b5f8e6 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:14:01 +1000 Subject: [PATCH 10/52] Fix windows compilation --- aequilibrae/paths/route_choice.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 01e108e79..172b2f2e0 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -884,7 +884,7 @@ cdef class RouteChoiceSet: size_t link_loc double prob - int i + long long i with parallel(num_threads=6): # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
From da984187ce51994628b9dd939a94392254b1fe31 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:16:04 +1000 Subject: [PATCH 11/52] Linting --- aequilibrae/paths/graph.py | 6 ++++-- tests/aequilibrae/paths/test_route_choice.py | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index a9cfead21..e25ed8353 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -532,8 +532,10 @@ def create_compressed_link_network_mapping(self): """ # Cache the result, this isn't a huge computation but isn't worth doing twice - if self.compressed_link_network_mapping_idx is not None \ - and self.compressed_link_network_mapping_data is not None: + if ( + self.compressed_link_network_mapping_idx is not None + and self.compressed_link_network_mapping_data is not None + ): return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a33086732..38decf374 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -188,7 +188,7 @@ def test_cost_results(self): np.testing.assert_almost_equal( self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), cost, - err_msg=f", cost differs for OD {od}" + err_msg=f", cost differs for OD {od}", ) def test_gamma_results(self): @@ -214,7 +214,6 @@ def test_prob_results(self): self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): - np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] From 459d4be031eb7278f29f79746bb25b78a5677243 Mon Sep 17 00:00:00 2001 From: Jake-Moss 
Date: Wed, 6 Mar 2024 12:26:48 +1000 Subject: [PATCH 12/52] Add ruff to pre-commit hooks --- .pre-commit-config.yaml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e3fbbc53..9a0759689 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,18 @@ repos: -- repo: https://github.com/ambv/black - rev: 22.3.0 - hooks: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.3.0 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format +- repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 \ No newline at end of file +- repo: https://github.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 From 19f0d7e07150607d6db01ca6dd6ead557e520504 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:31:41 +1000 Subject: [PATCH 13/52] Update black pre-commit hook and drop flake8 --- .pre-commit-config.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9a0759689..d8b2a859c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,10 +9,6 @@ repos: # Run the formatter. 
- id: ruff-format - repo: https://github.com/ambv/black - rev: 22.3.0 + rev: 24.1.1 hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 From 192748fb5d13fa262dd5faf28b65ab1274cfbcd8 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 15:29:50 +1000 Subject: [PATCH 14/52] Translate link loads from compressed IDs to graph IDs when link Add decorators as well --- aequilibrae/paths/route_choice.pxd | 6 +-- aequilibrae/paths/route_choice.pyx | 59 ++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index a36bb6f1a..62df3e43f 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -135,6 +135,7 @@ cdef class RouteChoiceSet: double [:] lat_view double [:] lon_view long long [:] ids_graph_view + long long [:] graph_compressed_id_view long long [:] compressed_link_ids long long num_nodes long long num_links @@ -214,14 +215,13 @@ cdef class RouteChoiceSet: double theta ) noexcept nogil - # cdef void link_loading(self, double[:, :] matrix_view) nogil - @staticmethod cdef vector[vector[double] *] *compute_path_files( vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &results, vector[vector[long long] *] &link_union_set, - vector[vector[double] *] &prob_set + vector[vector[double] *] &prob_set, + unsigned int cores ) noexcept nogil cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 172b2f2e0..6f2904bdb 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -88,6 +88,7 @@ from libc.stdio cimport fprintf, printf, stderr # It would really be nice if these were modules. 
The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' +include 'parallel_numpy.pyx' @cython.embedsignature(True) cdef class RouteChoiceSet: @@ -136,6 +137,7 @@ cdef class RouteChoiceSet: self.a_star = False self.ids_graph_view = graph.compact_graph.id.values + self.graph_compressed_id_view = graph.graph.__compressed_id__.values self.num_nodes = graph.compact_num_nodes self.num_links = graph.compact_num_links self.zones = graph.num_zones @@ -820,7 +822,8 @@ cdef class RouteChoiceSet: return prob_vec - def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False): + @cython.embedsignature(True) + def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: @@ -829,6 +832,8 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") + cores = cores if cores > 0 else openmp.omp_get_num_threads() + cdef: vector[vector[double] *] *path_files = nullptr vector[double] *ll @@ -838,7 +843,8 @@ cdef class RouteChoiceSet: deref(self.ods), deref(self.results), deref(self.link_union_set), - deref(self.prob_set) + deref(self.prob_set), + cores, ) tmp = [] for vec in deref(path_files): @@ -853,7 +859,17 @@ cdef class RouteChoiceSet: ) else: ll = self.apply_link_loading(m) - return deref(ll) + + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) + assign_link_loads_cython( + actual, + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. 
+ &deref(ll)[0], + self.graph_compressed_id_view, + cores + ) + del ll + return actual if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) @@ -865,14 +881,23 @@ cdef class RouteChoiceSet: return link_loads - + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) @staticmethod cdef vector[vector[double] *] *compute_path_files( vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &results, vector[vector[long long] *] &link_union_set, - vector[vector[double] *] &prob_set + vector[vector[double] *] &prob_set, + unsigned int cores ) noexcept nogil: + """ + Computes the path files for the provided vector of RouteSets. + + Returns vector of vectors of link loads corresponding to each link in it's link_union_set. + """ cdef: vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME vector[long long] *link_union @@ -886,7 +911,7 @@ cdef class RouteChoiceSet: double prob long long i - with parallel(num_threads=6): + with parallel(num_threads=cores): # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch @@ -932,11 +957,22 @@ cdef class RouteChoiceSet: return link_loads + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) cdef vector[double] *apply_link_loading_from_path_files( RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files ) noexcept nogil: + """ + Apply link loading from path files. + + If path files have already been computed then this is a more efficient manner for the link loading. + + Returns a vector of link loads indexed by compressed link ID. 
+ """ cdef: vector[double] *loads vector[long long] *link_union @@ -959,7 +995,16 @@ cdef class RouteChoiceSet: return link_loads + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) cdef vector[double] *apply_link_loading(self, double[:, :] matrix_view) noexcept nogil: + """ + Apply link loading. + + Returns a vector of link loads indexed by compressed link ID. + """ cdef: RouteSet_t *route_set vector[double] *route_set_prob @@ -987,8 +1032,6 @@ cdef class RouteChoiceSet: return link_loads - - @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) From 6bdb1ffe8b069ffbf6764abbac9c3baa66309561 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 16:39:31 +1000 Subject: [PATCH 15/52] Rename Cython file to avoid name clash --- aequilibrae/paths/{route_choice.pxd => route_choice_set.pxd} | 0 aequilibrae/paths/{route_choice.pyx => route_choice_set.pyx} | 0 setup.py | 4 ++-- tests/aequilibrae/paths/test_route_choice.py | 5 ++--- 4 files changed, 4 insertions(+), 5 deletions(-) rename aequilibrae/paths/{route_choice.pxd => route_choice_set.pxd} (100%) rename aequilibrae/paths/{route_choice.pyx => route_choice_set.pyx} (100%) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice_set.pxd similarity index 100% rename from aequilibrae/paths/route_choice.pxd rename to aequilibrae/paths/route_choice_set.pxd diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice_set.pyx similarity index 100% rename from aequilibrae/paths/route_choice.pyx rename to aequilibrae/paths/route_choice_set.pyx diff --git a/setup.py b/setup.py index b68b463a6..e9a851217 100644 --- a/setup.py +++ b/setup.py @@ -61,8 +61,8 @@ ) ext_mod_bfs_le = Extension( - "aequilibrae.paths.route_choice", - [join("aequilibrae", "paths", "route_choice.pyx")], + "aequilibrae.paths.route_choice_set", + [join("aequilibrae", "paths", "route_choice_set.pyx")], 
extra_compile_args=compile_args, extra_link_args=link_args, define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 38decf374..5d310078d 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -9,13 +9,13 @@ import pyarrow as pa from aequilibrae import Graph, Project -from aequilibrae.paths.route_choice import RouteChoiceSet +from aequilibrae.paths.route_choice_set import RouteChoiceSet from ...data import siouxfalls_project # In these tests `max_depth` should be provided to prevent a runaway test case and just burning CI time -class TestRouteChoice(TestCase): +class TestRouteChoiceSet(TestCase): def setUp(self) -> None: os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] @@ -180,7 +180,6 @@ def test_cost_results(self): rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = rc.get_results().to_pandas() - # breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From 27c6d85a4a173b7b3b2abea3265280795782e60f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 17:11:12 +1000 Subject: [PATCH 16/52] Add wrapper object and begin API work --- aequilibrae/paths/__init__.py | 1 + aequilibrae/paths/route_choice.py | 175 +++++++++++++++++++ aequilibrae/paths/route_choice_set.pyx | 21 ++- tests/aequilibrae/paths/test_route_choice.py | 1 + 4 files changed, 191 insertions(+), 7 deletions(-) create mode 100644 aequilibrae/paths/route_choice.py diff --git a/aequilibrae/paths/__init__.py b/aequilibrae/paths/__init__.py index 61af0cc6e..42b21a0f6 100644 --- a/aequilibrae/paths/__init__.py +++ b/aequilibrae/paths/__init__.py @@ -8,6 +8,7 @@ from aequilibrae.paths.traffic_assignment import TrafficAssignment, TransitAssignment from aequilibrae.paths.vdf import VDF from aequilibrae.paths.graph import Graph, 
TransitGraph +from aequilibrae.paths.route_choice import RouteChoice from aequilibrae import global_logger diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py new file mode 100644 index 000000000..9de098905 --- /dev/null +++ b/aequilibrae/paths/route_choice.py @@ -0,0 +1,175 @@ +import numpy as np +import socket +from aequilibrae.context import get_active_project +from aequilibrae.paths.graph import Graph +from aequilibrae.paths.route_choice_set import RouteChoiceSet +from typing import Optional +import pyarrow as pa +import pathlib + +import logging + + +class RouteChoice: + all_algorithms = ["bfsle", "lp", "link-penalisation"] + default_paramaters = { + "beta": 1.0, + "theta": 1.0, + "penalty": 1.1, + "seed": 0, + "max_routes": 0, + "max_depth": 0, + } + + def __init__(self, graph: Graph, project=None): + self.paramaters = self.default_paramaters.copy() + + proj = project or get_active_project(must_exist=False) + self.project = proj + + self.logger = proj.logger if proj else logging.getLogger("aequilibrae") + + self.cores: int = 0 + self.graph = graph + self.__rc = RouteChoiceSet(graph) + + self.schema = RouteChoiceSet.schema + self.psl_schema = RouteChoiceSet.psl_schema + + self.compact_link_loads: Optional[np.array] = None + self.link_loads: Optional[np.array] = None + self.results: Optional[pa.Table] = None + self.where: Optional[pathlib.Path] = None + + def set_algorithm(self, algorithm: str): + """ + Chooses the assignment algorithm. + Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation' + + 'lp' is also accepted as an alternative to 'link-penalisation' + + :Arguments: + **algorithm** (:obj:`str`): Algorithm to be used + """ + algo_dict = {i: i for i in self.all_algorithms} + algo_dict["lp"] = "link-penalisation" + algo = algo_dict.get(algorithm.lower()) + + if algo is None: + raise AttributeError(f"Assignment algorithm not available. 
Choose from: {','.join(self.all_algorithms)}") + + self.algorithm = algo + self._config["Algorithm"] = algo + + def set_cores(self, cores: int) -> None: + """Allows one to set the number of cores to be used + + Inherited from :obj:`AssignmentResultsBase` + + :Arguments: + **cores** (:obj:`int`): Number of CPU cores to use + """ + if not self.classes: + raise RuntimeError("You need load transit classes before overwriting the number of cores") + + self.cores = cores + + def set_paramaters(self, par: dict): + """ + Sets the parameters for the route choice TODO, do we want link specific values? + + "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. + "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. + + Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + + - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is + largely dependent on the size of the paths within the network. For very small networks a value of 10 + is a recommended starting point. For large networks a good starting value is 5. Increase the value + until the number of desired routes is being consistently returned. + + - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, + it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, + specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. 
+ + + Parameter values can be scalars (same values for the entire network) or network field names + (link-specific values) - Examples: {'alpha': 0.15, 'beta': 4.0} or {'alpha': 'alpha', 'beta': 'beta'} + + + :Arguments: + **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF + """ + + if any(key not in self.default_paramaters for key in par.keys()): + raise ValueError("Invalid parameter provided") + + self.paramaters = self.default_paramaters | par + + def set_save_path_files(self, save_it: bool) -> None: + """Turn path saving on or off. + + :Arguments: + **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved + """ + self.save_path_files = save_it + + def set_save_routes(self, where: Optional[str] = None) -> None: + """ + Set save path for route choice resutls. Provide ``None`` to disable. + + **warning** enabling route saving will disable in memory results. Viewing the results will read the results + from disk first. + + :Arguments: + **save_it** (:obj:`bool`): Boolean to indicate whether routes should be saved + """ + self.where = pathlib.Path(where) if where is not None else None + + def info(self) -> dict: + """Returns information for the transit assignment procedure + + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID'. 
+ + The classes key is also a dictionary with all the user classes per transit class and their respective + matrix totals + + :Returns: + **info** (:obj:`dict`): Dictionary with summary information + """ + + matrix_totals = {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + + info = { + "Algorithm": self.algorithm, + "Matrix totals": matrix_totals, + "Computer name": socket.gethostname(), + "Procedure ID": self.procedure_id, + "Parameters": self.paramaters, + } + return info + + def log_specification(self): + self.logger.info("Route Choice specification") + self.logger.info(self._config) + + def results(self): + """Returns the results of the route choice procedure + + Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + + :Returns: + **results** (:obj:`pa.Table`): Table with the results of the route choice procedure + + """ + if self.results is None: + try: + self.results = self.__rc.get_results() + except RuntimeError as err: + if self.where is None: + raise ValueError("Route choice results not computed and read/save path not specificed") from err + self.results = pa.dataset.dataset( + self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema) + ) + + return self.results diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 6f2904bdb..3d0786658 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -52,7 +52,7 @@ routes aren't required small-ish things like the memcpy and banned link set copy """ -from aequilibrae import Graph +from aequilibrae.paths.graph import Graph from libc.math cimport INFINITY, pow, exp from libc.string cimport memcpy @@ -846,11 +846,14 @@ cdef class RouteChoiceSet: deref(self.prob_set), cores, ) + + # FIXME, write out path files tmp = [] for vec in deref(path_files): tmp.append(deref(vec)) 
print(tmp) + def apply_link_loading_func(m): if generate_path_files: ll = self.apply_link_loading_from_path_files( @@ -860,16 +863,20 @@ cdef class RouteChoiceSet: else: ll = self.apply_link_loading(m) + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. + compressed = &deref(ll)[0] + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) assign_link_loads_cython( actual, - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. - &deref(ll)[0], + compressed, self.graph_compressed_id_view, cores ) + compressed = np.array(compressed, copy=True) del ll - return actual + return actual.reshape(-1), compressed.reshape(-1) + if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) @@ -1137,11 +1144,11 @@ cdef class RouteChoiceSet: def get_results(self): # Cython doesn't like this type annotation... -> pa.Table: """ :Returns: - **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of compact link IDs for - each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. + **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of link IDs for + each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. 
""" if self.results == nullptr or self.ods == nullptr: - raise ValueError("Route Choice results not computed yet") + raise RuntimeError("Route Choice results not computed yet") table = libpa.pyarrow_wrap_table( self.make_table_from_results( diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 5d310078d..879dfda6a 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -10,6 +10,7 @@ from aequilibrae import Graph, Project from aequilibrae.paths.route_choice_set import RouteChoiceSet +from aequilibrae.paths.route_choice import RouteChoice from ...data import siouxfalls_project From a6bcf8607123c51e6722d186a0e7451351976610 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 12 Mar 2024 15:39:06 +1000 Subject: [PATCH 17/52] Cannot rely on the ordering of nodes when building the mapping --- aequilibrae/paths/graph.py | 3 ++- tests/aequilibrae/paths/test_route_choice.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index e25ed8353..f28834151 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -558,7 +558,8 @@ def create_compressed_link_network_mapping(self): # In order to ensure that the link IDs come out in the correct order we must walk the links # we do this assuming the `a` array is sorted. j = 0 - x = self.compact_graph.a_node.iat[compressed_id] + # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup + x = a[np.isin(a, b, invert=True, assume_unique=True)][0] while True: tmp = a.searchsorted(x) if tmp < len(a) and a[tmp] == x: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 879dfda6a..c31c7b6a2 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -186,7 +186,7 @@ def test_cost_results(self): for od, df in gb: for route, cost in zip(df["route set"].values, df["cost"].values): np.testing.assert_almost_equal( - self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), + self.graph.network.set_index("link_id").loc[route][self.graph.cost_field].sum(), cost, err_msg=f", cost differs for OD {od}", ) From ed03ca07de82f274a23df3031df6165ecf88d1b0 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 13 Mar 2024 11:38:06 +1000 Subject: [PATCH 18/52] Rename gamma -> path_overlap --- aequilibrae/paths/route_choice_set.pxd | 8 +-- aequilibrae/paths/route_choice_set.pyx | 72 ++++++++++---------- tests/aequilibrae/paths/test_route_choice.py | 4 +- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 62df3e43f..e80a41c6a 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -147,7 +147,7 @@ cdef class RouteChoiceSet: vector[RouteSet_t *] *results vector[vector[long long] *] *link_union_set vector[vector[double] *] *cost_set - vector[vector[double] *] *gamma_set + vector[vector[double] *] *path_overlap_set vector[vector[double] *] *prob_set unsigned int [:] mapping_idx @@ -200,7 +200,7 @@ cdef class RouteChoiceSet: cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil @staticmethod - cdef vector[double] *compute_gamma( + cdef vector[double] *compute_path_overlap( RouteSet_t *route_set, pair[vector[long 
long] *, vector[long long] *] &freq_set, vector[double] &total_cost, @@ -210,7 +210,7 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[double] *compute_prob( vector[double] &total_cost, - vector[double] &gamma_vec, + vector[double] &path_overlap_vec, double beta, double theta ) noexcept nogil @@ -232,7 +232,7 @@ cdef class RouteChoiceSet: vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, - vector[vector[double] *] *gamma_set, + vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 3d0786658..d6ed083f0 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -110,7 +110,7 @@ cdef class RouteChoiceSet: pa.field("destination id", pa.uint32(), nullable=False), pa.field("route set", route_set_dtype, nullable=False), pa.field("cost", pa.float64(), nullable=False), - pa.field("gamma", pa.float64(), nullable=False), + pa.field("path overlap", pa.float64(), nullable=False), pa.field("probability", pa.float64(), nullable=False), ]) @@ -119,7 +119,7 @@ cdef class RouteChoiceSet: results = nullptr link_union_set = nullptr cost_set = nullptr - gamma_set = nullptr + path_overlap_set = nullptr prob_set = nullptr ods = nullptr @@ -181,11 +181,11 @@ cdef class RouteChoiceSet: del self.cost_set self.cost_set = nullptr - if self.gamma_set != nullptr: - for double_vec in deref(self.gamma_set): + if self.path_overlap_set != nullptr: + for double_vec in deref(self.path_overlap_set): del double_vec - del self.gamma_set - self.gamma_set = nullptr + del self.path_overlap_set + self.path_overlap_set = nullptr if self.prob_set != nullptr: for double_vec in deref(self.prob_set): @@ -330,13 +330,13 @@ cdef class RouteChoiceSet: vector[long long] *link_union_scratch = nullptr vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] 
*cost_set = nullptr - vector[vector[double] *] *gamma_set = nullptr + vector[vector[double] *] *path_overlap_set = nullptr vector[vector[double] *] *prob_set = nullptr if path_size_logit: link_union_set = new vector[vector[long long] *](max_results_len) cost_set = new vector[vector[double] *](max_results_len) - gamma_set = new vector[vector[double] *](max_results_len) + path_overlap_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) self.deallocate_results() # We have be storing results from a previous run @@ -352,12 +352,12 @@ cdef class RouteChoiceSet: # - the internal objects were freed by the previous iteration link_union_set.clear() cost_set.clear() - gamma_set.clear() + path_overlap_set.clear() prob_set.clear() link_union_set.resize(batch_len) cost_set.resize(batch_len) - gamma_set.resize(batch_len) + path_overlap_set.resize(batch_len) prob_set.resize(batch_len) with nogil, parallel(num_threads=c_cores): @@ -415,8 +415,8 @@ cdef class RouteChoiceSet: freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) - deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) - deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) + deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) + deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, theta) del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) deref(results)[i] = route_set @@ -435,14 +435,14 @@ cdef class RouteChoiceSet: del link_union_scratch if where is not None: - table = 
libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set)) # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures if path_size_logit: for j in range(batch_len): del deref(link_union_set)[j] del deref(cost_set)[j] - del deref(gamma_set)[j] + del deref(path_overlap_set)[j] del deref(prob_set)[j] for j in range(batch_len): @@ -462,13 +462,13 @@ cdef class RouteChoiceSet: if path_size_logit: del link_union_set del cost_set - del gamma_set + del path_overlap_set del prob_set else: self.results = results self.link_union_set = link_union_set self.cost_set = cost_set - self.gamma_set = gamma_set + self.path_overlap_set = path_overlap_set self.prob_set = prob_set # Copy the c_ods vector, it was provided by the auto Cython conversion and is allocated on the stack, @@ -748,7 +748,7 @@ cdef class RouteChoiceSet: @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef vector[double] *compute_gamma( + cdef vector[double] *compute_path_overlap( RouteSet_t *route_set, pair[vector[long long] *, vector[long long] *] &freq_set, vector[double] &total_cost, @@ -764,32 +764,32 @@ cdef class RouteChoiceSet: sum_{k in R}: delta_{a,k}: freq_set """ cdef: - vector[double] *gamma_vec + vector[double] *path_overlap_vec # Scratch objects vector[long long].const_iterator link_iter - double gamma + double path_overlap long long link, j size_t i - gamma_vec = new vector[double]() - gamma_vec.reserve(route_set.size()) + path_overlap_vec = new vector[double]() + path_overlap_vec.reserve(route_set.size()) j = 0 for route in deref(route_set): - gamma = 0.0 + path_overlap = 0.0 for link in deref(route): # We know the frequency table is ordered and contains every link in the union of the routes. 
# We want to find the index of the link, and use that to look up it's frequency link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) - gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + path_overlap = path_overlap + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] - gamma_vec.push_back(gamma / total_cost[j]) + path_overlap_vec.push_back(path_overlap / total_cost[j]) j = j + 1 - return gamma_vec + return path_overlap_vec @cython.wraparound(False) @cython.embedsignature(True) @@ -798,7 +798,7 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[double] *compute_prob( vector[double] &total_cost, - vector[double] &gamma_vec, + vector[double] &path_overlap_vec, double beta, double theta ) noexcept nogil: @@ -816,7 +816,7 @@ cdef class RouteChoiceSet: for i in range(total_cost.size()): inv_prob = 0.0 for j in range(total_cost.size()): - inv_prob = inv_prob + pow(gamma_vec[j] / gamma_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) @@ -1048,7 +1048,7 @@ cdef class RouteChoiceSet: vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, - vector[vector[double] *] *gamma_set, + vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ): cdef: @@ -1060,7 +1060,7 @@ cdef class RouteChoiceSet: # Custom imports, these are declared in route_choice.pxd *not* libarrow. 
CUInt32Builder *path_builder = new CUInt32Builder(pool) CDoubleBuilder *cost_col = nullptr - CDoubleBuilder *gamma_col = nullptr + CDoubleBuilder *path_overlap_col = nullptr CDoubleBuilder *prob_col = nullptr libpa.CInt32Builder *offset_builder = new libpa.CInt32Builder(pool) # Must be Int32 *not* UInt32 @@ -1073,19 +1073,19 @@ cdef class RouteChoiceSet: int offset = 0 size_t network_link_begin, network_link_end, link - bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) + bint psl = (cost_set != nullptr and path_overlap_set != nullptr and prob_set != nullptr) - # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] + # Origins, Destination, Route set, [Cost for route, Path_Overlap for route, Probability for route] columns.resize(6 if psl else 3) if psl: cost_col = new CDoubleBuilder(pool) - gamma_col = new CDoubleBuilder(pool) + path_overlap_col = new CDoubleBuilder(pool) prob_col = new CDoubleBuilder(pool) for i in range(ods.size()): cost_col.AppendValues(deref(deref(cost_set)[i])) - gamma_col.AppendValues(deref(deref(gamma_set)[i])) + path_overlap_col.AppendValues(deref(deref(path_overlap_set)[i])) prob_col.AppendValues(deref(deref(prob_set)[i])) for i in range(ods.size()): @@ -1123,7 +1123,7 @@ cdef class RouteChoiceSet: if psl: cost_col.Finish(&columns[3]) - gamma_col.Finish(&columns[4]) + path_overlap_col.Finish(&columns[4]) prob_col.Finish(&columns[5]) cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) @@ -1136,7 +1136,7 @@ cdef class RouteChoiceSet: if psl: del cost_col - del gamma_col + del path_overlap_col del prob_col return table @@ -1155,7 +1155,7 @@ cdef class RouteChoiceSet: deref(self.ods), deref(self.results), self.cost_set, - self.gamma_set, + self.path_overlap_set, self.prob_set ) ) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 
c31c7b6a2..538b70fc1 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -191,7 +191,7 @@ def test_cost_results(self): err_msg=f", cost differs for OD {od}", ) - def test_gamma_results(self): + def test_path_overlap_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] @@ -200,7 +200,7 @@ def test_gamma_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: - self.assertTrue(all((df["gamma"] > 0) & (df["gamma"] <= 1))) + self.assertTrue(all((df["path overlap"] > 0) & (df["path overlap"] <= 1))) def test_prob_results(self): np.random.seed(0) From 74e525aa8995dbb0ba21d93c0f01ed8de095bf45 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 12 Mar 2024 15:39:50 +1000 Subject: [PATCH 19/52] Prevent deadend removal + graph compression introducing simple loops --- aequilibrae/paths/graph_building.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 4bd13c85e..3bb3ef4c9 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -325,6 +325,10 @@ def build_compressed_graph(graph): "link_id": np.arange(slink), } ) + + # Link compression can introduce new simple cycles into the graph + comp_lnk = comp_lnk[comp_lnk.a_node != comp_lnk.b_node] + max_link_id = link_id_max * 10 comp_lnk.link_id += max_link_id From 0b1ac041eaf1cab73aee35de67e9f952f9b035cb Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 13 Mar 2024 20:01:34 +1000 Subject: [PATCH 20/52] Move NetworkGraphIndices dataclass, add node mapping, extend API untested --- aequilibrae/paths/graph.py | 41 +++++- .../paths/results/assignment_results.py | 21 +-- aequilibrae/paths/route_choice.py | 126 +++++++++++++++++- aequilibrae/paths/route_choice_set.pyx | 2 +- 4 files changed, 163 insertions(+), 27 
deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index f28834151..a8ee6a2ce 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,6 +4,7 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional +import dataclasses import numpy as np import pandas as pd @@ -12,6 +13,26 @@ from aequilibrae.context import get_logger +@dataclasses.dataclass +class NetworkGraphIndices: + network_ab_idx: np.array + network_ba_idx: np.array + graph_ab_idx: np.array + graph_ba_idx: np.array + + +def _get_graph_to_network_mapping(lids, direcs): + num_uncompressed_links = int(np.unique(lids).shape[0]) + indexing = np.zeros(int(lids.max()) + 1, np.uint64) + indexing[np.unique(lids)[:]] = np.arange(num_uncompressed_links) + + graph_ab_idx = direcs > 0 + graph_ba_idx = direcs < 0 + network_ab_idx = indexing[lids[graph_ab_idx]] + network_ba_idx = indexing[lids[graph_ba_idx]] + return NetworkGraphIndices(network_ab_idx, network_ba_idx, graph_ab_idx, graph_ba_idx) + + class GraphBase(ABC): # noqa: B024 """ Graph class. 
@@ -173,6 +194,7 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: # The cache property should be recalculated when the graph has been re-prepared self.compressed_link_network_mapping_idx = None self.compressed_link_network_mapping_data = None + self.network_compressed_node_mapping = None def __build_compressed_graph(self): build_compressed_graph(self) @@ -535,8 +557,13 @@ def create_compressed_link_network_mapping(self): if ( self.compressed_link_network_mapping_idx is not None and self.compressed_link_network_mapping_data is not None + and self.network_compressed_node_mapping is not None ): - return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data + return ( + self.compressed_link_network_mapping_idx, + self.compressed_link_network_mapping_data, + self.network_compressed_node_mapping, + ) # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute @@ -548,6 +575,8 @@ def create_compressed_link_network_mapping(self): idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) data = np.zeros(len(filtered), dtype=np.uint32) + node_mapping = np.full(self.num_nodes, -1) + i = 0 for compressed_id, df in gb: idx[compressed_id] = i @@ -559,7 +588,8 @@ def create_compressed_link_network_mapping(self): # we do this assuming the `a` array is sorted. j = 0 # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup - x = a[np.isin(a, b, invert=True, assume_unique=True)][0] + + a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] while True: tmp = a.searchsorted(x) if tmp < len(a) and a[tmp] == x: @@ -569,14 +599,19 @@ def create_compressed_link_network_mapping(self): break j += 1 + b_node = x + node_mapping[a_node] = self.compact_graph["a_node"].iat[compressed_id] + node_mapping[b_node] = self.compact_graph["b_node"].iat[compressed_id] + i += len(values) idx[-1] = i self.compressed_link_network_mapping_idx = idx self.compressed_link_network_mapping_data = data + self.network_compressed_node_mapping = node_mapping - return idx, data + return idx, data, node_mapping class Graph(GraphBase): diff --git a/aequilibrae/paths/results/assignment_results.py b/aequilibrae/paths/results/assignment_results.py index 220b7033a..1c6da335a 100644 --- a/aequilibrae/paths/results/assignment_results.py +++ b/aequilibrae/paths/results/assignment_results.py @@ -1,10 +1,9 @@ -import dataclasses import multiprocessing as mp from abc import ABC, abstractmethod import numpy as np from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -from aequilibrae.paths.graph import Graph, TransitGraph, GraphBase +from aequilibrae.paths.graph import Graph, TransitGraph, GraphBase, _get_graph_to_network_mapping from aequilibrae.parameters import Parameters from aequilibrae import global_logger from pathlib import Path @@ -22,14 +21,6 @@ """ -@dataclasses.dataclass -class NetworkGraphIndices: - network_ab_idx: np.array - network_ba_idx: np.array - graph_ab_idx: np.array - graph_ba_idx: np.array - - class AssignmentResultsBase(ABC): """Assignment results base class for traffic and transit assignments.""" @@ -249,15 +240,7 @@ def total_flows(self) -> None: sum_axis1(self.total_link_loads, self.link_loads, self.cores) def get_graph_to_network_mapping(self): - num_uncompressed_links = int(np.unique(self.lids).shape[0]) - indexing = 
np.zeros(int(self.lids.max()) + 1, np.uint64) - indexing[np.unique(self.lids)[:]] = np.arange(num_uncompressed_links) - - graph_ab_idx = self.direcs > 0 - graph_ba_idx = self.direcs < 0 - network_ab_idx = indexing[self.lids[graph_ab_idx]] - network_ba_idx = indexing[self.lids[graph_ba_idx]] - return NetworkGraphIndices(network_ab_idx, network_ba_idx, graph_ab_idx, graph_ba_idx) + return _get_graph_to_network_mapping(self.lids, self.direcs) def get_load_results(self) -> AequilibraeData: """ diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 9de098905..467ba3bd5 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,11 +1,13 @@ import numpy as np import socket from aequilibrae.context import get_active_project -from aequilibrae.paths.graph import Graph +from aequilibrae.paths.graph import Graph, _get_graph_to_network_mapping from aequilibrae.paths.route_choice_set import RouteChoiceSet -from typing import Optional +from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData +from typing import Optional, Union, Tuple, List import pyarrow as pa import pathlib +import itertools import logging @@ -21,7 +23,7 @@ class RouteChoice: "max_depth": 0, } - def __init__(self, graph: Graph, project=None): + def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.paramaters = self.default_paramaters.copy() proj = project or get_active_project(must_exist=False) @@ -31,7 +33,8 @@ def __init__(self, graph: Graph, project=None): self.cores: int = 0 self.graph = graph - self.__rc = RouteChoiceSet(graph) + self.matrix = matrix + self.__rc = None self.schema = RouteChoiceSet.schema self.psl_schema = RouteChoiceSet.psl_schema @@ -41,6 +44,8 @@ def __init__(self, graph: Graph, project=None): self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None + self.nodes = Optional[Union[List[int], List[Tuple[int, int]]]] = None + def set_algorithm(self, algorithm: str): 
""" Chooses the assignment algorithm. @@ -113,6 +118,7 @@ def set_save_path_files(self, save_it: bool) -> None: **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved """ self.save_path_files = save_it + raise NotImplementedError() def set_save_routes(self, where: Optional[str] = None) -> None: """ @@ -126,6 +132,42 @@ def set_save_routes(self, where: Optional[str] = None) -> None: """ self.where = pathlib.Path(where) if where is not None else None + def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): + """ + Prepare OD pairs for batch computation. + + :Arguments: + **nodes** (:obj:`Union[list[int], list[tuple[int, int]]]`): List of node IDs to operate on. If a 1D list is + provided, OD pairs are taken to be all pair permutations of the list. If a list of pairs is provided + OD pairs are taken as is. All node IDs must be present in the compressed graph. To make a node ID + always appear in the compressed graph add it as a centroid. Duplicates will be dropped on execution. 
+ """ + if len(nodes) == 0: + raise ValueError("`nodes` list-like empty.") + + if isinstance(nodes[0], tuple): + # Selection of OD pairs + if any(len(x) != 2 for x in nodes): + raise ValueError("`nodes` list contains non-pair elements") + self.nodes = nodes + + elif isinstance(nodes[0], int): + self.nodes = list(itertools.permutations(nodes, r=2)) + + def execute_single(self, origin: int, destination: int): + if self.__rc is None: + self.__rc = RouteChoiceSet(self.graph) + + return self.__rc.run(origin, destination, **self.paramaters) + + def execute(self, path_size_logit: bool = False): + if self.__rc is None: + self.__rc = RouteChoiceSet(self.graph) + + return self.__rc.batched( + self.nodes, bfsle=self.algorithm == "bfsle", path_size_logit=path_size_logit, **self.paramaters + ) + def info(self) -> dict: """Returns information for the transit assignment procedure @@ -173,3 +215,79 @@ def results(self): ) return self.results + + def get_load_results( + self, which: str = "uncompressed" + ) -> Union[Tuple[AequilibraeData, AequilibraeData], Tuple[AequilibraeData]]: + """ + Translates the link loading results from the graph format into the network format. + + :Returns: + **dataset** (:obj:`tuple[AequilibraeData]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. 
+ """ + + if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: + raise ValueError("`which` argumnet must be one of ['uncompressed', 'compressed', 'both']") + + compressed = which == "both" or which == "compressed" + uncompressed = which == "both" or which == "uncompressed" + + fields = self.matrix.names + + tmp = self.__rc.link_loading(self.matrix, self.save_path_files) + if isinstance(tmp, dict): + self.link_loads = {k: v[0] for k, v in tmp.items()} + self.compact_link_loads = {k: v[1] for k, v in tmp.items()} + else: + self.link_loads = {fields[0]: tmp[0]} + self.compact_link_loads = {fields[0]: tmp[1]} + + # Get a mapping from the compressed graph to/from the network graph + m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + + # Create a data store with a row for each uncompressed link + + if uncompressed: + uncompressed_res = AequilibraeData.empty( + memory_mode=True, + entries=self.graph.num_links, + field_names=fields, + data_types=[np.float64] * len(fields), + fill=np.nan, + index=self.graph.graph.link_id.values, + ) + + for k, v in self.link_loads: + # Directional Flows + uncompressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + uncompressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + + # Tot Flow + uncompressed_res.data[k + "_tot"] = np.nan_to_num(uncompressed_res.data[k + "_ab"]) + np.nan_to_num( + uncompressed_res.data[k + "_ba"] + ) + + if compressed: + compressed_res = AequilibraeData.empty( + memory_mode=True, + entries=self.graph.compact_num_links, + field_names=fields, + data_types=[np.float64] * len(fields), + fill=np.nan, + index=self.graph.compact_graph.id.values, + ) + + for k, v in self.compact_link_loads: + # Directional Flows + compressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + compressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + + 
# Tot Flow + compressed_res.data[k + "_tot"] = np.nan_to_num(compressed_res.data[k + "_ab"]) + np.nan_to_num( + compressed_res.data[k + "_ba"] + ) + + return ((uncompressed_res,) if uncompressed else ()) + ((compressed_res,) if compressed else ()) + + def get_select_link_results(self) -> AequilibraeData: + raise NotImplementedError() diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index d6ed083f0..eeccbaabd 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -143,7 +143,7 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.mapping_idx, self.mapping_data = graph.create_compressed_link_network_mapping() + self.mapping_idx, self.mapping_data, _ = graph.create_compressed_link_network_mapping() def __dealloc__(self): """ From 861ea503bd74f69bb079799f3ecb1a0c4f81c126 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 09:35:49 +1000 Subject: [PATCH 21/52] Add link to bfsle paper, add American spelling --- aequilibrae/paths/route_choice.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 467ba3bd5..422565a4c 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -13,7 +13,7 @@ class RouteChoice: - all_algorithms = ["bfsle", "lp", "link-penalisation"] + all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { "beta": 1.0, "theta": 1.0, @@ -49,7 +49,11 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): def set_algorithm(self, algorithm: str): """ Chooses the assignment algorithm. - Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation' + Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. 
+ + BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, + Michael Balmer & Kay W. Axhausen (2013). + https://doi.org/10.1080/18128602.2012.671383 'lp' is also accepted as an alternative to 'link-penalisation' @@ -58,6 +62,7 @@ def set_algorithm(self, algorithm: str): """ algo_dict = {i: i for i in self.all_algorithms} algo_dict["lp"] = "link-penalisation" + algo_dict["link-penalization"] = "link-penalisation" algo = algo_dict.get(algorithm.lower()) if algo is None: @@ -81,7 +86,7 @@ def set_cores(self, cores: int) -> None: def set_paramaters(self, par: dict): """ - Sets the parameters for the route choice TODO, do we want link specific values? + Sets the parameters for the route choice. "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. @@ -97,11 +102,6 @@ def set_paramaters(self, par: dict): it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. 
- - Parameter values can be scalars (same values for the entire network) or network field names - (link-specific values) - Examples: {'alpha': 0.15, 'beta': 4.0} or {'alpha': 'alpha', 'beta': 'beta'} - - :Arguments: **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF """ From 68c2229011306578eaa0427c0470fbed3c8a5a83 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 11:18:24 +1000 Subject: [PATCH 22/52] Fix lots of small errors in wrapper class --- aequilibrae/paths/route_choice.py | 132 ++++++++++++++++++------------ 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 422565a4c..d37ceb7fc 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,15 +1,17 @@ -import numpy as np +import itertools +import logging +import pathlib import socket +from typing import List, Optional, Tuple, Union +from uuid import uuid4 + +import numpy as np +import pandas as pd +import pyarrow as pa from aequilibrae.context import get_active_project +from aequilibrae.matrix import AequilibraeMatrix from aequilibrae.paths.graph import Graph, _get_graph_to_network_mapping from aequilibrae.paths.route_choice_set import RouteChoiceSet -from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -from typing import Optional, Union, Tuple, List -import pyarrow as pa -import pathlib -import itertools - -import logging class RouteChoice: @@ -25,6 +27,7 @@ class RouteChoice: def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.paramaters = self.default_paramaters.copy() + self.procedure_id = uuid4().hex proj = project or get_active_project(must_exist=False) self.project = proj @@ -43,8 +46,11 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.link_loads: Optional[np.array] = None self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None + 
self.save_path_files: bool = False + + self.nodes: Optional[Union[List[int], List[Tuple[int, int]]]] = None - self.nodes = Optional[Union[List[int], List[Tuple[int, int]]]] = None + self._config = {} def set_algorithm(self, algorithm: str): """ @@ -79,12 +85,9 @@ def set_cores(self, cores: int) -> None: :Arguments: **cores** (:obj:`int`): Number of CPU cores to use """ - if not self.classes: - raise RuntimeError("You need load transit classes before overwriting the number of cores") - self.cores = cores - def set_paramaters(self, par: dict): + def set_paramaters(self, **kwargs): """ Sets the parameters for the route choice. @@ -103,13 +106,13 @@ def set_paramaters(self, par: dict): specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. :Arguments: - **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF + **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm """ - if any(key not in self.default_paramaters for key in par.keys()): + if any(key not in self.default_paramaters for key in kwargs.keys()): raise ValueError("Invalid parameter provided") - self.paramaters = self.default_paramaters | par + self.paramaters = self.default_paramaters | kwargs def set_save_path_files(self, save_it: bool) -> None: """Turn path saving on or off. 
@@ -154,18 +157,31 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): elif isinstance(nodes[0], int): self.nodes = list(itertools.permutations(nodes, r=2)) - def execute_single(self, origin: int, destination: int): + def execute_single(self, origin: int, destination: int, path_size_logit: bool = False): if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) - return self.__rc.run(origin, destination, **self.paramaters) + self.results = None + return self.__rc.run( + origin, + destination, + bfsle=self.algorithm == "bfsle", + path_size_logit=path_size_logit, + cores=self.cores, + **self.paramaters, + ) def execute(self, path_size_logit: bool = False): if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) + self.results = None return self.__rc.batched( - self.nodes, bfsle=self.algorithm == "bfsle", path_size_logit=path_size_logit, **self.paramaters + self.nodes, + bfsle=self.algorithm == "bfsle", + path_size_logit=path_size_logit, + cores=self.cores, + **self.paramaters, ) def info(self) -> dict: @@ -195,7 +211,7 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def results(self): + def get_results(self): """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. @@ -217,17 +233,19 @@ def results(self): return self.results def get_load_results( - self, which: str = "uncompressed" - ) -> Union[Tuple[AequilibraeData, AequilibraeData], Tuple[AequilibraeData]]: + self, + which: str = "uncompressed", + clamp: bool = True, + ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], Tuple[pd.DataFrame]]: """ Translates the link loading results from the graph format into the network format. :Returns: - **dataset** (:obj:`tuple[AequilibraeData]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. 
+ **dataset** (:obj:`tuple[pd.DataFrame]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. """ if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: - raise ValueError("`which` argumnet must be one of ['uncompressed', 'compressed', 'both']") + raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") compressed = which == "both" or which == "compressed" uncompressed = which == "both" or which == "uncompressed" @@ -242,52 +260,58 @@ def get_load_results( self.link_loads = {fields[0]: tmp[0]} self.compact_link_loads = {fields[0]: tmp[1]} + if clamp: + for v in itertools.chain(self.link_loads.values(), self.compact_link_loads.values()): + v[(v < 1e-15)] = 0.0 + # Get a mapping from the compressed graph to/from the network graph m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + m_compact = _get_graph_to_network_mapping( + self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values + ) + lids = np.unique(self.graph.graph.link_id.values) + compact_lids = np.unique(self.graph.compact_graph.link_id.values) # Create a data store with a row for each uncompressed link - if uncompressed: - uncompressed_res = AequilibraeData.empty( - memory_mode=True, - entries=self.graph.num_links, - field_names=fields, - data_types=[np.float64] * len(fields), - fill=np.nan, - index=self.graph.graph.link_id.values, + uncompressed_df = pd.DataFrame( + {"link_id": lids} + | {k + dir: np.zeros(lids.shape) for k in self.link_loads.keys() for dir in ["_ab", "_ba"]} ) - - for k, v in self.link_loads: + for k, v in self.link_loads.items(): # Directional Flows - uncompressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - uncompressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + uncompressed_df[k + "_ab"].values[m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + 
uncompressed_df[k + "_ba"].values[m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) # Tot Flow - uncompressed_res.data[k + "_tot"] = np.nan_to_num(uncompressed_res.data[k + "_ab"]) + np.nan_to_num( - uncompressed_res.data[k + "_ba"] + uncompressed_df[k + "_tot"] = np.nan_to_num(uncompressed_df[k + "_ab"].values) + np.nan_to_num( + uncompressed_df[k + "_ba"].values ) if compressed: - compressed_res = AequilibraeData.empty( - memory_mode=True, - entries=self.graph.compact_num_links, - field_names=fields, - data_types=[np.float64] * len(fields), - fill=np.nan, - index=self.graph.compact_graph.id.values, + compressed_df = pd.DataFrame( + {"link_id": compact_lids} + | { + k + dir: np.zeros(compact_lids.shape) + for k in self.compact_link_loads.keys() + for dir in ["_ab", "_ba"] + } ) - - for k, v in self.compact_link_loads: - # Directional Flows - compressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - compressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + for k, v in self.compact_link_loads.items(): + compressed_df[k + "_ab"].values[m_compact.network_ab_idx] = np.nan_to_num(v[m_compact.graph_ab_idx]) + compressed_df[k + "_ba"].values[m_compact.network_ba_idx] = np.nan_to_num(v[m_compact.graph_ba_idx]) # Tot Flow - compressed_res.data[k + "_tot"] = np.nan_to_num(compressed_res.data[k + "_ab"]) + np.nan_to_num( - compressed_res.data[k + "_ba"] + compressed_df[k + "_tot"] = np.nan_to_num(compressed_df[k + "_ab"].values) + np.nan_to_num( + compressed_df[k + "_ba"].values ) - return ((uncompressed_res,) if uncompressed else ()) + ((compressed_res,) if compressed else ()) + if uncompressed and not compressed: + return uncompressed_df + elif not uncompressed and compressed: + return compressed_df + else: + return uncompressed_df, compressed_df - def get_select_link_results(self) -> AequilibraeData: + def get_select_link_results(self) -> pd.DataFrame: raise NotImplementedError() From 
f0cd2cf7137dfc6515bed0432074f95f2332ec8e Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 11:56:26 +1000 Subject: [PATCH 23/52] Merges set algorithm and set parameters. Better docs --- aequilibrae/paths/route_choice.py | 125 +++++++++++++++++++----------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index d37ceb7fc..715b77be7 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -16,13 +16,11 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] + default_paramaters = { - "beta": 1.0, - "theta": 1.0, - "penalty": 1.1, - "seed": 0, - "max_routes": 0, - "max_depth": 0, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0}, + "link-penalisation": {"penalty": 1.1}, + "bfsle": {"beta": 1.0, "theta": 1.0}, } def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): @@ -52,10 +50,10 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self._config = {} - def set_algorithm(self, algorithm: str): + def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: """ - Chooses the assignment algorithm. - Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. + Chooses the assignment algorithm and set parameters. + Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). @@ -63,8 +61,25 @@ def set_algorithm(self, algorithm: str): 'lp' is also accepted as an alternative to 'link-penalisation' + Setting the parameters for the route choice: + + `beta`, `theta`, and `seed` are BFSLE specific parameters. + `penalty` is a link penalisation specific parameter. 
+ + Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + + - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is + largely dependent on the size of the paths within the network. For very small networks a value of 10 + is a recommended starting point. For large networks a good starting value is 5. Increase the value + until the number of desired routes is being consistently returned. + + - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, + it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, + specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used + **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm """ algo_dict = {i: i for i in self.all_algorithms} algo_dict["lp"] = "link-penalisation" @@ -74,9 +89,16 @@ def set_algorithm(self, algorithm: str): if algo is None: raise AttributeError(f"Assignment algorithm not available. Choose from: {','.join(self.all_algorithms)}") + defaults = self.default_paramaters["generic"] | self.default_paramaters[algo].keys() + for key in kwargs.keys(): + if key not in defaults: + raise ValueError(f"Invalid parameter `{key}` provided for algorithm `{algo}`") + self.algorithm = algo self._config["Algorithm"] = algo + self.paramaters = defaults | kwargs + def set_cores(self, cores: int) -> None: """Allows one to set the number of cores to be used @@ -87,33 +109,6 @@ def set_cores(self, cores: int) -> None: """ self.cores = cores - def set_paramaters(self, **kwargs): - """ - Sets the parameters for the route choice. - - "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. - "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. 
- - Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. - - - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is - largely dependent on the size of the paths within the network. For very small networks a value of 10 - is a recommended starting point. For large networks a good starting value is 5. Increase the value - until the number of desired routes is being consistently returned. - - - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, - it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, - specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. - - :Arguments: - **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm - """ - - if any(key not in self.default_paramaters for key in kwargs.keys()): - raise ValueError("Invalid parameter provided") - - self.paramaters = self.default_paramaters | kwargs - def set_save_path_files(self, save_it: bool) -> None: """Turn path saving on or off. @@ -135,7 +130,7 @@ def set_save_routes(self, where: Optional[str] = None) -> None: """ self.where = pathlib.Path(where) if where is not None else None - def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): + def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: """ Prepare OD pairs for batch computation. @@ -157,7 +152,23 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): elif isinstance(nodes[0], int): self.nodes = list(itertools.permutations(nodes, r=2)) - def execute_single(self, origin: int, destination: int, path_size_logit: bool = False): + def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: + """ + Generate route choice sets between `origin` and `destination`, potentially performing an assignment. 
+ + Does not require preparation. + + Node IDs must be present in the compressed graph. To make a node ID always appear in the compressed + graph add it as a centroid. + + :Arguments: + **origin** (:obj:`int`): Origin node ID. + **destination** (:obj:`int`): Destination node ID. + **perform_assignment** (:obj:`bool`): Whether or not to perform an assignment. Default `False`. + + :Returns: + ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. + """ if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) @@ -166,20 +177,36 @@ def execute_single(self, origin: int, destination: int, path_size_logit: bool = origin, destination, bfsle=self.algorithm == "bfsle", - path_size_logit=path_size_logit, + path_size_logit=perform_assignment, cores=self.cores, **self.paramaters, ) - def execute(self, path_size_logit: bool = False): + def execute(self, perform_assignment: bool = False) -> None: + """ + Generate route choice sets between the previously supplied nodes, potentially performing an assignment. + + Node IDs must be present in the compressed graph. To make a node ID always appear in the compressed + graph add it as a centroid. + + To access results see `RouteChoice.get_results()`. + + :Arguments: + **perform_assignment** (:obj:`bool`): Whether or not to perform an assignment. Default `False`. + """ + if self.nodes is None: + raise ValueError( + "to perform batch route choice generation you must first prepare with the selected nodes. 
See `RouteChoice.prepare()`" + ) + if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) self.results = None - return self.__rc.batched( + self.__rc.batched( self.nodes, bfsle=self.algorithm == "bfsle", - path_size_logit=path_size_logit, + path_size_logit=perform_assignment, cores=self.cores, **self.paramaters, ) @@ -211,14 +238,13 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def get_results(self): + def get_results(self) -> pa.Table: """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure - """ if self.results is None: try: @@ -236,12 +262,19 @@ def get_load_results( self, which: str = "uncompressed", clamp: bool = True, - ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], Tuple[pd.DataFrame]]: + ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: """ Translates the link loading results from the graph format into the network format. + :Arguments: + **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. + **clamp** (:obj:`bool`): Whether or not to treat values `< 1e-15` as `0.0`. + :Returns: - **dataset** (:obj:`tuple[pd.DataFrame]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. + **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + A tuple of uncompressed and compressed DataFrames with the link loading results. 
Or + the requested link loading result.s + """ if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: From 2c188164eecae9f2f553198cb708553859e9228f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 15:44:19 +1000 Subject: [PATCH 24/52] Add example docs and various bug fixes --- aequilibrae/paths/route_choice.py | 35 +++-- aequilibrae/paths/route_choice_set.pyx | 12 +- .../trip_distribution/plot_route_choice.py | 124 ++++++++++++++++++ 3 files changed, 160 insertions(+), 11 deletions(-) create mode 100644 docs/source/examples/trip_distribution/plot_route_choice.py diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 715b77be7..22eeee3f1 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -23,7 +23,7 @@ class RouteChoice: "bfsle": {"beta": 1.0, "theta": 1.0}, } - def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): + def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): self.paramaters = self.default_paramaters.copy() self.procedure_id = uuid4().hex @@ -55,7 +55,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. - BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, + BFSLE implementation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). https://doi.org/10.1080/18128602.2012.671383 @@ -89,7 +89,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: if algo is None: raise AttributeError(f"Assignment algorithm not available. 
Choose from: {','.join(self.all_algorithms)}") - defaults = self.default_paramaters["generic"] | self.default_paramaters[algo].keys() + defaults = self.default_paramaters["generic"] | self.default_paramaters[algo] for key in kwargs.keys(): if key not in defaults: raise ValueError(f"Invalid parameter `{key}` provided for algorithm `{algo}`") @@ -120,7 +120,7 @@ def set_save_path_files(self, save_it: bool) -> None: def set_save_routes(self, where: Optional[str] = None) -> None: """ - Set save path for route choice resutls. Provide ``None`` to disable. + Set save path for route choice results. Provide ``None`` to disable. **warning** enabling route saving will disable in memory results. Viewing the results will read the results from disk first. @@ -128,7 +128,11 @@ def set_save_routes(self, where: Optional[str] = None) -> None: :Arguments: **save_it** (:obj:`bool`): Boolean to indicate whether routes should be saved """ - self.where = pathlib.Path(where) if where is not None else None + if where is not None: + where = pathlib.Path(where) + if not where.exists(): + raise ValueError(f"Path does not exist `{where}`") + self.where = where def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: """ @@ -149,7 +153,7 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: raise ValueError("`nodes` list contains non-pair elements") self.nodes = nodes - elif isinstance(nodes[0], int): + elif isinstance(nodes[0], (int, np.unsignedinteger)): self.nodes = list(itertools.permutations(nodes, r=2)) def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: @@ -179,6 +183,7 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool bfsle=self.algorithm == "bfsle", path_size_logit=perform_assignment, cores=self.cores, + where=str(self.where) if self.where is not None else None, **self.paramaters, ) @@ -208,6 +213,7 @@ def execute(self, perform_assignment: bool = False) 
-> None: bfsle=self.algorithm == "bfsle", path_size_logit=perform_assignment, cores=self.cores, + where=str(self.where) if self.where is not None else None, **self.paramaters, ) @@ -223,7 +229,11 @@ def info(self) -> dict: **info** (:obj:`dict`): Dictionary with summary information """ - matrix_totals = {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + matrix_totals = ( + {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + if self.matrix is not None + else None + ) info = { "Algorithm": self.algorithm, @@ -238,11 +248,13 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def get_results(self) -> pa.Table: + def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + If `save_routes` was specified then a Pyarrow dataset is returned. The call is responsible for reading this dataset. 
+ :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure """ @@ -251,7 +263,7 @@ def get_results(self) -> pa.Table: self.results = self.__rc.get_results() except RuntimeError as err: if self.where is None: - raise ValueError("Route choice results not computed and read/save path not specificed") from err + raise ValueError("Route choice results not computed and read/save path not specified") from err self.results = pa.dataset.dataset( self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema) ) @@ -280,6 +292,11 @@ def get_load_results( if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") + if self.matrix is None: + raise ValueError( + "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." + ) + compressed = which == "both" or which == "compressed" uncompressed = which == "both" or which == "uncompressed" diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index eeccbaabd..b8caf774a 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -217,7 +217,15 @@ cdef class RouteChoiceSet: Represents paths from ``origin`` to ``destination``. 
""" self.batched([(origin, destination)], *args, **kwargs) - return [tuple(x) for x in self.get_results().column("route set").to_pylist()] + where = kwargs.get("where", None) + if where is not None: + schema = self.psl_schema if kwargs.get("path_size_logit", False) else self.schema + results = pa.dataset.dataset( + where, format="parquet", partitioning=pa.dataset.HivePartitioning(schema) + ).to_table() + else: + results = self.get_results() + return [tuple(x) for x in results.column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -315,7 +323,7 @@ cdef class RouteChoiceSet: warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs") if where is not None: - checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"]) + checkpoint = Checkpoint(where, self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"]) batches = list(Checkpoint.batches(list(set_ods))) max_results_len = max(len(batch) for batch in batches) else: diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py new file mode 100644 index 000000000..5f285cb22 --- /dev/null +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -0,0 +1,124 @@ +""" +.. _example_usage_route_choice: + +Route Choice +================= + +In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La Serena Metropolitan Area in Chile. 
+"""
+
+# Imports
+from uuid import uuid4
+from tempfile import gettempdir
+from os.path import join
+from aequilibrae.utils.create_example import create_example
+
+# We create the example project inside our temp folder
+fldr = join(gettempdir(), uuid4().hex)
+
+project = create_example(fldr, "coquimbo")
+
+# %%
+import logging
+import sys
+
+# Once the project opens, we can tell the logger to direct all messages to the terminal as well
+logger = project.logger
+stdout_handler = logging.StreamHandler(sys.stdout)
+formatter = logging.Formatter("%(asctime)s;%(levelname)s ; %(message)s")
+stdout_handler.setFormatter(formatter)
+logger.addHandler(stdout_handler)
+
+# %%
+# Route Choice
+# ---------------

+# %%
+import numpy as np
+
+# %%
+# Let's build all graphs
+project.network.build_graphs()
+# We get warnings that several fields in the project are filled with NaNs.
+# This is true, but we won't use those fields.
+
+# %%
+# We grab the graph for cars
+graph = project.network.graphs["c"]
+
+# we also see what graphs are available
+project.network.graphs.keys()
+
+# let's say we want to minimize the distance
+graph.set_graph("distance")
+
+# But let's say we only want a skim matrix for nodes 28-40, and 49-90 (inclusive), these happen to be a selection of western centroids.
+graph.prepare_graph(np.array(list(range(28, 41)) + list(range(49, 91))))
+
+# %%
+# Mock demand matrix
+# ~~~~~~~~~~~~~~~~~~
+# We'll create a mock demand matrix with demand `1` for every zone. 
+from aequilibrae.matrix import AequilibraeMatrix
+
+names_list = ["demand", "5x demand"]
+
+mat = AequilibraeMatrix()
+mat.create_empty(zones=graph.num_zones, matrix_names=names_list, memory_only=True)
+mat.index = graph.centroids[:]
+mat.matrices[:, :, 0] = np.full((graph.num_zones, graph.num_zones), 1.0)
+mat.matrices[:, :, 1] = np.full((graph.num_zones, graph.num_zones), 5.0)
+mat.computational_view()
+
+# %%
+# Route Choice class
+# ~~~~~~~~~~~~~~~~~~
+# Here we'll construct and use the Route Choice class to generate our route sets
+from aequilibrae.paths import RouteChoice

+# %%
+# This object construction might take a minute depending on the size of the graph due to the construction of the compressed link to network link mapping that's required.
+# This is a one time operation per graph and is cached.
+# We need to supply a Graph and optionally an AequilibraeMatrix, if the matrix is not provided link loading cannot be performed.
+rc = RouteChoice(graph, mat)
+
+# %%
+# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE based on the paper
+# "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013).
+# https://doi.org/10.1080/18128602.2012.671383
+# It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results.
+
+# rc.set_choice_set_generation("link-penalisation", max_routes=5, penalty=1.1)
+rc.set_choice_set_generation("bfsle", max_routes=5, beta=1.1, theta=1.1)
+
+# %%
+# All parameters are optional, the defaults are:
+print(rc.default_paramaters)
+
+# %%
+# We can now perform a computation for a single OD pair if we'd like. Here we do one between the first and last centroid as well as an assignment. 
+results = rc.execute_single(28, 90, perform_assignment=True) +print(results[0]) + +# %% +# Because we asked it to also perform an assignment we can access the various results from that +# The default return is a Pyarrow Table but Pandas is nicer for viewing. +rc.get_results().to_pandas() + +# %% +# To perform a batch operation we need to prepare the object first. We can either provide a list of tuple of the OD pairs we'd like to use, or we can provided a 1D list +# and the generation will be run on all permutations. +rc.prepare(graph.centroids[:5]) # You can inspect the result with rc.nodes + +# %% +# Now we can perform a batch computation with an assignment +rc.execute(perform_assignment=True) +rc.get_results().to_pandas() + +# %% +# Since we provided a matrix initially we can also perform link loading based on our assignment results. +# We can specify which link loading we want, either just uncompressed, just compressed, or both. +rc.get_load_results(which="both") + +# %% +project.close() From 09c7294b816f9f1b10484af92fe051ecbbe57e69 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 15:46:21 +1000 Subject: [PATCH 25/52] Make deadlock case and error, needs a real fix --- aequilibrae/paths/route_choice_set.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index b8caf774a..56217bbd6 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -289,6 +289,10 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") + + if where is not None and cores != 1: + raise NotImplementedError("current implementation suffers from a deadlock when using multithreading and writing to disk") + cdef: long long origin_index, dest_index, i unsigned int c_max_routes = max_routes From 757535fe123341f9d45edd005ad32fe24509aa86 Mon Sep 17 00:00:00 2001 From: 
Jake-Moss Date: Tue, 19 Mar 2024 15:54:52 +1000 Subject: [PATCH 26/52] Enforce single thread for tests --- tests/aequilibrae/paths/test_route_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 538b70fc1..781277c29 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -160,7 +160,7 @@ def test_round_trip(self): path = join(self.project.project_base_path, "batched results") rc.batched(nodes, max_routes=max_routes, max_depth=10) table = rc.get_results().to_pandas() - rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) + rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path, cores=1) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) new_table = ( From 4f6fc869b0af3ae7b8dcbe4a0444e6cd7af1f6aa Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:07:11 +1000 Subject: [PATCH 27/52] Fix the "deadlock", code wasn't deadlocking but it was running away --- aequilibrae/paths/route_choice.py | 10 +++++++--- aequilibrae/paths/route_choice_set.pxd | 2 ++ aequilibrae/paths/route_choice_set.pyx | 26 +++++++++++++++++++------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 22eeee3f1..75858c58d 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -18,7 +18,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100}, "link-penalisation": {"penalty": 1.1}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -66,16 +66,20 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: `beta`, 
`theta`, and `seed` are BFSLE specific parameters. `penalty` is a link penalisation specific parameter. - Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + Setting `max_depth` or `max_misses`, while not required, is strongly recommended to prevent runaway algorithms. + `max_misses` is the maximum number of duplicate routes found per OD pair. If it is exceeded then the route set + is returned with fewer than `max_routes`. It has a default value of `100`. - - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is largely dependent on the size of the paths within the network. For very small networks a value of 10 is a recommended starting point. For large networks a good starting value is 5. Increase the value - until the number of desired routes is being consistently returned. + until the number of desired routes is being consistently returned. If it is exceeded then the route set + is returned with fewer than `max_routes`. - - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. + If it is exceeded then the route set is returned with fewer than `max_routes`. 
:Arguments: **algorithm** (:obj:`str`): Algorithm to be used diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index e80a41c6a..534a854e1 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -170,6 +170,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, @@ -184,6 +185,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 56217bbd6..eb00fa745 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -237,6 +237,7 @@ cdef class RouteChoiceSet: ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, + max_misses: int = 100, seed: int = 0, cores: int = 0, a_star: bool = True, @@ -259,6 +260,7 @@ cdef class RouteChoiceSet: **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. Must be non-negative. Default of ``0`` for unlimited. + **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. Terminates if exceeded. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. 
@@ -289,14 +291,11 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") - - if where is not None and cores != 1: - raise NotImplementedError("current implementation suffers from a deadlock when using multithreading and writing to disk") - cdef: long long origin_index, dest_index, i unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth + unsigned int c_max_misses = max_misses unsigned int c_seed = seed unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() @@ -399,6 +398,7 @@ cdef class RouteChoiceSet: dest_index, c_max_routes, c_max_depth, + c_max_misses, cost_matrix[threadid()], predecessors_matrix[threadid()], conn_matrix[threadid()], @@ -413,6 +413,7 @@ cdef class RouteChoiceSet: dest_index, c_max_routes, c_max_depth, + c_max_misses, cost_matrix[threadid()], predecessors_matrix[threadid()], conn_matrix[threadid()], @@ -539,6 +540,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, @@ -558,6 +560,8 @@ cdef class RouteChoiceSet: unordered_set[long long] *banned unordered_set[long long] *new_banned vector[long long] *vec + pair[RouteSet_t.iterator, bool] status + unsigned int miss_count = 0 long long p, connector max_routes = max_routes if max_routes != 0 else UINT_MAX @@ -615,8 +619,9 @@ cdef class RouteChoiceSet: next_queue.push_back(new_banned) # The deduplication of routes occurs here - route_set.insert(vec) - if route_set.size() >= max_routes: + status = route_set.insert(vec) + miss_count += not status.second + if miss_count > max_misses or route_set.size() >= max_routes: break queue.swap(next_queue) @@ -642,6 +647,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long 
long [:] thread_predecessors, long long [:] thread_conn, @@ -656,6 +662,8 @@ cdef class RouteChoiceSet: # Scratch objects vector[long long] *vec long long p, connector + pair[RouteSet_t.iterator, bool] status + unsigned int miss_count = 0 max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -682,7 +690,11 @@ cdef class RouteChoiceSet: for connector in deref(vec): thread_cost[connector] *= penatly - route_set.insert(vec) + # To prevent runaway algorithms if we find a n duplicate routes we should stop + status = route_set.insert(vec) + miss_count += not status.second + if miss_count > max_misses: + break else: break From 0aec7ecf15809ce68ef2f82757cf99a4cf35a8eb Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:45:19 +1000 Subject: [PATCH 28/52] Limit pyarrow IO threads, Cython += is funky --- aequilibrae/paths/route_choice_set.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index eb00fa745..72293bc9c 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -316,6 +316,8 @@ cdef class RouteChoiceSet: # self.a_star = a_star + pa.set_io_thread_count(cores) + if self.a_star: _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64) # Dummy array to allow slicing else: @@ -620,7 +622,7 @@ cdef class RouteChoiceSet: # The deduplication of routes occurs here status = route_set.insert(vec) - miss_count += not status.second + miss_count = miss_count + (not status.second) if miss_count > max_misses or route_set.size() >= max_routes: break @@ -692,7 +694,7 @@ cdef class RouteChoiceSet: # To prevent runaway algorithms if we find a n duplicate routes we should stop status = route_set.insert(vec) - miss_count += not status.second + miss_count = miss_count + (not status.second) if miss_count > max_misses: break else: From 
7f8879b6bcc3398591a48039692e431def96b148 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:49:39 +1000 Subject: [PATCH 29/52] Pyarrow IO threads must be > 0, give tests from more freedom --- aequilibrae/paths/route_choice_set.pyx | 2 +- tests/aequilibrae/paths/test_route_choice.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 72293bc9c..f7d82be40 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -316,7 +316,7 @@ cdef class RouteChoiceSet: # self.a_star = a_star - pa.set_io_thread_count(cores) + pa.set_io_thread_count(c_cores) if self.a_star: _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64) # Dummy array to allow slicing diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 781277c29..57ee9a22b 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -106,7 +106,7 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10, max_misses=200) results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") From ec2127280a736021032db7758e35b5953651c785 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 17:31:07 +1000 Subject: [PATCH 30/52] Better type checks and some tests --- aequilibrae/paths/route_choice.py | 15 +++-- tests/aequilibrae/paths/test_route_choice.py | 67 ++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 75858c58d..40c726947 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -151,14 
+151,19 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: if len(nodes) == 0: raise ValueError("`nodes` list-like empty.") - if isinstance(nodes[0], tuple): - # Selection of OD pairs - if any(len(x) != 2 for x in nodes): - raise ValueError("`nodes` list contains non-pair elements") + if all( + isinstance(pair, tuple) + and len(pair) == 2 + and isinstance(pair[0], (int, np.unsignedinteger)) + and isinstance(pair[1], (int, np.unsignedinteger)) + for pair in nodes + ): self.nodes = nodes - elif isinstance(nodes[0], (int, np.unsignedinteger)): + elif len(nodes) > 1 and all(isinstance(x, (int, np.unsignedinteger)) for x in nodes): self.nodes = list(itertools.permutations(nodes, r=2)) + else: + raise ValueError(f"{type(nodes)} or {type(nodes[0])} for not valid types for the `prepare` method") def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: """ diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 57ee9a22b..8d5542f24 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -225,6 +225,73 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(link_loads, link_loads2) +class TestRouteChoice(TestCase): + def setUp(self) -> None: + os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] + + proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex) + os.mkdir(proj_path) + zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) + + self.project = Project() + self.project.open(proj_path) + self.project.network.build_graphs(fields=["distance"], modes=["c"]) + self.graph = self.project.network.graphs["c"] # type: Graph + self.graph.set_graph("distance") + self.graph.set_blocked_centroid_flows(False) + + self.mat = self.project.matrices.get_matrix("demand_omx") + 
self.mat.computational_view() + + def test_prepare(self): + rc = RouteChoice(self.graph, self.mat) + + with self.assertRaises(ValueError): + rc.prepare([]) + + with self.assertRaises(ValueError): + rc.prepare(["1", "2"]) + + with self.assertRaises(ValueError): + rc.prepare([("1", "2")]) + + with self.assertRaises(ValueError): + rc.prepare([1]) + + rc.prepare([1, 2]) + self.assertListEqual(rc.nodes, [(1, 2), (2, 1)]) + rc.prepare([(1, 2)]) + self.assertListEqual(rc.nodes, [(1, 2)]) + + def test_set_save_routes(self): + rc = RouteChoice(self.graph, self.mat) + + with self.assertRaises(ValueError): + rc.set_save_routes("/non-existent-path") + + def test_set_choice_set_generation(self): + rc = RouteChoice(self.graph, self.mat) + + rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.assertDictEqual( + rc.paramaters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + ) + + rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) + self.assertDictEqual( + rc.paramaters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + ) + + with self.assertRaises(ValueError): + rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) + + with self.assertRaises(ValueError): + rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) + + with self.assertRaises(AttributeError): + rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + + def generate_line_strings(project, graph, results): """Debug method""" import geopandas as gpd From 6ad539b02d26cae99a5675393f38f908fa1ac4ac Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:11:39 +1000 Subject: [PATCH 31/52] Fix segfault and infinite loop due to miss count --- aequilibrae/paths/route_choice_set.pxd | 2 +- aequilibrae/paths/route_choice_set.pyx | 331 ++++++++++++++----------- setup.cfg | 6 +- 3 files changed, 197 insertions(+), 142 deletions(-) diff 
--git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 534a854e1..0044a28c6 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -196,7 +196,7 @@ cdef class RouteChoiceSet: ) noexcept nogil @staticmethod - cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) noexcept nogil @staticmethod cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index f7d82be40..e846b0311 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1,14 +1,43 @@ # cython: language_level=3str +from aequilibrae.paths.graph import Graph + +from cython.operator cimport dereference as deref +from cython.operator cimport preincrement as inc +from cython.parallel cimport parallel, prange, threadid +from libc.limits cimport UINT_MAX +from libc.math cimport INFINITY, exp, pow +from libc.stdlib cimport abort +from libc.string cimport memcpy +from libcpp cimport nullptr +from libcpp.algorithm cimport lower_bound, reverse, sort +from libcpp.unordered_map cimport unordered_map +from libcpp.unordered_set cimport unordered_set +from libcpp.utility cimport pair +from libcpp.vector cimport vector +from openmp cimport omp_get_num_threads + +import itertools +import logging +import pathlib +import warnings +from typing import List, Tuple + +import numpy as np +import pyarrow as pa +from aequilibrae.matrix import AequilibraeMatrix + +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. 
+cimport pyarrow as pa +cimport pyarrow.lib as libpa + """This module aims to implemented the BFS-LE algorithm as described in Rieser-Schüssler, Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data'. https://doi.org/10.1080/18128602.2012.671383 -A rough overview of the algorithm is as follows. - 1. Prepare the initial graph, this is depth 0 with no links removed. - 2. Find a short path, P. If P is not empty add P to the path set. - 3. For all links p in P, remove p from E, compounding with the previously removed links. - 4. De-duplicate the sub-graphs, we only care about unique sub-graphs. - 5. Go to 2. +A rough overview of the algorithm is as follows. 1. Prepare the initial graph, this is depth 0 with no links removed. + 2. Find a short path, P. If P is not empty add P to the path set. 3. For all links p in P, remove p from E, + compounding with the previously removed links. 4. De-duplicate the sub-graphs, we only care about unique + sub-graphs. 5. Go to 2. Details: The general idea of the algorithm is pretty simple, as is the implementation. The caveats here is that there is a lot of cpp interop and memory management. A description of the purpose of variables is in order: @@ -26,20 +55,20 @@ independent and should only use commutative operations. The comparator is the sa removed link sets we've seen before. This allows us to detected duplicated graphs. rng: A custom imported version of std::linear_congruential_engine. libcpp doesn't provide one so we do. It should be -significantly faster than the std::mersenne_twister_engine without sacrificing much. We don't need amazing RNG, just -ok and fast. This is only used to shuffle the queue. +significantly faster than the std::mersenne_twister_engine without sacrificing much. We don't need amazing RNG, just ok +and fast. This is only used to shuffle the queue. -queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these so a -vector is best. 
We maintain two queues, one that we are currently iterating over, and one that we can add to, building -up with all the newly removed link sets. These two are swapped at the end of an iteration, next_queue is then +queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these +so a vector is best. We maintain two queues, one that we are currently iterating over, and one that we can add to, +building up with all the newly removed link sets. These two are swapped at the end of an iteration, next_queue is then cleared. These store sets of removed links. banned, next_banned: `banned` is the iterator variable for `queue`. `banned` is copied into `next_banned` where another -link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is immediately -deallocated. Otherwise it's placed into `next_queue`. +link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is +immediately deallocated. Otherwise it's placed into `next_queue`. -vec: `vec` is a scratch variable to store pointers to new vectors, or rather, paths while we are building them. Each time a path -is found a new one is allocated, built, and stored in the route_set. +vec: `vec` is a scratch variable to store pointers to new vectors, or rather, paths while we are building them. Each +time a path is found a new one is allocated, built, and stored in the route_set. p, connector: Scratch variables for iteration. 
@@ -52,44 +81,12 @@ routes aren't required small-ish things like the memcpy and banned link set copy """ -from aequilibrae.paths.graph import Graph - -from libc.math cimport INFINITY, pow, exp -from libc.string cimport memcpy -from libc.limits cimport UINT_MAX -from libc.stdlib cimport abort -from libcpp cimport nullptr -from libcpp.vector cimport vector -from libcpp.unordered_set cimport unordered_set -from libcpp.unordered_map cimport unordered_map -from libcpp.utility cimport pair -from libcpp.algorithm cimport sort, lower_bound, reverse -from cython.operator cimport dereference as deref, preincrement as inc -from cython.parallel cimport parallel, prange, threadid -cimport openmp - -import numpy as np -import pyarrow as pa -from typing import List, Tuple -import itertools -import pathlib -import logging -import warnings -from aequilibrae.matrix import AequilibraeMatrix - -cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. -cimport pyarrow as pa -cimport pyarrow.lib as libpa -import pyarrow.dataset -import pyarrow.parquet as pq -from libcpp.memory cimport shared_ptr - -from libc.stdio cimport fprintf, printf, stderr - -# It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times +# It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation +# times include 'basic_path_finding.pyx' include 'parallel_numpy.pyx' + @cython.embedsignature(True) cdef class RouteChoiceSet: """ @@ -197,24 +194,24 @@ cdef class RouteChoiceSet: del self.ods self.ods = prob_set = nullptr - @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): - """ - Compute the a route set for a single OD pair. + """Compute the a route set for a single OD pair. 
Often the returned list's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all unique possible paths have been found then a smaller set will be returned. - Thin wrapper around ``RouteChoiceSet.batched``. Additional arguments are forwarded to ``RouteChoiceSet.batched``. + Additional arguments are forwarded to ``RouteChoiceSet.batched``. :Arguments: - **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a centroid. - **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to choose a centroid. + **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a + centroid. + **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to + choose a centroid. + + :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of + compact link IDs. Represents paths from ``origin`` to ``destination``. - :Returns: - **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. - Represents paths from ``origin`` to ``destination``. """ self.batched([(origin, destination)], *args, **kwargs) where = kwargs.get("where", None) @@ -248,24 +245,29 @@ cdef class RouteChoiceSet: beta: float = 1.0, theta: float = 1.0, ): - """ - Compute the a route set for a list of OD pairs. + """Compute the a route set for a list of OD pairs. - Often the returned list for each OD pair's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all - unique possible paths have been found then a smaller set will be returned. + Often the returned list for each OD pair's length is ``max_routes``, however, it may be limited by ``max_depth`` + or if all unique possible paths have been found then a smaller set will be returned. 
:Arguments: - **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination node ID must be - present within compact graph. Recommended to choose a centroids. - **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. - Must be non-negative. Default of ``0`` for unlimited. - **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. Terminates if exceeded. + **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination + node ID must be present within compact graph. Recommended to choose a centroids. + **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of + ``0`` for unlimited. + **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link + penalisation. Must be non-negative. Default of ``0`` for unlimited. + **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. + Terminates if exceeded. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. - **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. - **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. - **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. + **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. + Default of ``0`` for all available. + **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link + penalisation. 
Default ``True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible + with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. + """ cdef: long long o, d @@ -297,15 +299,19 @@ cdef class RouteChoiceSet: unsigned int c_max_depth = max_depth unsigned int c_max_misses = max_misses unsigned int c_seed = seed - unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() + unsigned int c_cores = cores if cores > 0 else omp_get_num_threads() vector[pair[long long, long long]] c_ods - # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory + # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this + # memory double [:, :] cost_matrix = np.empty((c_cores, self.cost_view.shape[0]), dtype=float) long long [:, :] predecessors_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) long long [:, :] conn_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (c_cores, self.b_nodes_view.shape[0])).copy() + long long [:, :] b_nodes_matrix = np.broadcast_to( + self.b_nodes_view, + (c_cores, self.b_nodes_view.shape[0]) + ).copy() # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the # interface. 
@@ -328,7 +334,10 @@ cdef class RouteChoiceSet: warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs") if where is not None: - checkpoint = Checkpoint(where, self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"]) + checkpoint = Checkpoint( + where, + self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"] + ) batches = list(Checkpoint.batches(list(set_ods))) max_results_len = max(len(batch) for batch in batches) else: @@ -340,7 +349,6 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set pair[vector[long long] *, vector[long long] *] freq_pair - vector[long long] *link_union_scratch = nullptr vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] *cost_set = nullptr vector[vector[double] *] *path_overlap_set = nullptr @@ -357,7 +365,9 @@ cdef class RouteChoiceSet: for batch in batches: c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() - results.resize(batch_len) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed + # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not + # needed + results.resize(batch_len) if path_size_logit: # we may clear these objects because it's either: @@ -374,11 +384,6 @@ cdef class RouteChoiceSet: prob_set.resize(batch_len) with nogil, parallel(num_threads=c_cores): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - if path_size_logit: - link_union_scratch = new vector[long long]() - for i in prange(batch_len): origin_index = self.nodes_to_indices_view[c_ods[i].first] dest_index = self.nodes_to_indices_view[c_ods[i].second] @@ -426,13 +431,23 @@ cdef class RouteChoiceSet: ) if path_size_logit: - link_union_scratch.clear() - freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) + freq_pair = RouteChoiceSet.compute_frequency(route_set) deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) - deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) - deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, theta) - del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) + deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap( + route_set, + freq_pair, + deref(deref(cost_set)[i]), + self.cost_view + ) + deref(prob_set)[i] = RouteChoiceSet.compute_prob( + deref(deref(cost_set)[i]), + deref(deref(path_overlap_set)[i]), + beta, + theta + ) + # While we need the unique sorted links (.first), we don't need the frequencies (.second) + del freq_pair.second deref(results)[i] = route_set @@ -446,13 +461,13 @@ cdef class RouteChoiceSet: self.b_nodes_view, ) - if path_size_logit: - del link_union_scratch - if where is not None: - table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set)) + table = libpa.pyarrow_wrap_table( + self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set) + ) - # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner 
internal structures + # Once we've made the table all results have been copied into some pyarrow structure, we can free our + # inner internal structures if path_size_logit: for j in range(batch_len): del deref(link_union_set)[j] @@ -468,7 +483,9 @@ cdef class RouteChoiceSet: checkpoint.write(table) del table else: - pass # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in memory + # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in + # memory + pass # Here we decide if we wish to preserve our results for later saving/link loading if where is not None: @@ -490,8 +507,6 @@ cdef class RouteChoiceSet: # we should copy it to keep it around self.ods = new vector[pair[long long, long long]](c_ods) - # self.link_union ?? This could be saved as a partial results from the computation above, although it isn't easy to get out rn - @cython.initializedcheck(False) cdef void path_find( RouteChoiceSet self, @@ -569,13 +584,14 @@ cdef class RouteChoiceSet: max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX - queue.push_back(new unordered_set[long long]()) # Start with no edges banned + queue.push_back(new unordered_set[long long]()) # Start with no edges banned route_set = new RouteSet_t() rng.seed(seed) - # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned edges to consider + # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned + # edges to consider for depth in range(max_depth): - if route_set.size() >= max_routes or queue.size() == 0: + if miss_count > max_misses or route_set.size() >= max_routes or queue.size() == 0: break # If we could potentially fill the route_set after this depth, shuffle the queue @@ -583,13 +599,23 @@ cdef class RouteChoiceSet: shuffle(queue.begin(), queue.end(), rng) for banned in queue: 
- # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and reverse them as well + # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and + # reverse them as well memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + RouteChoiceSet.path_find( + self, + origin_index, + dest_index, + thread_cost, + thread_predecessors, + thread_conn, + thread_b_nodes, + _thread_reached_first + ) # Mark this set of banned links as seen removed_links.insert(banned) @@ -597,7 +623,8 @@ cdef class RouteChoiceSet: # If the destination is reachable we must build the path and readd if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know + # how long it'll be p = dest_index while p != origin_index: connector = thread_conn[p] @@ -607,14 +634,16 @@ cdef class RouteChoiceSet: reverse(vec.begin(), vec.end()) for connector in deref(vec): - # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements - # then add a single element. An incremental set hash function could be of use. However, the since of this set is - # directly dependent on the current depth and as the route set size grows so incredibly fast the depth will rarely get - # high enough for this to matter. - # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue + # This is one area for potential improvement. 
Here we construct a new set from the old one, + # copying all the elements then add a single element. An incremental set hash function could be + # of use. However, the since of this set is directly dependent on the current depth and as the + # route set size grows so incredibly fast the depth will rarely get high enough for this to + # matter. Copy the previously banned links, then for each vector in the path we add one and + # push it onto our queue new_banned = new unordered_set[long long](deref(banned)) new_banned.insert(connector) - # If we've already seen this set of removed links before we already know what the path is and its in our route set + # If we've already seen this set of removed links before we already know what the path is and + # its in our route set if removed_links.find(new_banned) != removed_links.end(): del new_banned else: @@ -676,11 +705,21 @@ cdef class RouteChoiceSet: if route_set.size() >= max_routes: break - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + RouteChoiceSet.path_find( + self, + origin_index, + dest_index, + thread_cost, + thread_predecessors, + thread_conn, + thread_b_nodes, + _thread_reached_first + ) if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how + # long it'll be p = dest_index while p != origin_index: connector = thread_conn[p] @@ -707,17 +746,18 @@ cdef class RouteChoiceSet: @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil: + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) 
noexcept nogil: cdef: vector[long long] *keys vector[long long] *counts + vector[long long] link_union + vector[long long].const_iterator union_iter + vector[long long] *route # Scratch objects size_t length, count long long link, i - link_union.clear() - keys = new vector[long long]() counts = new vector[long long]() @@ -731,8 +771,8 @@ cdef class RouteChoiceSet: sort(link_union.begin(), link_union.end()) - union_iter = link_union.begin() - while union_iter != link_union.end(): + union_iter = link_union.cbegin() + while union_iter != link_union.cend(): count = 0 link = deref(union_iter) while link == deref(union_iter): @@ -809,7 +849,8 @@ cdef class RouteChoiceSet: # We want to find the index of the link, and use that to look up it's frequency link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) - path_overlap = path_overlap + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + path_overlap = path_overlap + cost_view[link] \ + / deref(freq_set.second)[link_iter - freq_set.first.begin()] path_overlap_vec.push_back(path_overlap / total_cost[j]) @@ -838,11 +879,13 @@ cdef class RouteChoiceSet: prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) - # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will lead to NaN results + # Beware when refactoring the below, the scale of the costs may cause floating point errors. 
Large costs will + # lead to NaN results for i in range(total_cost.size()): inv_prob = 0.0 for j in range(total_cost.size()): - inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ + * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) @@ -858,7 +901,7 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") - cores = cores if cores > 0 else openmp.omp_get_num_threads() + cores = cores if cores > 0 else omp_get_num_threads() cdef: vector[vector[double] *] *path_files = nullptr @@ -879,7 +922,6 @@ cdef class RouteChoiceSet: tmp.append(deref(vec)) print(tmp) - def apply_link_loading_func(m): if generate_path_files: ll = self.apply_link_loading_from_path_files( @@ -889,7 +931,8 @@ cdef class RouteChoiceSet: else: ll = self.apply_link_loading(m) - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without + # transferring owner ship. compressed = &deref(ll)[0] actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) @@ -903,7 +946,6 @@ cdef class RouteChoiceSet: del ll return actual.reshape(-1), compressed.reshape(-1) - if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) else: @@ -945,9 +987,6 @@ cdef class RouteChoiceSet: long long i with parallel(num_threads=cores): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - for i in prange(ods.size()): link_union = link_union_set[i] loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME @@ -961,15 +1000,18 @@ cdef class RouteChoiceSet: if prob == 0.0: continue - # For each link in the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. - links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + # For each link in the route, we need to assign the appropriate demand * prob Because the link union + # is known to be sorted, if the links in the route are also sorted we can just step along both + # arrays simultaneously, skipping elements in the link_union when appropriate. This allows us to + # operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense + # formulation. This is also incredibly cache efficient, the only downsides are that the code is + # harder to read and it requires sorting the route. 
NOTE: the sorting of routes is technically + # something that is already computed, during the computation of the link frequency we merge and sort + # all links, if we instead sorted then used an N-way merge we could reuse the sorted routes and the + # sorted link union. + + # We copy the links in case the routes haven't already been saved # FIXME FREE ME + links = new vector[long long](deref(route)) sort(links.begin(), links.end()) # links and link_union are sorted, and links is a subset of link_union @@ -1024,7 +1066,7 @@ cdef class RouteChoiceSet: for j in range(link_union.size()): link = deref(link_union)[j] - deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] # += here results in all zeros? Odd + deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] return link_loads @@ -1117,8 +1159,9 @@ cdef class RouteChoiceSet: for i in range(ods.size()): route_set = route_sets[i] - # Instead of construction a "list of lists" style object for storing the route sets we instead will construct one big array of link ids - # with a corresponding offsets array that indicates where each new row (path) starts. + # Instead of construction a "list of lists" style object for storing the route sets we instead will + # construct one big array of link ids with a corresponding offsets array that indicates where each new row + # (path) starts. 
for route in deref(route_set): o_col.Append(ods[i].first) d_col.Append(ods[i].second) @@ -1141,7 +1184,13 @@ cdef class RouteChoiceSet: offset_builder.Append(offset) # Mark the end of the array in offsets offset_builder.Finish(&offsets) - route_set_results = libpa.CListArray.FromArraysAndType(route_set_dtype, deref(offsets.get()), deref(paths.get()), pool, shared_ptr[libpa.CBuffer]()) + route_set_results = libpa.CListArray.FromArraysAndType( + route_set_dtype, + deref(offsets.get()), + deref(paths.get()), + pool, + shared_ptr[libpa.CBuffer]() + ) o_col.Finish(&columns[0]) d_col.Finish(&columns[1]) @@ -1152,7 +1201,9 @@ cdef class RouteChoiceSet: path_overlap_col.Finish(&columns[4]) prob_col.Finish(&columns[5]) - cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) + cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema( + RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema + ) cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(schema, columns) del path_builder @@ -1195,7 +1246,7 @@ cdef class Checkpoint: A small wrapper class to write a dataset partition by partition """ - def __init__(self, where, schema, partition_cols = None): + def __init__(self, where, schema, partition_cols=None): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" self.where = pathlib.Path(where) self.schema = schema diff --git a/setup.cfg b/setup.cfg index eb3eee575..0651c05d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,8 @@ universal = 1 [metadata] -license_files = [LICENSE.TXT] \ No newline at end of file +license_files = [LICENSE.TXT] + +[pycodestyle] +max-line-length = 120 +ignore = E225 \ No newline at end of file From 5b5f6aed4372ff1377f4c8a0007a30ba66de8ec1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:12:31 +1000 Subject: [PATCH 32/52] Spelling, remove clamping, make algorithm positional or keyword 
arg --- aequilibrae/paths/route_choice.py | 21 ++++++++------------ tests/aequilibrae/paths/test_route_choice.py | 4 ++-- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 40c726947..56c4b0751 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -24,7 +24,7 @@ class RouteChoice: } def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): - self.paramaters = self.default_paramaters.copy() + self.parameters = self.default_paramaters.copy() self.procedure_id = uuid4().hex proj = project or get_active_project(must_exist=False) @@ -50,7 +50,7 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self._config = {} - def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: + def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: """ Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. 
@@ -101,7 +101,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: self.algorithm = algo self._config["Algorithm"] = algo - self.paramaters = defaults | kwargs + self.parameters = defaults | kwargs def set_cores(self, cores: int) -> None: """Allows one to set the number of cores to be used @@ -154,8 +154,8 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: if all( isinstance(pair, tuple) and len(pair) == 2 - and isinstance(pair[0], (int, np.unsignedinteger)) - and isinstance(pair[1], (int, np.unsignedinteger)) + and isinstance(pair[0], (int, np.integer)) + and isinstance(pair[1], (int, np.integer)) for pair in nodes ): self.nodes = nodes @@ -193,7 +193,7 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool path_size_logit=perform_assignment, cores=self.cores, where=str(self.where) if self.where is not None else None, - **self.paramaters, + **self.parameters, ) def execute(self, perform_assignment: bool = False) -> None: @@ -223,7 +223,7 @@ def execute(self, perform_assignment: bool = False) -> None: path_size_logit=perform_assignment, cores=self.cores, where=str(self.where) if self.where is not None else None, - **self.paramaters, + **self.parameters, ) def info(self) -> dict: @@ -249,7 +249,7 @@ def info(self) -> dict: "Matrix totals": matrix_totals, "Computer name": socket.gethostname(), "Procedure ID": self.procedure_id, - "Parameters": self.paramaters, + "Parameters": self.parameters, } return info @@ -289,7 +289,6 @@ def get_load_results( :Arguments: **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. - **clamp** (:obj:`bool`): Whether or not to treat values `< 1e-15` as `0.0`. 
:Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): @@ -319,10 +318,6 @@ def get_load_results( self.link_loads = {fields[0]: tmp[0]} self.compact_link_loads = {fields[0]: tmp[1]} - if clamp: - for v in itertools.chain(self.link_loads.values(), self.compact_link_loads.values()): - v[(v < 1e-15)] = 0.0 - # Get a mapping from the compressed graph to/from the network graph m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) m_compact = _get_graph_to_network_mapping( diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 8d5542f24..7ed57c67c 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -274,12 +274,12 @@ def test_set_choice_set_generation(self): rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - rc.paramaters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} ) rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( - rc.paramaters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + rc.parameters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} ) with self.assertRaises(ValueError): From 858e60cd54e7ca531a7bd7b5b1c513b8600e67da Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:20:05 +1000 Subject: [PATCH 33/52] Forget import --- aequilibrae/paths/route_choice_set.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index e846b0311..ef7b2126d 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1,6 +1,7 @@ # cython: 
language_level=3str from aequilibrae.paths.graph import Graph +from aequilibrae.matrix import AequilibraeMatrix from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc @@ -25,7 +26,8 @@ from typing import List, Tuple import numpy as np import pyarrow as pa -from aequilibrae.matrix import AequilibraeMatrix +import pyarrow.dataset +import pyarrow.parquet as pq cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. cimport pyarrow as pa From a8c72db884771e089c553303b1f856ffebb87bb6 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:21:45 +1000 Subject: [PATCH 34/52] Skip 3.9 builds --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8fab0fcf1..8abeb35de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,10 @@ max-complexity = 20 [tool.cibuildwheel] # While we test this facility we we will only build for 3.10 #build = ["cp310-*"] -build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] +build = ["cp310-*", "cp311-*", "cp312-*"] # We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows -skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] test-skip = "" archs = ["auto"] From bb738647e82737d47b790a18889ab8fbc147baac Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:23:01 +1000 Subject: [PATCH 35/52] Revert "Skip 3.9 builds" This reverts commit a8c72db884771e089c553303b1f856ffebb87bb6. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8abeb35de..8fab0fcf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,10 @@ max-complexity = 20 [tool.cibuildwheel] # While we test this facility we we will only build for 3.10 #build = ["cp310-*"] -build = ["cp310-*", "cp311-*", "cp312-*"] +build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] # We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows -skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] test-skip = "" archs = ["auto"] From 2c69ecd89f3e24f06306f28344b3eac5f4f4969b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:28:54 +1000 Subject: [PATCH 36/52] Drop 3.8 from unit tests --- .github/workflows/unit_tests.yml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index b8f65c694..fa8fb93f0 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ${{ matrix.os}} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] os: [windows-latest, ubuntu-latest] max-parallel: 20 diff --git a/setup.py b/setup.py index e9a851217..804ac8f38 100644 --- a/setup.py +++ b/setup.py @@ -123,7 +123,6 @@ license_files=("LICENSE.TXT",), classifiers=[ "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", From f55eac93e6c051330f66e56bfdf1af0334d019bf Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 16:31:17 +1000 Subject: [PATCH 
37/52] Don't run off the end of the vector --- aequilibrae/paths/route_choice_set.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index ef7b2126d..fe8ccfc71 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -777,7 +777,7 @@ cdef class RouteChoiceSet: while union_iter != link_union.cend(): count = 0 link = deref(union_iter) - while link == deref(union_iter): + while link == deref(union_iter) and union_iter != link_union.cend(): count = count + 1 inc(union_iter) @@ -1022,7 +1022,7 @@ cdef class RouteChoiceSet: while link_iter != links.cend(): # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): + while deref(link_iter) != deref(link_union_iter) and link_iter != links.cend(): inc(link_union_iter) link_loc = link_union_iter - link_union.cbegin() From 0c7f1db435e25c107a724fa04d98bc6f691b529f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 23 Apr 2024 17:13:55 +1000 Subject: [PATCH 38/52] Remove FIXMEs, update docs strings, spelling errors --- aequilibrae/paths/route_choice.py | 11 +- aequilibrae/paths/route_choice_set.pxd | 11 +- aequilibrae/paths/route_choice_set.pyx | 153 +++++++++++-------- tests/aequilibrae/paths/test_route_choice.py | 5 +- 4 files changed, 105 insertions(+), 75 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 56c4b0751..e41002047 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -229,7 +229,7 @@ def execute(self, perform_assignment: bool = False) -> None: def info(self) -> dict: """Returns information for the transit assignment procedure - Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID'. + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', and 'Parameters'. 
The classes key is also a dictionary with all the user classes per transit class and their respective matrix totals @@ -260,9 +260,10 @@ def log_specification(self): def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: """Returns the results of the route choice procedure - Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). + Represents paths from ``origin`` to ``destination``. - If `save_routes` was specified then a Pyarrow dataset is returned. The call is responsible for reading this dataset. + If `save_routes` was specified then a Pyarrow dataset is returned. The caller is responsible for reading this dataset. :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure @@ -292,8 +293,8 @@ def get_load_results( :Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): - A tuple of uncompressed and compressed DataFrames with the link loading results. Or - the requested link loading result.s + A tuple of uncompressed and compressed link loading results as DataFrames. Or + the requested link loading results. """ diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 0044a28c6..a8c317796 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -43,7 +43,7 @@ cdef extern from "" namespace "std" nogil: cdef extern from "" namespace "std" nogil: pair[T, U] make_pair[T, U](T&& t, U&& u) -# To define our own hashing functions we have to write a little cpp. The string is inlined directly into route_choice.cpp +# To define our own hashing functions we have to write a little C++. 
The string is inlined directly into route_choice.cpp # To make Cython aware of our hash types we also must declare them with the right signatures # # OrderedVectorPointerHasher: This hash function is for hashing the routes, thus it should be order *DEPENDENT*. @@ -55,8 +55,8 @@ cdef extern from "" namespace "std" nogil: # New hash functions and their use in authentication and set equality # https://doi.org/10.1016/0022-0000(81)90033-7 # -# PointerDereferenceEqualTo: Because we are storing and hashing the pointers to objects to avoid unnessecary copies we must -# define our own comparitor to resolve hash collisions. Without this equaility operator the bare pointers are compared. +# PointerDereferenceEqualTo: Because we are storing and hashing the pointers to objects to avoid unnecessary copies we must +# define our own comparator to resolve hash collisions. Without this equality operator the bare pointers are compared. cdef extern from * nogil: """ // Source: https://stackoverflow.com/a/72073933 @@ -104,7 +104,7 @@ cdef extern from * nogil: bool operator()(const T& lhs, const T& rhs) const -# For typing (haha) convenince, the types names are getting long +# For typing (haha) convenience, the types names are getting long ctypedef unordered_set[vector[long long] *, OrderedVectorPointerHasher, PointerDereferenceEqualTo[vector[long long] *]] RouteSet_t ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, PointerDereferenceEqualTo[unordered_set[long long] *]] LinkSet_t ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t @@ -153,6 +153,8 @@ cdef class RouteChoiceSet: unsigned int [:] mapping_idx unsigned int [:] mapping_data + cdef void deallocate(RouteChoiceSet self) nogil + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -228,6 +230,7 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] 
*apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil + cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores) cdef shared_ptr[libpa.CTable] make_table_from_results( RouteChoiceSet self, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index fe8ccfc71..5dbdba814 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -16,7 +16,7 @@ from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport unordered_set from libcpp.utility cimport pair from libcpp.vector cimport vector -from openmp cimport omp_get_num_threads +from openmp cimport omp_get_max_threads import itertools import logging @@ -147,14 +147,12 @@ cdef class RouteChoiceSet: def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a - partially deallocated state already. + partially deallocated state already. Do not call any other Python method. """ - self.deallocate_results() + self.deallocate() - def deallocate_results(self): - """ - Deallocate stored results, existing extracted results are not invalidated. - """ + cdef void deallocate(RouteChoiceSet self) nogil: + """__dealloc__ cannot be called from normal code.""" cdef: RouteSet_t *route_set vector[long long] *link_vec @@ -212,8 +210,7 @@ cdef class RouteChoiceSet: choose a centroid. :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of - compact link IDs. Represents paths from ``origin`` to ``destination``. - + link IDs. Represents paths from ``origin`` to ``destination``. 
""" self.batched([(origin, destination)], *args, **kwargs) where = kwargs.get("where", None) @@ -269,7 +266,6 @@ cdef class RouteChoiceSet: **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. - """ cdef: long long o, d @@ -301,7 +297,7 @@ cdef class RouteChoiceSet: unsigned int c_max_depth = max_depth unsigned int c_max_misses = max_misses unsigned int c_seed = seed - unsigned int c_cores = cores if cores > 0 else omp_get_num_threads() + unsigned int c_cores = cores if cores > 0 else omp_get_max_threads() vector[pair[long long, long long]] c_ods @@ -362,10 +358,10 @@ cdef class RouteChoiceSet: path_overlap_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) - self.deallocate_results() # We have be storing results from a previous run + self.deallocate() # We may be storing results from a previous run for batch in batches: - c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer + c_ods = batch # Convert the batch to a C++ vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not # needed @@ -485,8 +481,8 @@ cdef class RouteChoiceSet: checkpoint.write(table) del table else: - # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in - # memory + # where is None implies len(batches) == 1, i.e. 
there was only one batch and we should keep everything + # in memory pass # Here we decide if we wish to preserve our results for later saving/link loading @@ -625,7 +621,7 @@ cdef class RouteChoiceSet: # If the destination is reachable we must build the path and readd if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know + # Walk the predecessors tree to find our path, we build it up in a C++ vector because we can't know # how long it'll be p = dest_index while p != origin_index: @@ -689,6 +685,7 @@ cdef class RouteChoiceSet: double penatly, unsigned int seed ) noexcept nogil: + """Link penalisation algorithm for choice set generation.""" cdef: RouteSet_t *route_set @@ -720,7 +717,7 @@ cdef class RouteChoiceSet: if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how + # Walk the predecessors tree to find our path, we build it up in a C++ vector because we can't know how # long it'll be p = dest_index while p != origin_index: @@ -728,12 +725,12 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) - reverse(vec.begin(), vec.end()) - for connector in deref(vec): thread_cost[connector] *= penatly - # To prevent runaway algorithms if we find a n duplicate routes we should stop + reverse(vec.begin(), vec.end()) + + # To prevent runaway algorithms if we find N duplicate routes we should stop status = route_set.insert(vec) miss_count = miss_count + (not status.second) if miss_count > max_misses: @@ -749,6 +746,11 @@ cdef class RouteChoiceSet: @cython.initializedcheck(False) @staticmethod cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) noexcept nogil: + """ + Compute a frequency map for each route. 
+ + Each node at index i in the first returned vector has frequency at index i in the second vector. + """ cdef: vector[long long] *keys vector[long long] *counts @@ -792,6 +794,7 @@ cdef class RouteChoiceSet: @cython.initializedcheck(False) @staticmethod cdef vector[double] *compute_cost(RouteSet_t *route_set, double[:] cost_view) noexcept nogil: + """Compute the cost each route.""" cdef: vector[double] *cost_vec @@ -823,6 +826,8 @@ cdef class RouteChoiceSet: double[:] cost_view ) noexcept nogil: """ + Compute the path overlap figure based on the route cost and frequency. + Notation changes: i: j a: link @@ -871,6 +876,7 @@ cdef class RouteChoiceSet: double beta, double theta ) noexcept nogil: + """Compute a probability for each route in the route set based on the path overlap.""" cdef: # Scratch objects vector[double] *prob_vec @@ -895,6 +901,9 @@ cdef class RouteChoiceSet: @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): + """ + Apply link loading to the network using the demand matrix and the previously computed route sets. 
+ """ if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: @@ -903,11 +912,12 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") - cores = cores if cores > 0 else omp_get_num_threads() + cores = cores if cores > 0 else omp_get_max_threads() cdef: vector[vector[double] *] *path_files = nullptr vector[double] *ll + vector[double] *vec if generate_path_files: path_files = RouteChoiceSet.compute_path_files( @@ -918,46 +928,49 @@ cdef class RouteChoiceSet: cores, ) - # FIXME, write out path files - tmp = [] - for vec in deref(path_files): - tmp.append(deref(vec)) - print(tmp) - - def apply_link_loading_func(m): - if generate_path_files: - ll = self.apply_link_loading_from_path_files( - m, - deref(path_files), - ) - else: - ll = self.apply_link_loading(m) - - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without - # transferring owner ship. 
- compressed = &deref(ll)[0] - - actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) - assign_link_loads_cython( - actual, - compressed, - self.graph_compressed_id_view, - cores - ) - compressed = np.array(compressed, copy=True) - del ll - return actual.reshape(-1), compressed.reshape(-1) + # # FIXME, write out path files + # tmp = [] + # for vec in deref(path_files): + # tmp.append(deref(vec)) + # print(tmp) if len(matrix.view_names) == 1: - link_loads = apply_link_loading_func(matrix.matrix_view) + link_loads = self.apply_link_loading_func(matrix.matrix_view, path_files, generate_path_files, cores) else: link_loads = { - name: apply_link_loading_func(matrix.matrix_view[:, :, i]) + name: self.apply_link_loading_func(matrix.matrix_view[:, :, i], path_files, generate_path_files, cores) for i, name in enumerate(matrix.names) } + if generate_path_files: + for vec in deref(path_files): + del vec + del path_files + return link_loads + cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores): + """Helper function for self.link_loading. Cannot free a pointer captured in a local scope by a lambda.""" + if generate_path_files: + ll = self.apply_link_loading_from_path_files(m, deref(pf)) + else: + ll = self.apply_link_loading(m) + + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without + # transferring ownership. 
+ compressed = &deref(ll)[0] + + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) + assign_link_loads_cython( + actual, + compressed, + self.graph_compressed_id_view, + cores + ) + compressed = np.array(compressed, copy=True) + del ll + return actual.reshape(-1), compressed.reshape(-1) + @cython.boundscheck(False) @cython.wraparound(False) @cython.embedsignature(True) @@ -976,10 +989,10 @@ cdef class RouteChoiceSet: Returns vector of vectors of link loads corresponding to each link in it's link_union_set. """ cdef: - vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME + vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) vector[long long] *link_union vector[double] *loads - vector[double] *link + vector[long long] *links vector[long long].const_iterator link_union_iter vector[long long].const_iterator link_iter @@ -991,7 +1004,7 @@ cdef class RouteChoiceSet: with parallel(num_threads=cores): for i in prange(ods.size()): link_union = link_union_set[i] - loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + loads = new vector[double](link_union.size(), 0.0) # We now iterate over all routes in the route_set, each route has an associated probability route_prob_iter = prob_set[i].cbegin() @@ -1006,13 +1019,14 @@ cdef class RouteChoiceSet: # is known to be sorted, if the links in the route are also sorted we can just step along both # arrays simultaneously, skipping elements in the link_union when appropriate. This allows us to # operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense - # formulation. This is also incredibly cache efficient, the only downsides are that the code is - # harder to read and it requires sorting the route. 
NOTE: the sorting of routes is technically - # something that is already computed, during the computation of the link frequency we merge and sort - # all links, if we instead sorted then used an N-way merge we could reuse the sorted routes and the - # sorted link union. + # formulation. This is also more cache efficient, the only downsides are that the code is + # harder to read and it requires sorting the route. + + # NOTE: the sorting of routes is technically something that is already computed, during the + # computation of the link frequency we merge and sort all links, if we instead sorted then used an + # N-way merge we could reuse the sorted routes and the sorted link union. - # We copy the links in case the routes haven't already been saved # FIXME FREE ME + # We copy the links in case the routes haven't already been saved links = new vector[long long](deref(route)) sort(links.begin(), links.end()) @@ -1030,6 +1044,8 @@ cdef class RouteChoiceSet: inc(link_iter) + del links + deref(link_loads)[i] = loads return link_loads @@ -1056,7 +1072,7 @@ cdef class RouteChoiceSet: long origin_index, dest_index double demand - vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[double] *link_loads = new vector[double](self.num_links) for i in range(self.ods.size()): loads = path_files[i] @@ -1088,7 +1104,7 @@ cdef class RouteChoiceSet: long origin_index, dest_index double demand, prob, load - vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[double] *link_loads = new vector[double](self.num_links) for i in range(self.ods.size()): route_set = deref(self.results)[i] @@ -1121,6 +1137,15 @@ cdef class RouteChoiceSet: vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ): + """ + Construct an Arrow table from C++ stdlib structures. + + Note: this function directly utilises the Arrow C++ API, the Arrow Cython API is not sufficient. 
+ See `route_choice_set.pxd` for Cython declarations. + + Returns a shared pointer to a Arrow CTable. This should be wrapped in a Python table before use. + Compressed link IDs are expanded to full network link IDs. + """ cdef: shared_ptr[libpa.CArray] paths shared_ptr[libpa.CArray] offsets @@ -1162,7 +1187,7 @@ cdef class RouteChoiceSet: route_set = route_sets[i] # Instead of construction a "list of lists" style object for storing the route sets we instead will - # construct one big array of link ids with a corresponding offsets array that indicates where each new row + # construct one big array of link IDs with a corresponding offsets array that indicates where each new row # (path) starts. for route in deref(route_set): o_col.Append(ods[i].first) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 7ed57c67c..ca3d31529 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -8,7 +8,7 @@ import numpy as np import pyarrow as pa -from aequilibrae import Graph, Project +from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice @@ -55,7 +55,8 @@ def test_route_choice(self): results, [(2, 6, 9, 13, 25, 30, 53, 59)], "Initial route isn't the shortest A* route" ) - # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths + # A depth of 2 should yield the same initial route plus the length of that route more routes minus + # duplicates and unreachable paths results2 = rc.run(a, b, max_routes=0, max_depth=2, **kwargs) self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") From fa5b2e68da5bb138f31cd8ee8f6c8e49e2dabaa1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 10:00:41 +1000 Subject: [PATCH 39/52] Add test with known results --- 
tests/aequilibrae/paths/test_route_choice.py | 48 ++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index ca3d31529..8eca8b14e 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -11,6 +11,7 @@ from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice +from aequilibrae.matrix import AequilibraeMatrix from ...data import siouxfalls_project @@ -225,6 +226,53 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(link_loads, link_loads2) + def test_known_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all zeros", "single one"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) + + for od in nodes: + mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 + + mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 + + link_loads = rc.link_loading(mat) + + with self.subTest(matrix="all zeros"): + u, c = link_loads["all zeros"] + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + + with self.subTest(matrix="single one"): + u, c = link_loads["single one"] + link = self.graph.graph[ + (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) + ] + + lid = link.link_id.values[0] + c_lid = link.__compressed_id__.values[0] + + self.assertAlmostEqual(u[lid - 
1], 1.0) + self.assertAlmostEqual(c[c_lid], 1.0) + + u[lid - 1] = 0.0 + c[c_lid] = 0.0 + + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + class TestRouteChoice(TestCase): def setUp(self) -> None: From 5d0c731306f16d7202dd4ffb16fa89a89933b1cf Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 17:42:25 +1000 Subject: [PATCH 40/52] Move graph index building to Cython for free 1.5x --- aequilibrae/paths/graph.py | 62 +---------------------- aequilibrae/paths/graph_building.pyx | 73 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 60 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index a8ee6a2ce..495c7ff72 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from aequilibrae.paths.graph_building import build_compressed_graph +from aequilibrae.paths.graph_building import build_compressed_graph, create_compressed_link_network_mapping from aequilibrae.context import get_logger @@ -553,65 +553,7 @@ def create_compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ - # Cache the result, this isn't a huge computation but isn't worth doing twice - if ( - self.compressed_link_network_mapping_idx is not None - and self.compressed_link_network_mapping_data is not None - and self.network_compressed_node_mapping is not None - ): - return ( - self.compressed_link_network_mapping_idx, - self.compressed_link_network_mapping_data, - self.network_compressed_node_mapping, - ) - - # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't - # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute - - # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, - # we skip them. 
- filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] - gb = filtered.groupby(by="__compressed_id__", sort=True) - idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) - data = np.zeros(len(filtered), dtype=np.uint32) - - node_mapping = np.full(self.num_nodes, -1) - - i = 0 - for compressed_id, df in gb: - idx[compressed_id] = i - values = df.link_id.values - a = df.a_node.values - b = df.b_node.values - - # In order to ensure that the link IDs come out in the correct order we must walk the links - # we do this assuming the `a` array is sorted. - j = 0 - # Find the missing a_node, this is the starting of the chain. We cannot rely on the node ordering to do a simple lookup - - a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] - while True: - tmp = a.searchsorted(x) - if tmp < len(a) and a[tmp] == x: - x = b[tmp] - data[i + j] = values[tmp] - else: - break - j += 1 - - b_node = x - node_mapping[a_node] = self.compact_graph["a_node"].iat[compressed_id] - node_mapping[b_node] = self.compact_graph["b_node"].iat[compressed_id] - - i += len(values) - - idx[-1] = i - - self.compressed_link_network_mapping_idx = idx - self.compressed_link_network_mapping_data = data - self.network_compressed_node_mapping = node_mapping - - return idx, data, node_mapping + return create_compressed_link_network_mapping(self) class Graph(GraphBase): diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 3bb3ef4c9..724643de8 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -381,3 +381,76 @@ def build_compressed_graph(graph): # If will refer all the links that have no correlation to an element beyond the last link # This element will always be zero during assignment graph.graph.__compressed_id__ = graph.graph.__compressed_id__.fillna(graph.compact_graph.id.max() + 1).astype(np.int64) + + +@cython.embedsignature(True) +@cython.boundscheck(False) 
+@cython.initializedcheck(False) +def create_compressed_link_network_mapping(graph): + # Cache the result, this isn't a huge computation but isn't worth doing twice + if ( + graph.compressed_link_network_mapping_idx is not None + and graph.compressed_link_network_mapping_data is not None + and graph.network_compressed_node_mapping is not None + ): + return ( + graph.compressed_link_network_mapping_idx, + graph.compressed_link_network_mapping_data, + graph.network_compressed_node_mapping, + ) + + cdef: + long long i, j, a_node, x, b_node, tmp, compressed_id + long long[:] b + long long[:] values + np.uint32_t[:] idx + np.uint32_t[:] data + np.int32_t[:] node_mapping + + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't + # bother redoing sorting it. + + # Some links are completely removed from the network, they are assigned ID `graph.compact_graph.id.max() + 1`, + # we skip them. + filtered = graph.graph[graph.graph.__compressed_id__ != graph.compact_graph.id.max() + 1] + gb = filtered.groupby(by="__compressed_id__", sort=True) + idx = np.zeros(graph.compact_num_links + 1, dtype=np.uint32) + data = np.zeros(len(filtered), dtype=np.uint32) + + node_mapping = np.full(graph.num_nodes, -1, dtype=np.int32) + + i = 0 + for compressed_id, df in gb: + idx[compressed_id] = i + values = df.link_id.values + a = df.a_node.values + b = df.b_node.values + + # In order to ensure that the link IDs come out in the correct order we must walk the links + # we do this assuming the `a` array is sorted. + j = 0 + # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup + + a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] + while True: + tmp = a.searchsorted(x) + if tmp < len(a) and a[tmp] == x: + x = b[tmp] + data[i + j] = values[tmp] + else: + break + j += 1 + + b_node = x + node_mapping[a_node] = graph.compact_graph["a_node"].iat[compressed_id] + node_mapping[b_node] = graph.compact_graph["b_node"].iat[compressed_id] + + i += len(values) + + idx[-1] = i + + graph.compressed_link_network_mapping_idx = idx + graph.compressed_link_network_mapping_data = data + graph.network_compressed_node_mapping = node_mapping + + return idx, data, node_mapping From e290069abc9eb92e3b093627db1e94e93be299db Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 17:43:07 +1000 Subject: [PATCH 41/52] Add select link support with sparse matrices --- aequilibrae/matrix/sparse_matrix.pxd | 13 ++ aequilibrae/matrix/sparse_matrix.pyx | 74 +++++++ aequilibrae/paths/route_choice.py | 205 ++++++++++++------- aequilibrae/paths/route_choice_set.pxd | 11 +- aequilibrae/paths/route_choice_set.pyx | 127 ++++++++++-- setup.py | 12 +- tests/aequilibrae/paths/test_route_choice.py | 8 +- 7 files changed, 351 insertions(+), 99 deletions(-) create mode 100644 aequilibrae/matrix/sparse_matrix.pxd create mode 100644 aequilibrae/matrix/sparse_matrix.pyx diff --git a/aequilibrae/matrix/sparse_matrix.pxd b/aequilibrae/matrix/sparse_matrix.pxd new file mode 100644 index 000000000..53c5b611b --- /dev/null +++ b/aequilibrae/matrix/sparse_matrix.pxd @@ -0,0 +1,13 @@ +from libcpp.vector cimport vector + +cdef class Sparse: + pass + +cdef class COO(Sparse): + cdef: + vector[size_t] *row + vector[size_t] *col + vector[double] *data + readonly object shape + + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx new file mode 100644 index 000000000..7c393834e --- /dev/null +++ 
b/aequilibrae/matrix/sparse_matrix.pyx @@ -0,0 +1,74 @@ +from libcpp.vector cimport vector +from libcpp cimport nullptr +from cython.operator cimport dereference as d + +import scipy.sparse +import numpy as np + +cdef class Sparse: + """ + A class to implement sparse matrix operations such as reading, writing, and indexing + """ + + def __cinit__(self): + """C level init. For C memory allocation and initialisation. Called exactly once per object.""" + pass + + def __init__(self): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + pass + + def __dealloc__(self): + """ + C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a + partially deallocated state already. + """ + pass + + +cdef class COO(Sparse): + """ + A class to implement sparse matrix operations such as reading, writing, and indexing + """ + + def __cinit__(self): + """C level init. For C memory allocation and initialisation. Called exactly once per object.""" + + self.row = new vector[size_t]() + self.col = new vector[size_t]() + self.data = new vector[double]() + + def __init__(self, shape=None): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + + self.shape = shape + + def __dealloc__(self): + """ + C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a + partially deallocated state already. 
+ """ + + del self.row + self.row = nullptr + + del self.col + self.col = nullptr + + del self.data + self.data = nullptr + + def to_scipy(self, shape=None): + row = &d(self.row)[0] + col = &d(self.col)[0] + data = &d(self.data)[0] + + if shape is None: + shape = self.shape + + return scipy.sparse.coo_matrix((data, (row, col)), dtype=np.float64, shape=shape) + + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: + self.row.push_back(i) + self.col.push_back(j) + self.data.push_back(v) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index e41002047..8b836a42d 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,9 +1,11 @@ import itertools +import warnings import logging import pathlib import socket -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union, Dict from uuid import uuid4 +from functools import cached_property import numpy as np import pandas as pd @@ -35,13 +37,16 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self.cores: int = 0 self.graph = graph self.matrix = matrix - self.__rc = None self.schema = RouteChoiceSet.schema self.psl_schema = RouteChoiceSet.psl_schema - self.compact_link_loads: Optional[np.array] = None - self.link_loads: Optional[np.array] = None + self.compact_link_loads: Optional[Dict[str, np.array]] = None + self.link_loads: Optional[Dict[str, np.array]] = None + + self.sl_compact_link_loads: Optional[Dict[str, np.array]] = None + self.sl_link_loads: Optional[Dict[str, np.array]] = None + self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None self.save_path_files: bool = False @@ -49,6 +54,11 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self.nodes: Optional[Union[List[int], List[Tuple[int, int]]]] = None self._config = {} + self._selected_links = {} + + @cached_property + def __rc(self) -> 
RouteChoiceSet: + return RouteChoiceSet(self.graph) def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: """ @@ -182,8 +192,6 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool :Returns: ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. """ - if self.__rc is None: - self.__rc = RouteChoiceSet(self.graph) self.results = None return self.__rc.run( @@ -213,9 +221,6 @@ def execute(self, perform_assignment: bool = False) -> None: "to perform batch route choice generation you must first prepare with the selected nodes. See `RouteChoice.prepare()`" ) - if self.__rc is None: - self.__rc = RouteChoiceSet(self.graph) - self.results = None self.__rc.batched( self.nodes, @@ -229,7 +234,8 @@ def execute(self, perform_assignment: bool = False) -> None: def info(self) -> dict: """Returns information for the transit assignment procedure - Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', and 'Parameters'. + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', 'Parameters', and + 'Select links'. The classes key is also a dictionary with all the user classes per transit class and their respective matrix totals @@ -250,6 +256,7 @@ def info(self) -> dict: "Computer name": socket.gethostname(), "Procedure ID": self.procedure_id, "Parameters": self.parameters, + "Select links": self._selected_links, } return info @@ -280,93 +287,139 @@ def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: return self.results - def get_load_results( - self, - which: str = "uncompressed", - clamp: bool = True, - ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: + def get_load_results(self) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: """ Translates the link loading results from the graph format into the network format. 
- :Arguments: - **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. - :Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): - A tuple of uncompressed and compressed link loading results as DataFrames. Or - the requested link loading results. - + A tuple of uncompressed and compressed link loading results as DataFrames. + Columns are the matrix name concatenated direction. """ - if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: - raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") - if self.matrix is None: raise ValueError( "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." ) - compressed = which == "both" or which == "compressed" - uncompressed = which == "both" or which == "uncompressed" - - fields = self.matrix.names - tmp = self.__rc.link_loading(self.matrix, self.save_path_files) - if isinstance(tmp, dict): - self.link_loads = {k: v[0] for k, v in tmp.items()} - self.compact_link_loads = {k: v[1] for k, v in tmp.items()} - else: - self.link_loads = {fields[0]: tmp[0]} - self.compact_link_loads = {fields[0]: tmp[1]} + self.link_loads = {k: v[0] for k, v in tmp.items()} + self.compact_link_loads = {k: v[1] for k, v in tmp.items()} - # Get a mapping from the compressed graph to/from the network graph + # Create a data store with a row for each uncompressed link m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + lids = np.unique(self.graph.graph.link_id.values) + uncompressed_df = self.__link_loads_to_df(m, lids, self.link_loads) + m_compact = _get_graph_to_network_mapping( self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values ) - - lids = np.unique(self.graph.graph.link_id.values) compact_lids = np.unique(self.graph.compact_graph.link_id.values) - # Create a data 
store with a row for each uncompressed link - if uncompressed: - uncompressed_df = pd.DataFrame( - {"link_id": lids} - | {k + dir: np.zeros(lids.shape) for k in self.link_loads.keys() for dir in ["_ab", "_ba"]} - ) - for k, v in self.link_loads.items(): - # Directional Flows - uncompressed_df[k + "_ab"].values[m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - uncompressed_df[k + "_ba"].values[m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) - - # Tot Flow - uncompressed_df[k + "_tot"] = np.nan_to_num(uncompressed_df[k + "_ab"].values) + np.nan_to_num( - uncompressed_df[k + "_ba"].values - ) + compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.compact_link_loads) - if compressed: - compressed_df = pd.DataFrame( - {"link_id": compact_lids} - | { - k + dir: np.zeros(compact_lids.shape) - for k in self.compact_link_loads.keys() - for dir in ["_ab", "_ba"] - } - ) - for k, v in self.compact_link_loads.items(): - compressed_df[k + "_ab"].values[m_compact.network_ab_idx] = np.nan_to_num(v[m_compact.graph_ab_idx]) - compressed_df[k + "_ba"].values[m_compact.network_ba_idx] = np.nan_to_num(v[m_compact.graph_ba_idx]) + return uncompressed_df, compressed_df - # Tot Flow - compressed_df[k + "_tot"] = np.nan_to_num(compressed_df[k + "_ab"].values) + np.nan_to_num( - compressed_df[k + "_ba"].values - ) + def __link_loads_to_df(self, mapping, lids, link_loads): + df = pd.DataFrame( + {"link_id": lids} | {k + dir: np.zeros(lids.shape) for k in link_loads.keys() for dir in ["_ab", "_ba"]} + ) + for k, v in link_loads.items(): + # Directional Flows + df[k + "_ab"].values[mapping.network_ab_idx] = np.nan_to_num(v[mapping.graph_ab_idx]) + df[k + "_ba"].values[mapping.network_ba_idx] = np.nan_to_num(v[mapping.graph_ba_idx]) - if uncompressed and not compressed: - return uncompressed_df - elif not uncompressed and compressed: - return compressed_df - else: - return uncompressed_df, compressed_df + # Tot Flow + df[k + "_tot"] = np.nan_to_num(df[k + 
"_ab"].values) + np.nan_to_num(df[k + "_ba"].values)
+
+        return df
+
+    def set_select_links(self, links: Dict[str, List[Tuple[int, int]]]):
+        """
+        Set the selected links. Checks if the links and directions are valid. Translates the link IDs and
+        directions into the unique compressed link IDs used in the compact graph.
+
+        Supply `links=None` to disable select link analysis.
+
+        :Arguments:
+            **links** (:obj:`Union[None, Dict[str, List[Tuple[int, int]]]]`): mapping from a link set name to the
+                link IDs and directions to be used in select link analysis.
+        """
+        self._selected_links = {}
+
+        if links is None:
+            del self._config["select_links"]
+            return
+
+        max_id = self.graph.compact_graph.id.max() + 1
+
+        for name, link_set in links.items():
+            if len(name.split(" ")) != 1:
+                warnings.warn("Input string name has a space in it. Replacing with _")
+                name = str.join("_", name.split(" "))
+
+            link_ids = []
+            for link, dir in link_set:
+                if dir == 0:
+                    query = (self.graph.graph["link_id"] == link) & (
+                        (self.graph.graph["direction"] == -1) | (self.graph.graph["direction"] == 1)
+                    )
+                else:
+                    query = (self.graph.graph["link_id"] == link) & (self.graph.graph["direction"] == dir)
+                if not query.any():
+                    raise ValueError(f"link_id or direction {(link, dir)} is not present within graph.")
+                # Check for duplicate compressed link ids in the current link set
+                for comp_id in self.graph.graph[query]["__compressed_id__"].values:
+                    if comp_id == max_id:
+                        raise ValueError(
+                            f"link ID {link} and direction {dir} is not present in compressed graph. "
+                            "It may have been removed during dead-end removal."
+ ) + elif comp_id in link_ids: + warnings.warn( + "Two input links map to the same compressed link in the network" + f", removing superfluous link {link} and direction {dir} with compressed id {comp_id}" + ) + else: + link_ids.append(comp_id) + self._selected_links[name] = link_ids + self._config["select_links"] = str(links) def get_select_link_results(self) -> pd.DataFrame: - raise NotImplementedError() + """ + Get the select link loading results. + + :Returns: + **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + A tuple of uncompressed and compressed select link loading results as DataFrames. + Columns are the matrix name concatenated with the select link set and direction. + """ + + if self.matrix is None: + raise ValueError( + "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." + ) + + tmp = self.__rc.select_link_loading(self.matrix, self._selected_links) + + self.sl_link_loads = {} + self.sl_compact_link_loads = {} + self.sl_od_matrix = {} + for name, sl_res in tmp.items(): + for sl_name, res in sl_res.items(): + mat, (u, c) = res + self.sl_od_matrix[name + "_" + sl_name] = mat + self.sl_link_loads[name + "_" + sl_name] = u + self.sl_compact_link_loads[name + "_" + sl_name] = c + + # Create a data store with a row for each uncompressed link + m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + lids = np.unique(self.graph.graph.link_id.values) + uncompressed_df = self.__link_loads_to_df(m, lids, self.sl_link_loads) + + m_compact = _get_graph_to_network_mapping( + self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values + ) + compact_lids = np.unique(self.graph.compact_graph.link_id.values) + compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.sl_compact_link_loads) + + return uncompressed_df, compressed_df diff --git a/aequilibrae/paths/route_choice_set.pxd 
b/aequilibrae/paths/route_choice_set.pxd index a8c317796..c4a016194 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -1,5 +1,7 @@ # cython: language_level=3str from aequilibrae.paths.results import PathResults +from aequilibrae.matrix.sparse_matrix cimport COO + from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map @@ -230,7 +232,14 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil - cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores) + cdef apply_link_loading_func(RouteChoiceSet self, vector[double] *ll, int cores) + + cdef vector[double] *apply_select_link_loading( + RouteChoiceSet self, + COO sparse_mat, + double[:, :] matrix_view, + unordered_set[long] &select_link_set + ) noexcept nogil cdef shared_ptr[libpa.CTable] make_table_from_results( RouteChoiceSet self, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 5dbdba814..af258b1da 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -2,6 +2,7 @@ from aequilibrae.paths.graph import Graph from aequilibrae.matrix import AequilibraeMatrix +from aequilibrae.matrix.sparse_matrix cimport COO from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc @@ -916,7 +917,6 @@ cdef class RouteChoiceSet: cdef: vector[vector[double] *] *path_files = nullptr - vector[double] *ll vector[double] *vec if generate_path_files: @@ -934,13 +934,15 @@ cdef class RouteChoiceSet: # tmp.append(deref(vec)) # print(tmp) - if len(matrix.view_names) == 1: - link_loads = 
self.apply_link_loading_func(matrix.matrix_view, path_files, generate_path_files, cores) - else: - link_loads = { - name: self.apply_link_loading_func(matrix.matrix_view[:, :, i], path_files, generate_path_files, cores) - for i, name in enumerate(matrix.names) - } + link_loads = {} + for i, name in enumerate(matrix.names): + m = matrix.matrix_view if len(matrix.view_names) == 1 else matrix.matrix_view[:, :, i] + + ll = self.apply_link_loading_from_path_files(m, deref(path_files)) \ + if generate_path_files else self.apply_link_loading(m) + + link_loads[name] = self.apply_link_loading_func(ll, cores) + del ll if generate_path_files: for vec in deref(path_files): @@ -949,13 +951,8 @@ cdef class RouteChoiceSet: return link_loads - cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores): - """Helper function for self.link_loading. Cannot free a pointer captured in a local scope by a lambda.""" - if generate_path_files: - ll = self.apply_link_loading_from_path_files(m, deref(pf)) - else: - ll = self.apply_link_loading(m) - + cdef apply_link_loading_func(RouteChoiceSet self, vector[double] *ll, int cores): + """Helper function for link_loading.""" # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without # transferring ownership. compressed = &deref(ll)[0] @@ -968,7 +965,6 @@ cdef class RouteChoiceSet: cores ) compressed = np.array(compressed, copy=True) - del ll return actual.reshape(-1), compressed.reshape(-1) @cython.boundscheck(False) @@ -1062,8 +1058,6 @@ cdef class RouteChoiceSet: """ Apply link loading from path files. - If path files have already been computed then this is a more efficient manner for the link loading. - Returns a vector of link loads indexed by compressed link ID. 
""" cdef: @@ -1101,6 +1095,7 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set vector[double] *route_set_prob + vector[double].const_iterator route_prob_iter long origin_index, dest_index double demand, prob, load @@ -1125,6 +1120,102 @@ cdef class RouteChoiceSet: return link_loads + @cython.embedsignature(True) + def select_link_loading(RouteChoiceSet self, matrix, select_links: Dict[str, List[long]], cores: int = 0): + """ + Apply link loading to the network using the demand matrix and the previously computed route sets. + """ + if self.ods == nullptr \ + or self.link_union_set == nullptr \ + or self.prob_set == nullptr: + raise ValueError("select link loading requires Route Choice path_size_logit results") + + if not isinstance(matrix, AequilibraeMatrix): + raise ValueError("`matrix` is not an AequilibraE matrix") + + cores = cores if cores > 0 else omp_get_max_threads() + + cdef: + unordered_set[long] select_link_set + vector[double] *ll + + link_loads = {} + + for i, name in enumerate(matrix.names): + matrix_ll = {} + m = matrix.matrix_view if len(matrix.view_names) == 1 else matrix.matrix_view[:, :, i] + for (k, v) in select_links.items(): + select_link_set = v + + coo = COO((self.zones, self.zones)) + + ll = self.apply_select_link_loading(coo, m, select_link_set) + res = self.apply_link_loading_func(ll, cores) + del ll + + matrix_ll[k] = (coo, res) + link_loads[name] = matrix_ll + + return link_loads + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) + cdef vector[double] *apply_select_link_loading( + RouteChoiceSet self, + COO sparse_mat, + double[:, :] matrix_view, + unordered_set[long] &select_link_set + ) noexcept nogil: + """ + Apply select link loading. + + Returns a vector of link loads indexed by compressed link ID. 
+ """ + cdef: + RouteSet_t *route_set + vector[double] *route_set_prob + vector[double].const_iterator route_prob_iter + long origin_index, dest_index, o, d + double demand, prob, load + + vector[double] *link_loads = new vector[double](self.num_links) + + bool link_present = False + + # For each OD pair, if a route contains one or more links in a select link set, add that ODs demand to + # a sparse matrix of Os to Ds + + # For each route, if it contains one or more links in a select link set, apply the link loading for + # that route + + for i in range(self.ods.size()): + route_set = deref(self.results)[i] + route_set_prob = deref(self.prob_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + load = prob * demand + + for link in deref(route): + if select_link_set.find(link) != select_link_set.end(): + sparse_mat.append(origin_index, dest_index, load) + link_present = True + break + + if link_present: + for link in deref(route): + deref(link_loads)[link] = deref(link_loads)[link] + load # += here results in all zeros? 
Odd + + return link_loads + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) diff --git a/setup.py b/setup.py index 804ac8f38..17e693d9b 100644 --- a/setup.py +++ b/setup.py @@ -82,6 +82,16 @@ language="c++", ) +ext_mod_sparse_matrix = Extension( + "aequilibrae.matrix.sparse_matrix", + [join("aequilibrae", "matrix", "sparse_matrix.pyx")], + extra_compile_args=compile_args, + extra_link_args=link_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + include_dirs=include_dirs, + language="c++", +) + with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] @@ -130,7 +140,7 @@ ], cmdclass={"build_ext": build_ext}, ext_modules=cythonize( - [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building], + [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building, ext_mod_sparse_matrix], compiler_directives={"language_level": "3str"}, ), ) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 8eca8b14e..f0db90b89 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -221,10 +221,12 @@ def test_link_loading(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - link_loads = rc.link_loading(self.mat) - link_loads2 = rc.link_loading(self.mat, generate_path_files=True) + n = self.mat.names[0] - np.testing.assert_array_almost_equal(link_loads, link_loads2) + ll = rc.link_loading(self.mat)[n] + ll2 = rc.link_loading(self.mat, generate_path_files=True)[n] + + np.testing.assert_array_almost_equal(ll, ll2) def test_known_results(self): np.random.seed(0) From 63cc837f49735cd5c796ebebb1b007c7bf809bec Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 1 May 2024 17:25:15 +1000 Subject: [PATCH 42/52] Add select link 
tests and fix bug --- aequilibrae/paths/route_choice_set.pyx | 3 +- tests/aequilibrae/paths/test_route_choice.py | 127 +++++++++++++------ 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index af258b1da..a55e8e1e5 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1182,7 +1182,7 @@ cdef class RouteChoiceSet: vector[double] *link_loads = new vector[double](self.num_links) - bool link_present = False + bool link_present # For each OD pair, if a route contains one or more links in a select link set, add that ODs demand to # a sparse matrix of Os to Ds @@ -1204,6 +1204,7 @@ cdef class RouteChoiceSet: inc(route_prob_iter) load = prob * demand + link_present = False for link in deref(route): if select_link_set.find(link) != select_link_set.end(): sparse_mat.append(origin_index, dest_index, load) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index f0db90b89..36710f67b 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -27,7 +27,7 @@ def setUp(self) -> None: self.project = Project() self.project.open(proj_path) - self.project.network.build_graphs(fields=["distance"], modes=["c"]) + self.project.network.build_graphs(fields=["distance", "free_flow_time"], modes=["c"]) self.graph = self.project.network.graphs["c"] # type: Graph self.graph.set_graph("distance") self.graph.set_blocked_centroid_flows(False) @@ -229,51 +229,106 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(ll, ll2) def test_known_results(self): - np.random.seed(0) - rc = RouteChoiceSet(self.graph) - nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + for cost in ["distance", "free_flow_time"]: + with 
self.subTest(cost=cost): + self.graph.set_graph(cost) - mat = AequilibraeMatrix() - mat.create_empty( - memory_only=True, - zones=self.graph.num_zones, - matrix_names=["all zeros", "single one"], - ) - mat.index = self.graph.centroids[:] - mat.computational_view() - mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) - mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all zeros", "single one"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) - for od in nodes: - mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 + for od in nodes: + mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 - mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 + mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 - link_loads = rc.link_loading(mat) + link_loads = rc.link_loading(mat) + table = rc.get_results().to_pandas() - with self.subTest(matrix="all zeros"): - u, c = link_loads["all zeros"] - np.testing.assert_allclose(u, 0.0) - np.testing.assert_allclose(c, 0.0) + with self.subTest(matrix="all zeros"): + u, c = link_loads["all zeros"] + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + + with self.subTest(matrix="single one"): + u, c = link_loads["single one"] + link = self.graph.graph[ + (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) + ] + + lid = link.link_id.values[0] + c_lid = 
link.__compressed_id__.values[0] + t = table[table["route set"].apply(lambda x, lid=lid: lid in set(x))] + v = t.probability.sum() + + self.assertAlmostEqual(u[lid - 1], v, places=6) + self.assertAlmostEqual(c[c_lid], v, places=6) + + def test_select_link(self): + for cost in ["distance", "free_flow_time"]: + with self.subTest(cost=cost): + self.graph.set_graph(cost) + + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all ones"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + + table = rc.get_results().to_pandas() + + # Shortest routes between 20-4, and 21-2 share links 23 and 26. Link 26 also appears in between 10-8 and 17-9 + # 20-4 also shares 11 with 5-3 + ods = [(20, 4), (21, 2), (10, 8), (17, 9)] + sl_link_loads = rc.select_link_loading( + mat, + { + "sl1": self.graph.graph.set_index("link_id").loc[[23, 26]].__compressed_id__.to_list(), + "sl2": self.graph.graph.set_index("link_id").loc[[11]].__compressed_id__.to_list(), + }, + ) - with self.subTest(matrix="single one"): - u, c = link_loads["single one"] - link = self.graph.graph[ - (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) - ] + m, (u, c) = sl_link_loads["all ones"]["sl1"] + m2, (u2, c2) = sl_link_loads["all ones"]["sl2"] + m = m.to_scipy() + m2 = m2.to_scipy() + self.assertSetEqual(set(zip(*(m > 0.0001).nonzero())), {(o - 1, d - 1) for o, d in ods}) + self.assertSetEqual(set(zip(*(m2 > 0.0001).nonzero())), {(20 - 1, 4 - 1), (5 - 1, 3 - 1)}) - lid = link.link_id.values[0] - c_lid = link.__compressed_id__.values[0] + t1 = table[(table.probability > 0.0) & table["route 
set"].apply(lambda x: bool(set(x) & {23, 26}))] + t2 = table[(table.probability > 0.0) & table["route set"].apply(lambda x: 11 in set(x))] + sl1_link_union = np.unique(np.hstack(t1["route set"].values)) + sl2_link_union = np.unique(np.hstack(t2["route set"].values)) - self.assertAlmostEqual(u[lid - 1], 1.0) - self.assertAlmostEqual(c[c_lid], 1.0) + np.testing.assert_equal(u.nonzero()[0] + 1, sl1_link_union) + np.testing.assert_equal(u2.nonzero()[0] + 1, sl2_link_union) - u[lid - 1] = 0.0 - c[c_lid] = 0.0 + np.testing.assert_allclose(u, c) + np.testing.assert_allclose(u2, c2) - np.testing.assert_allclose(u, 0.0) - np.testing.assert_allclose(c, 0.0) + self.assertAlmostEqual(u.sum(), (t1["route set"].apply(len) * t1.probability).sum()) + self.assertAlmostEqual(u2.sum(), (t2["route set"].apply(len) * t2.probability).sum()) class TestRouteChoice(TestCase): From 8a222d2320ad0be3076ad054a3614cb55a1b7a76 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 1 May 2024 17:25:30 +1000 Subject: [PATCH 43/52] Add sparse matrix writing --- aequilibrae/matrix/sparse_matrix.pyx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx index 7c393834e..3ee7cbb5f 100644 --- a/aequilibrae/matrix/sparse_matrix.pyx +++ b/aequilibrae/matrix/sparse_matrix.pyx @@ -4,6 +4,7 @@ from cython.operator cimport dereference as d import scipy.sparse import numpy as np +import openmatrix as omx cdef class Sparse: """ @@ -25,6 +26,13 @@ cdef class Sparse: """ pass + def to_disk(self, path, name: str): + f = omx.open_file(path, "a") + try: + f[name] = self.to_scipy().tocsr().toarray() + finally: + f.close() + cdef class COO(Sparse): """ @@ -58,7 +66,7 @@ cdef class COO(Sparse): del self.data self.data = nullptr - def to_scipy(self, shape=None): + def to_scipy(self, shape=None, dtype=np.float64): row = &d(self.row)[0] col = &d(self.col)[0] data = &d(self.data)[0] @@ -66,7 +74,7 @@ cdef class COO(Sparse): 
if shape is None: shape = self.shape - return scipy.sparse.coo_matrix((data, (row, col)), dtype=np.float64, shape=shape) + return scipy.sparse.coo_matrix((data, (row, col)), dtype=dtype, shape=shape) cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: self.row.push_back(i) From c16e3970573bc7bf96383d982b95bb28cbc3daf3 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 7 May 2024 11:35:57 +1000 Subject: [PATCH 44/52] Update docs, add small api tests --- .../trip_distribution/plot_route_choice.py | 51 +++++++++---- tests/aequilibrae/paths/test_route_choice.py | 73 +++++++++++++------ 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py index 5f285cb22..a8d1990f7 100644 --- a/docs/source/examples/trip_distribution/plot_route_choice.py +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -1,10 +1,11 @@ -""" -.. _example_usage_route_choice: +""".. _example_usage_route_choice: Route Choice ================= -In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La Serena Metropolitan Area in Chile. +In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La +Serena Metropolitan Area in Chile. + """ # Imports @@ -52,7 +53,8 @@ # let's say we want to minimize the distance graph.set_graph("distance") -# But let's say we only want a skim matrix for nodes 28-40, and 49-60 (inclusive), these happen to be a selection of western centroids. +# But let's say we only want a skim matrix for nodes 28-40, and 49-60 (inclusive), these happen to be a selection of +# western centroids. 
graph.prepare_graph(np.array(list(range(28, 41)) + list(range(49, 91)))) # %% @@ -77,13 +79,15 @@ from aequilibrae.paths import RouteChoice # %% -# This object construct might take a minute depending on the size of the graph due to the construction of the compressed link to network link mapping that's required. -# This is a one time operation per graph and is cached. -# We need to supply a Graph and optionally a AequilibraeMatrix, if the matrix is not provided link loading cannot be preformed. +# This object construct might take a minute depending on the size of the graph due to the construction of the compressed +# link to network link mapping that's required. This is a one time operation per graph and is cached. We need to +# supply a Graph and optionally a AequilibraeMatrix, if the matrix is not provided link loading cannot be preformed. rc = RouteChoice(graph, mat) # %% -# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE based on the paper + +# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE +# based on the paper # "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). # https://doi.org/10.1080/18128602.2012.671383 # It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results. @@ -96,7 +100,8 @@ print(rc.default_paramaters) # %% -# We can now perform a computation for single OD pair if we'd like. Here we do one between the first and last centroid as well an an assignment. +# We can now perform a computation for single OD pair if we'd like. Here we do one between the first and last centroid +# as well an an assignment. results = rc.execute_single(28, 90, perform_assignment=True) print(results[0]) @@ -106,8 +111,8 @@ rc.get_results().to_pandas() # %% -# To perform a batch operation we need to prepare the object first. 
We can either provide a list of tuple of the OD pairs we'd like to use, or we can provided a 1D list -# and the generation will be run on all permutations. +# To perform a batch operation we need to prepare the object first. We can either provide a list of tuple of the OD +# pairs we'd like to use, or we can provided a 1D list and the generation will be run on all permutations. rc.prepare(graph.centroids[:5]) # You can inspect the result with rc.nodes # %% @@ -117,8 +122,28 @@ # %% # Since we provided a matrix initially we can also perform link loading based on our assignment results. -# We can specify which link loading we want, either just uncompressed, just compressed, or both. -rc.get_load_results(which="both") +rc.get_load_results() + +# %% +# Select link analysis +# ~~~~~~~~~~~~~~~~~~ +# We can also enable select link analysis by providing the links and the directions that we are interested in +rc.set_select_links({"sl1": [(5372, 1), (5374, 1)], "sl2": [(23845, 0)]}) + +# %% +# We can get then the results in a Pandas data frame for both the network and compressed graph. +u_sl, c_sl = rc.get_select_link_results() +u_sl + +# %% +# We can also access the OD matrices for this link loading. These matrices are sparse and can be converted to +# scipy.sparse matrices for ease of use. They're stored in a dictionary where the key is the matrix name concatenated +# wit the select link set name via an underscore. These matrices are constructed during `get_select_link_results`. 
+list(rc.sl_od_matrix.keys()) + +# %% +od_matrix = rc.sl_od_matrix["demand_sl1"] +od_matrix.to_scipy() # %% project.close() diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 36710f67b..59df125d8 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -349,53 +349,84 @@ def setUp(self) -> None: self.mat = self.project.matrices.get_matrix("demand_omx") self.mat.computational_view() - def test_prepare(self): - rc = RouteChoice(self.graph, self.mat) + self.rc = RouteChoice(self.graph, self.mat) + def test_prepare(self): with self.assertRaises(ValueError): - rc.prepare([]) + self.rc.prepare([]) with self.assertRaises(ValueError): - rc.prepare(["1", "2"]) + self.rc.prepare(["1", "2"]) with self.assertRaises(ValueError): - rc.prepare([("1", "2")]) + self.rc.prepare([("1", "2")]) with self.assertRaises(ValueError): - rc.prepare([1]) + self.rc.prepare([1]) - rc.prepare([1, 2]) - self.assertListEqual(rc.nodes, [(1, 2), (2, 1)]) - rc.prepare([(1, 2)]) - self.assertListEqual(rc.nodes, [(1, 2)]) + self.rc.prepare([1, 2]) + self.assertListEqual(self.rc.nodes, [(1, 2), (2, 1)]) + self.rc.prepare([(1, 2)]) + self.assertListEqual(self.rc.nodes, [(1, 2)]) def test_set_save_routes(self): - rc = RouteChoice(self.graph, self.mat) + self.rc = RouteChoice(self.graph, self.mat) with self.assertRaises(ValueError): - rc.set_save_routes("/non-existent-path") + self.rc.set_save_routes("/non-existent-path") def test_set_choice_set_generation(self): - rc = RouteChoice(self.graph, self.mat) - - rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} ) - 
rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) + self.rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( - rc.parameters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, + {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0}, ) with self.assertRaises(ValueError): - rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) with self.assertRaises(ValueError): - rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) with self.assertRaises(AttributeError): - rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + + def test_link_results(self): + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + + self.rc.set_select_links({"sl1": [(23, 1), (26, 1)], "sl2": [(11, 0)]}) + + self.rc.prepare(self.graph.centroids) + + self.rc.execute(perform_assignment=True) + + u, c = self.rc.get_load_results() + u_sl, c_sl = self.rc.get_select_link_results() + + pd.testing.assert_frame_equal(u, c) + pd.testing.assert_frame_equal(u_sl, c_sl) + + self.assertListEqual( + list(u.columns), + ["link_id"] + [mat_name + "_" + dir for dir in ["ab", "ba", "tot"] for mat_name in self.mat.names], + ) + + self.assertListEqual( + list(u_sl.columns), + ["link_id"] + + [ + mat_name + "_" + sl_name + "_" + dir + for sl_name in ["sl1", "sl2"] + for dir in ["ab", "ba"] + for mat_name in self.mat.names + ] + + [mat_name + "_" + sl_name + "_tot" for sl_name in ["sl1", "sl2"] for mat_name in self.mat.names], + ) def generate_line_strings(project, graph, results): From a7815549f7dfc169b1d9c8df995b1b9e2a379f8f Mon 
Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 8 May 2024 16:11:54 +1000 Subject: [PATCH 45/52] Add sparse matrix tests and from disk method --- aequilibrae/matrix/__init__.py | 1 + aequilibrae/matrix/sparse_matrix.pyx | 40 +++++++++++++++++++ .../aequilibrae/matrix/test_sparse_matrix.py | 38 ++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 tests/aequilibrae/matrix/test_sparse_matrix.py diff --git a/aequilibrae/matrix/__init__.py b/aequilibrae/matrix/__init__.py index 39ae5bde4..18521fe8a 100644 --- a/aequilibrae/matrix/__init__.py +++ b/aequilibrae/matrix/__init__.py @@ -1,2 +1,3 @@ from .aequilibrae_matrix import AequilibraeMatrix, matrix_export_types from .aequilibrae_data import AequilibraeData, data_export_types +from .sparse_matrix import Sparse, COO diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx index 3ee7cbb5f..270850223 100644 --- a/aequilibrae/matrix/sparse_matrix.pyx +++ b/aequilibrae/matrix/sparse_matrix.pyx @@ -33,6 +33,24 @@ cdef class Sparse: finally: f.close() + @classmethod + def from_disk(cls, path, names=None, aeq=False): + """ + Read an OMX file and return a dictionary of matrix names to a scipy.sparse matrix, or + aequilibrae.matrix.sparse matrix. + """ + f = omx.open_file(path, "r") + res = {} + try: + for matrix in (f.list_matrices() if names is None else names): + if aeq: + res[matrix] = cls.from_matrix(f[matrix]) + else: + res[matrix] = scipy.sparse.csr_matrix(f[matrix]) + return res + finally: + f.close() + cdef class COO(Sparse): """ @@ -67,6 +85,9 @@ cdef class COO(Sparse): self.data = nullptr def to_scipy(self, shape=None, dtype=np.float64): + """ + Create scipy.sparse.coo_matrix from this COO matrix. 
+ """ row = &d(self.row)[0] col = &d(self.col)[0] data = &d(self.data)[0] @@ -76,6 +97,25 @@ cdef class COO(Sparse): return scipy.sparse.coo_matrix((data, (row, col)), dtype=dtype, shape=shape) + @classmethod + def from_matrix(cls, m): + """ + Create COO matrix from an dense or scipy-like matrix. + """ + if not isinstance(m, scipy.sparse.coo_matrix): + m = scipy.sparse.coo_matrix(m) + + self = cls() + + cdef size_t[:] row = m.row.astype(np.uint64), col = m.row.astype(np.uint64) + cdef double[:] data = m.data + + self.row.insert(self.row.end(), &row[0], &row[-1] + 1) + self.col.insert(self.col.end(), &col[0], &col[-1] + 1) + self.data.insert(self.data.end(), &data[0], &data[-1] + 1) + + return self + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: self.row.push_back(i) self.col.push_back(j) diff --git a/tests/aequilibrae/matrix/test_sparse_matrix.py b/tests/aequilibrae/matrix/test_sparse_matrix.py new file mode 100644 index 000000000..f75dc70da --- /dev/null +++ b/tests/aequilibrae/matrix/test_sparse_matrix.py @@ -0,0 +1,38 @@ +from tempfile import gettempdir +from aequilibrae.matrix import COO +from unittest import TestCase +from uuid import uuid4 +import scipy.sparse +import numpy as np +import pathlib + + +class TestSparseMatrix(TestCase): + def setUp(self) -> None: + self.data = np.full((100, 100), 5.0) + self.dir = pathlib.Path(gettempdir()) / uuid4().hex + self.dir.mkdir() + + def tearDown(self) -> None: + pass + + def test_round_trip(self): + p = self.dir / "test.omx" + + coo = COO.from_matrix( + self.data, + ) + coo.to_disk(p, "m1") + coo.to_disk(p, "m2") + + sp = coo.to_scipy() + + coo1 = COO.from_disk(p) + coo2 = COO.from_disk(p, aeq=True) + + for m in ["m1", "m2"]: + self.assertIsInstance(coo1[m], scipy.sparse.csr_matrix) + self.assertIsInstance(coo2[m], COO) + + np.testing.assert_allclose(sp.A, coo1[m].A) + np.testing.assert_allclose(sp.A, coo2[m].to_scipy().A) From 9e39428c4fb7b7ae81d67b636db33cc5e585cad1 Mon Sep 17 00:00:00 
2001 From: Jake-Moss Date: Wed, 22 May 2024 15:33:25 +1000 Subject: [PATCH 46/52] Add link loading and select link results saving --- aequilibrae/paths/route_choice.py | 119 +++++++++++++++++-- tests/aequilibrae/paths/test_route_choice.py | 34 +++++- 2 files changed, 145 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 8b836a42d..00753f4ad 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -3,6 +3,8 @@ import logging import pathlib import socket +import sqlite3 +from datetime import datetime from typing import List, Optional, Tuple, Union, Dict from uuid import uuid4 from functools import cached_property @@ -27,7 +29,8 @@ class RouteChoice: def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): self.parameters = self.default_paramaters.copy() - self.procedure_id = uuid4().hex + self.procedure_id = None + self.procedure_date = None proj = project or get_active_project(must_exist=False) self.project = proj @@ -192,6 +195,8 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool :Returns: ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. """ + self.procedure_id = uuid4().hex + self.procedure_date = str(datetime.today()) self.results = None return self.__rc.run( @@ -221,6 +226,8 @@ def execute(self, perform_assignment: bool = False) -> None: "to perform batch route choice generation you must first prepare with the selected nodes. 
See `RouteChoice.prepare()`" ) + self.procedure_date = str(datetime.today()) + self.results = None self.__rc.batched( self.nodes, @@ -244,11 +251,14 @@ def info(self) -> dict: **info** (:obj:`dict`): Dictionary with summary information """ - matrix_totals = ( - {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} - if self.matrix is not None - else None - ) + if self.matrix is None: + matrix_totals = {} + elif len(self.matrix.view_names) == 1: + matrix_totals = {self.matrix.view_names[0]: np.sum(self.matrix.matrix_view[:, :])} + else: + matrix_totals = { + nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names) + } info = { "Algorithm": self.algorithm, @@ -389,7 +399,7 @@ def get_select_link_results(self) -> pd.DataFrame: Get the select link loading results. :Returns: - **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + **dataset** (:obj:`Tuple[pd.DataFrame, pd.DataFrame]`): A tuple of uncompressed and compressed select link loading results as DataFrames. Columns are the matrix name concatenated with the select link set and direction. 
""" @@ -423,3 +433,98 @@ def get_select_link_results(self) -> pd.DataFrame: compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.sl_compact_link_loads) return uncompressed_df, compressed_df + + def __save_dataframe(self, df, method_name: str, description: str, table_name: str, report: dict, project) -> None: + self.procedure_id = uuid4().hex + data = [ + table_name, + "select link", + self.procedure_id, + str(report), + self.procedure_date, + description, + ] + + # sqlite3 context managers only commit, they don't close, oh well + conn = sqlite3.connect(pathlib.Path(project.project_base_path) / "results_database.sqlite") + with conn: + df.to_sql(table_name, conn, index=False) + conn.close() + + conn = project.connect() + with conn: + conn.execute( + """Insert into results(table_name, procedure, procedure_id, procedure_report, timestamp, + description) Values(?,?,?,?,?,?)""", + data, + ) + conn.close() + + def save_link_flows(self, table_name: str, project=None) -> None: + """ + Saves the link link flows for all classes into the results database. + + :Arguments: + **table_name** (:obj:`str`): Name of the table being inserted to. + **project** (:obj:`Project`, `Optional`): Project we want to save the results to. + Defaults to the active project + """ + if not project: + project = self.project or get_active_project() + + u, c = self.get_load_results() + info = self.info() + self.__save_dataframe( + u, + "Link loading", + "Uncompressed link loading results", + table_name + "_uncompressed", + info, + project=project, + ) + + self.__save_dataframe( + c, + "Link loading", + "Compressed link loading results", + table_name + "_compressed", + info, + project=project, + ) + + def save_select_link_flows(self, table_name: str, project=None) -> None: + """ + Saves the select link link flows for all classes into the results database. Additionally, it exports + the OD matrices into OMX format. 
+ + :Arguments: + **table_name** (:obj:`str`): Name of the table being inserted to and the name of the + OpenMatrix file used for OD matrices. + **project** (:obj:`Project`, `Optional`): Project we want to save the results to. + Defaults to the active project + """ + if not project: + project = self.project or get_active_project() + + u, c = self.get_select_link_results() + info = self.info() + self.__save_dataframe( + u, + "Select link analysis", + "Uncompressed select link analysis results", + table_name + "_uncompressed", + info, + project=project, + ) + + self.__save_dataframe( + c, + "Select link analysis", + "Compressed select link analysis results", + table_name + "_compressed", + info, + project=project, + ) + + for k, v in self.sl_od_matrix.items(): + v.to_disk((pathlib.Path(project.project_base_path) / "matrices" / table_name).with_suffix(".omx"), k) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 59df125d8..de3f257de 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -2,6 +2,8 @@ import uuid import zipfile from os.path import join, dirname +import pathlib +import sqlite3 from tempfile import gettempdir from unittest import TestCase import pandas as pd @@ -11,7 +13,7 @@ from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice -from aequilibrae.matrix import AequilibraeMatrix +from aequilibrae.matrix import AequilibraeMatrix, Sparse from ...data import siouxfalls_project @@ -428,6 +430,36 @@ def test_link_results(self): + [mat_name + "_" + sl_name + "_tot" for sl_name in ["sl1", "sl2"] for mat_name in self.mat.names], ) + def test_saving(self): + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.rc.set_select_links({"sl1": [(23, 1), (26, 1)], "sl2": [(11, 0)]}) + self.rc.prepare(self.graph.centroids) + 
self.rc.execute(perform_assignment=True) + u, c = self.rc.get_load_results() + u_sl, c_sl = self.rc.get_select_link_results() + + self.rc.save_link_flows("ll") + self.rc.save_select_link_flows("sl") + + conn = sqlite3.connect(pathlib.Path(self.project.project_base_path) / "results_database.sqlite") + with conn: + for table, df in [ + ("ll_uncompressed", u), + ("ll_compressed", c), + ("sl_uncompressed", u_sl), + ("sl_compressed", c_sl), + ]: + with self.subTest(table=table): + pd.testing.assert_frame_equal(pd.read_sql(f"select * from {table}", conn), df) + conn.close() + + matrices = Sparse.from_disk( + (pathlib.Path(self.project.project_base_path) / "matrices" / "sl").with_suffix(".omx") + ) + + for name, matrix in self.rc.sl_od_matrix.items(): + np.testing.assert_allclose(matrix.to_scipy().A, matrices[name].A) + def generate_line_strings(project, graph, results): """Debug method""" From 92496c01ec797c162bb5799dd480d83f1da09133 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 22 May 2024 17:35:30 +1000 Subject: [PATCH 47/52] WIP: add LP to BFSLE, each depth penalises the next depths base graph --- aequilibrae/paths/route_choice.py | 4 ++-- aequilibrae/paths/route_choice_set.pxd | 1 + aequilibrae/paths/route_choice_set.pyx | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 00753f4ad..63f10a51f 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,8 +22,8 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100}, - "link-penalisation": {"penalty": 1.1}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.1}, + "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } diff --git a/aequilibrae/paths/route_choice_set.pxd 
b/aequilibrae/paths/route_choice_set.pxd index c4a016194..7c86ded04 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -180,6 +180,7 @@ cdef class RouteChoiceSet: long long [:] thread_conn, long long [:] thread_b_nodes, long long [:] _thread_reached_first, + double penatly, unsigned int seed ) noexcept nogil diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index a55e8e1e5..88c92c8ff 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -12,7 +12,7 @@ from libc.math cimport INFINITY, exp, pow from libc.stdlib cimport abort from libc.string cimport memcpy from libcpp cimport nullptr -from libcpp.algorithm cimport lower_bound, reverse, sort +from libcpp.algorithm cimport lower_bound, reverse, sort, copy from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport unordered_set from libcpp.utility cimport pair @@ -277,11 +277,11 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - if penalty != 0.0 and bfsle: - raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") + # if penalty != 0.0 and bfsle: + # raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") - if not bfsle and penalty <= 1.0: - raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") + # if penalty <= 1.0: + # raise ValueError("`penalty` must be > 1.0. 
`penalty=1.1` is recommended") if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") @@ -410,6 +410,7 @@ cdef class RouteChoiceSet: conn_matrix[threadid()], b_nodes_matrix[threadid()], _reached_first_matrix[threadid()], + penalty, c_seed, ) else: @@ -562,6 +563,7 @@ cdef class RouteChoiceSet: long long [:] thread_conn, long long [:] thread_b_nodes, long long [:] _thread_reached_first, + double penatly, unsigned int seed ) noexcept nogil: """Main method for route set generation. See top of file for commentary.""" @@ -579,6 +581,11 @@ cdef class RouteChoiceSet: pair[RouteSet_t.iterator, bool] status unsigned int miss_count = 0 long long p, connector + vector[double] penalised_cost = vector[double](self.cost_view.shape[0]) + vector[double] next_penalised_cost = vector[double](self.cost_view.shape[0]) + + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -600,7 +607,7 @@ cdef class RouteChoiceSet: for banned in queue: # Copying the costs back into the scratch costs buffer. 
We could keep track of the modifications and # reverse them as well - memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + memcpy(&thread_cost[0], &penalised_cost[0], penalised_cost.size() * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY @@ -628,6 +635,7 @@ cdef class RouteChoiceSet: while p != origin_index: connector = thread_conn[p] p = thread_predecessors[p] + next_penalised_cost[connector] *= penatly vec.push_back(connector) reverse(vec.begin(), vec.end()) @@ -657,6 +665,8 @@ cdef class RouteChoiceSet: queue.swap(next_queue) next_queue.clear() + copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) + # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those for banned in queue: del banned @@ -900,6 +910,8 @@ cdef class RouteChoiceSet: return prob_vec +TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) + @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ From daa3aeebed2870d6ceccd9a7e73a0b47323c4478 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 13:18:10 +1000 Subject: [PATCH 48/52] Add optional link penalisation to BFSLE --- aequilibrae/paths/route_choice.py | 12 +++-- aequilibrae/paths/route_choice_set.pyx | 66 ++++++++++++++++++-------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 63f10a51f..dcf6f05df 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,7 +22,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.1}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01}, "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -64,8 +64,7 @@ def __rc(self) -> RouteChoiceSet: return RouteChoiceSet(self.graph) def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: - """ - Chooses the assignment algorithm and set parameters. + """Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. BFSLE implementation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, @@ -77,7 +76,6 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: Setting the parameters for the route choice: `beta`, `theta`, and `seed` are BFSLE specific parameters. - `penalty` is a link penalisation specific parameter. 
Setting `max_depth` or `max_misses`, while not required, is strongly recommended to prevent runaway algorithms. `max_misses` is the maximum amount of duplicate routes found per OD pair. If it is exceeded then the route set @@ -94,6 +92,12 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. If it is exceeded then the route set if returned with fewer than `max_routes`. + Additionally BFSLE has the option to incorporate link penalisation. Every link in all routes found at a depth + are penalised with the `penalty` factor for the next depth. So at a depth of 0 no links are penalised nor + removed. At depth 1, all links found at depth 0 are penalised, then the links marked for removal are removed. + All links in the routes found at depth 1 are then penalised for the next depth. The penalisation compounds. + Pass set `penalty=1.0` to disable. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 88c92c8ff..501d791bb 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -19,6 +19,8 @@ from libcpp.utility cimport pair from libcpp.vector cimport vector from openmp cimport omp_get_max_threads +from libc.stdio cimport fprintf, stderr + import itertools import logging import pathlib @@ -239,7 +241,7 @@ cdef class RouteChoiceSet: cores: int = 0, a_star: bool = True, bfsle: bool = True, - penalty: float = 0.0, + penalty: float = 1.0, where: Optional[str] = None, path_size_logit: bool = False, beta: float = 1.0, @@ -264,8 +266,7 @@ cdef class RouteChoiceSet: Default of ``0`` for all available. **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. 
- **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible - with ``bfsle=True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation and BFSLE with LP. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. """ cdef: @@ -277,12 +278,6 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - # if penalty != 0.0 and bfsle: - # raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") - - # if penalty <= 1.0: - # raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") - if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") @@ -568,24 +563,29 @@ cdef class RouteChoiceSet: ) noexcept nogil: """Main method for route set generation. See top of file for commentary.""" cdef: + # Output RouteSet_t *route_set + + # Scratch objects LinkSet_t removed_links minstd_rand rng - # Scratch objects + # These objects are juggled to prevent more allocations than necessary vector[unordered_set[long long] *] queue vector[unordered_set[long long] *] next_queue unordered_set[long long] *banned unordered_set[long long] *new_banned + + # Local variables, Cython doesn't allow conditional declarations vector[long long] *vec pair[RouteSet_t.iterator, bool] status unsigned int miss_count = 0 long long p, connector - vector[double] penalised_cost = vector[double](self.cost_view.shape[0]) - vector[double] next_penalised_cost = vector[double](self.cost_view.shape[0]) - copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) - copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) + # Link penalisation, only used when penalty != 1.0 + bint lp = penatly != 1.0 + vector[double] *penalised_cost = nullptr + 
vector[double] *next_penalised_cost = nullptr max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -594,6 +594,13 @@ cdef class RouteChoiceSet: route_set = new RouteSet_t() rng.seed(seed) + if lp: + # Although we don't need the dynamic ability of vectors here, Cython doesn't have the std::array module. + penalised_cost = new vector[double](self.cost_view.shape[0]) + next_penalised_cost = new vector[double](self.cost_view.shape[0]) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) + # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned # edges to consider for depth in range(max_depth): @@ -605,9 +612,12 @@ cdef class RouteChoiceSet: shuffle(queue.begin(), queue.end(), rng) for banned in queue: - # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and - # reverse them as well - memcpy(&thread_cost[0], &penalised_cost[0], penalised_cost.size() * sizeof(double)) + if lp: + # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation + copy(penalised_cost.cbegin(), penalised_cost.cend(), &thread_cost[0]) + else: + # Otherwise we just copy directly from the cost view + memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY @@ -635,9 +645,15 @@ cdef class RouteChoiceSet: while p != origin_index: connector = thread_conn[p] p = thread_predecessors[p] - next_penalised_cost[connector] *= penatly vec.push_back(connector) + if lp: + # Here we penalise all seen links for the *next* depth. 
If we penalised on the current depth + # then we would introduce a bias for earlier seen paths + for connector in deref(vec): + # *= does not work + deref(next_penalised_cost)[connector] = penatly * deref(next_penalised_cost)[connector] + reverse(vec.begin(), vec.end()) for connector in deref(vec): @@ -665,7 +681,10 @@ cdef class RouteChoiceSet: queue.swap(next_queue) next_queue.clear() - copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) + if lp: + # Update the penalised_cost vector, since next_penalised_cost is always the one updated we just need to + # bring penalised_cost up to date. + copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those for banned in queue: @@ -675,6 +694,11 @@ cdef class RouteChoiceSet: for banned in removed_links: del banned + if lp: + # If we had enabled link penalisation, we'll need to free those vectors as well + del penalised_cost + del next_penalised_cost + return route_set @cython.wraparound(False) @@ -737,7 +761,7 @@ cdef class RouteChoiceSet: vec.push_back(connector) for connector in deref(vec): - thread_cost[connector] *= penatly + thread_cost[connector] = penatly * thread_cost[connector] reverse(vec.begin(), vec.end()) @@ -910,7 +934,7 @@ cdef class RouteChoiceSet: return prob_vec -TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) +# TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ From 2e829c661eeb95f6ad2f9f215546fd00de55a33b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:28:49 +1000 Subject: [PATCH 49/52] Add binary logit cut offs for assignment --- aequilibrae/paths/route_choice.py | 13 +++++- aequilibrae/paths/route_choice_set.pxd | 3 +- aequilibrae/paths/route_choice_set.pyx | 57 ++++++++++++++++++++------ 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index dcf6f05df..4a21e4695 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,7 +22,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 1.0}, "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -98,6 +98,15 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: All links in the routes found at depth 1 are then penalised for the next depth. The penalisation compounds. Pass set `penalty=1.0` to disable. + When performing an assignment, `cutoff_prob` can be provided to exclude routes from the path-sized logit model. + The `cutoff_prob` is used to compute an inverse binary logit and obtain a max difference in utilities. If a + path's total cost is greater than the minimum cost path in the route set plus the max difference, the route is + excluded from the PSL calculations. The route is still returned, but with a probability of 0.0. 
+ + The `cutoff_prob` should be in the range [0, 1]. It is then rescaled internally to [0.5, 1] as probabilities + below 0.5 produce negative differences in utilities. A higher `cutoff_prob` includes more routes. A value of + `0.0` will only include the minimum cost route. A value of `1.0` includes all routes. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm @@ -131,7 +140,7 @@ def set_cores(self, cores: int) -> None: self.cores = cores def set_save_path_files(self, save_it: bool) -> None: - """Turn path saving on or off. + """turn path saving on or off. :Arguments: **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 7c86ded04..85f675972 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -219,7 +219,8 @@ cdef class RouteChoiceSet: vector[double] &total_cost, vector[double] &path_overlap_vec, double beta, - double theta + double theta, + double cutoff_prob ) noexcept nogil @staticmethod diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 501d791bb..476816ae1 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -8,11 +8,11 @@ from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc from cython.parallel cimport parallel, prange, threadid from libc.limits cimport UINT_MAX -from libc.math cimport INFINITY, exp, pow +from libc.math cimport INFINITY, exp, pow, log from libc.stdlib cimport abort from libc.string cimport memcpy from libcpp cimport nullptr -from libcpp.algorithm cimport lower_bound, reverse, sort, copy +from libcpp.algorithm cimport lower_bound, reverse, sort, copy, min_element from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport 
unordered_set from libcpp.utility cimport pair @@ -246,6 +246,7 @@ cdef class RouteChoiceSet: path_size_logit: bool = False, beta: float = 1.0, theta: float = 1.0, + cutoff_prob: float = 1.0, ): """Compute the a route set for a list of OD pairs. @@ -281,6 +282,9 @@ cdef class RouteChoiceSet: if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") + if path_size_logit and not 0.0 <= cutoff_prob <= 1.0: + raise ValueError("`cutoff_prob` must be 0 <= `cutoff_prob` <= 1 for path sized logit model") + for o, d in ods: if self.nodes_to_indices_view[o] == -1: raise ValueError(f"Origin {o} is not present within the compact graph") @@ -295,6 +299,9 @@ cdef class RouteChoiceSet: unsigned int c_seed = seed unsigned int c_cores = cores if cores > 0 else omp_get_max_threads() + # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values + double scaled_cutoff_prob = cutoff_prob * 0.5 + 0.5 + vector[pair[long long, long long]] c_ods # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this @@ -439,7 +446,8 @@ cdef class RouteChoiceSet: deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, - theta + theta, + scaled_cutoff_prob ) # While we need the unique sorted links (.first), we don't need the frequencies (.second) del freq_pair.second @@ -909,9 +917,15 @@ cdef class RouteChoiceSet: vector[double] &total_cost, vector[double] &path_overlap_vec, double beta, - double theta + double theta, + double cutoff_prob ) noexcept nogil: - """Compute a probability for each route in the route set based on the path overlap.""" + """Compute a probability for each route in the route set based on the path overlap. 
+ + Computes a binary logit between the minimum cost path and each path, if the total cost is greater than the + minimum + the difference in utilities required to produce the cut-off probability then the route is excluded from + the route set. + """ cdef: # Scratch objects vector[double] *prob_vec @@ -919,23 +933,31 @@ cdef class RouteChoiceSet: long long route_set_idx size_t i, j + vector[bool] route_mask = vector[bool](total_cost.size()) + double cutoff_cost = deref(min_element(total_cost.cbegin(), total_cost.cend())) \ + + inverse_binary_logit(cutoff_prob, 0.0, 1.0) + prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) + for i in range(total_cost.size()): + route_mask[i] = total_cost[i] <= cutoff_cost + # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will # lead to NaN results for i in range(total_cost.size()): - inv_prob = 0.0 - for j in range(total_cost.size()): - inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ - * exp(-theta * (total_cost[j] - total_cost[i])) - - prob_vec.push_back(1.0 / inv_prob) + if route_mask[i]: + inv_prob = 0.0 + for j in range(total_cost.size()): + if route_mask[j]: + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ + * exp(-theta * (total_cost[j] - total_cost[i])) + prob_vec.push_back(1.0 / inv_prob) + else: + prob_vec.push_back(0.0) return prob_vec -# TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) - @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ @@ -1426,3 +1448,12 @@ cdef class Checkpoint: @staticmethod def batches(ods: List[Tuple[int, int]]): return (list(g) for k, g in itertools.groupby(sorted(ods), key=lambda x: x[0])) + + +cdef double inverse_binary_logit(double prob, double beta0, double beta1) noexcept nogil: + if prob == 1.0: + return INFINITY + elif prob == 0.0: + return -INFINITY + else: + return (log(prob / (1.0 - prob)) - beta0) / beta1 From 4ea4e0db3250a4371d962b281040479c780aff55 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:43:23 +1000 Subject: [PATCH 50/52] Update tests --- aequilibrae/paths/route_choice.py | 2 +- tests/aequilibrae/paths/test_route_choice.py | 36 ++++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 4a21e4695..c5b1e553f 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -24,7 +24,7 @@ class RouteChoice: default_paramaters = { "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 1.0}, "link-penalisation": {}, - "bfsle": {"beta": 1.0, "theta": 1.0}, + "bfsle": {"beta": 1.0, "theta": 1.0, "penalty": 1.0}, } def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index de3f257de..5b3fbc88f 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -45,7 +45,7 @@ def test_route_choice(self): rc = RouteChoiceSet(self.graph) a, b = 1, 20 - for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 
1.1}]: + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}, {"bfsle": True, "penalty": 1.1}]: with self.subTest(**kwargs): results = rc.run(a, b, max_routes=10, **kwargs) self.assertEqual(len(results), 10, "Returned more routes than expected") @@ -150,10 +150,6 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) - with self.assertRaises(ValueError): - rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5) - rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1) - def test_round_trip(self): np.random.seed(1000) rc = RouteChoiceSet(self.graph) @@ -210,12 +206,15 @@ def test_prob_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = rc.get_results().to_pandas() - gb = table.groupby(by=["origin id", "destination id"]) - for od, df in gb: - self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") + for kwargs in [{"cutoff_prob": 0.0}, {"cutoff_prob": 0.5}, {"cutoff_prob": 1.0}]: + with self.subTest(**kwargs): + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True, **kwargs) + table = rc.get_results().to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): np.random.seed(0) @@ -380,21 +379,28 @@ def test_set_save_routes(self): def test_set_choice_set_generation(self): self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - self.rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, + {"seed": 0, "max_routes": 20, "max_depth": 0, "max_misses": 
100, "penalty": 1.1, "cutoff_prob": 1.0}, ) self.rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( self.rc.parameters, - {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0}, + { + "seed": 0, + "max_routes": 20, + "max_depth": 0, + "max_misses": 100, + "beta": 1.1, + "theta": 1.0, + "penalty": 1.0, + "cutoff_prob": 1.0, + }, ) with self.assertRaises(ValueError): self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) - with self.assertRaises(ValueError): - self.rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) - with self.assertRaises(AttributeError): self.rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) From d552901bdb058e87597737b95d1efaf0b6faf94b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:48:46 +1000 Subject: [PATCH 51/52] Update example --- .../trip_distribution/plot_route_choice.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py index a8d1990f7..4c00f0949 100644 --- a/docs/source/examples/trip_distribution/plot_route_choice.py +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -90,6 +90,23 @@ # based on the paper # "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). # https://doi.org/10.1080/18128602.2012.671383 +# +# Our BFSLE implementation is slightly different and has extended to allow applying link penalisation as well. Every +# link in all routes found at a depth are penalised with the `penalty` factor for the next depth. So at a depth of 0 no +# links are penalised nor removed. At depth 1, all links found at depth 0 are penalised, then the links marked for +# removal are removed. 
All links in the routes found at depth 1 are then penalised for the next depth. The penalisation
+# compounds. Set `penalty=1.0` to disable.
+#
+# To assist in filtering out bad results during the assignment, a `cutoff_prob` parameter can be provided to exclude
+# routes from the path-sized logit model. The `cutoff_prob` is used to compute an inverse binary logit and obtain a max
+# difference in utilities. If a path's total cost is greater than the minimum cost path in the route set plus the max
+# difference, the route is excluded from the PSL calculations. The route is still returned, but with a probability of
+# 0.0.
+#
+# The `cutoff_prob` should be in the range [0, 1]. It is then rescaled internally to [0.5, 1] as probabilities below 0.5
+# produce negative differences in utilities. A higher `cutoff_prob` includes more routes. A value of `0.0` will only
+# include the minimum cost route. A value of `1.0` includes all routes.
+#
 # It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results.
 #
 rc.set_choice_set_generation("link-penalisation", max_routes=5, penalty=1.1)

From 086a2dbe868ade1c17bfa22038474fe11dd9f666 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 28 May 2024 17:00:59 +1000
Subject: [PATCH 52/52] Some nicer comments

---
 aequilibrae/paths/route_choice_set.pyx | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx
index 476816ae1..a726724f8 100644
--- a/aequilibrae/paths/route_choice_set.pyx
+++ b/aequilibrae/paths/route_choice_set.pyx
@@ -299,7 +299,7 @@ cdef class RouteChoiceSet:
             unsigned int c_seed = seed
             unsigned int c_cores = cores if cores > 0 else omp_get_max_threads()
 
-            # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values
+            # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values.
double scaled_cutoff_prob = cutoff_prob * 0.5 + 0.5 vector[pair[long long, long long]] c_ods @@ -371,7 +371,7 @@ cdef class RouteChoiceSet: results.resize(batch_len) if path_size_logit: - # we may clear these objects because it's either: + # We may clear these objects because it's either: # - the first iteration and they contain no elements, thus no memory to leak # - the internal objects were freed by the previous iteration link_union_set.clear() @@ -621,10 +621,10 @@ cdef class RouteChoiceSet: for banned in queue: if lp: - # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation + # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation, copy(penalised_cost.cbegin(), penalised_cost.cend(), &thread_cost[0]) else: - # Otherwise we just copy directly from the cost view + # ...otherwise we just copy directly from the cost view. memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): @@ -940,6 +940,7 @@ cdef class RouteChoiceSet: prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) + # The route mask should be True for the routes we wish to include. for i in range(total_cost.size()): route_mask[i] = total_cost[i] <= cutoff_cost @@ -949,11 +950,14 @@ cdef class RouteChoiceSet: if route_mask[i]: inv_prob = 0.0 for j in range(total_cost.size()): + # We must skip any other routes that are not included in the mask otherwise our probabilities + # won't add up. if route_mask[j]: inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) else: + # Anything that has been excluded gets a probability of 0 rather than be removed entirely. 
prob_vec.push_back(0.0) return prob_vec @@ -1336,7 +1340,7 @@ cdef class RouteChoiceSet: for i in range(ods.size()): route_set = route_sets[i] - # Instead of construction a "list of lists" style object for storing the route sets we instead will + # Instead of constructing a "list of lists" style object for storing the route sets we instead will # construct one big array of link IDs with a corresponding offsets array that indicates where each new row # (path) starts. for route in deref(route_set):