From b339016d6f63cedb8e76bb53ab7b42392472c34d Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 14:40:29 +1000 Subject: [PATCH 01/52] Scratch working for link loading --- aequilibrae/paths/route_choice.pxd | 9 + aequilibrae/paths/route_choice.pyx | 242 ++++++++++++++++--- tests/aequilibrae/paths/test_route_choice.py | 9 +- 3 files changed, 221 insertions(+), 39 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index bf4671d47..e173e0cc7 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -140,6 +140,13 @@ cdef class RouteChoiceSet: bint block_flows_through_centroids bint a_star + vector[pair[long long, long long]] *ods + vector[RouteSet_t *] *results + vector[vector[long long] *] *link_union_set + vector[vector[double] *] *cost_set + vector[vector[double] *] *gamma_set + vector[vector[double] *] *prob_set + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -202,6 +209,8 @@ cdef class RouteChoiceSet: double theta ) noexcept nogil + # cdef void link_loading(self, double[:, :] matrix_view) nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( vector[pair[long long, long long]] &ods, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index b22dd0c08..211d292af 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -114,7 +114,12 @@ cdef class RouteChoiceSet: def __cinit__(self): """C level init. For C memory allocation and initialisation. 
Called exactly once per object.""" - pass + results = nullptr + link_union_set = nullptr + cost_set = nullptr + gamma_set = nullptr + prob_set = nullptr + ods = nullptr def __init__(self, graph: Graph): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" @@ -134,13 +139,51 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a partially deallocated state already. """ - pass + self.deallocate_results() + + def deallocate_results(self): + """ + Deallocate stored results, existing extracted results are not invalidated. + """ + cdef: + RouteSet_t *route_set + vector[long long] *link_vec + vector[double] *double_vec + + if self.results != nullptr: + for route_set in deref(self.results): + for link_vec in deref(route_set): + del link_vec + del route_set + del self.results + + if self.link_union_set != nullptr: + for link_vec in deref(self.link_union_set): + del link_vec + del self.link_union_vec + + if self.cost_set != nullptr: + for double_vec in deref(self.cost_set): + del double_vec + del self.cost_vec + + if self.gamma_set != nullptr: + for double_vec in deref(self.gamma_set): + del double_vec + del self.gamma_vec + + if self.prob_set != nullptr: + for double_vec in deref(self.prob_set): + del double_vec + del self.prob_vec + + if self.ods != nullptr: + del self.ods @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): @@ -160,7 +203,8 @@ cdef class RouteChoiceSet: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. Represents paths from ``origin`` to ``destination``. 
""" - return [tuple(x) for x in self.batched([(origin, destination)], *args, **kwargs).column("route set").to_pylist()] + self.batched([(origin, destination)], *args, **kwargs) + return [tuple(x) for x in self.get_results().column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -199,10 +243,6 @@ cdef class RouteChoiceSet: **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. - - :Returns: - **route sets** (:obj:`dict[tuple[int, int], list[tuple[int, ...]]]`): Returns a list of unique tuples of compact link IDs for - each OD pair provided (as keys). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
""" cdef: long long o, d @@ -274,26 +314,35 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set pair[vector[long long] *, vector[long long] *] freq_pair - vector[long long] *link_union = nullptr + vector[long long] *link_union_scratch = nullptr + vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] *cost_set = nullptr vector[vector[double] *] *gamma_set = nullptr - vector[vector[double] *] *prob_set= nullptr + vector[vector[double] *] *prob_set = nullptr if path_size_logit: + link_union_set = new vector[vector[long long] *](max_results_len) cost_set = new vector[vector[double] *](max_results_len) gamma_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) + self.deallocate_results() # We have be storing results from a previous run + for batch in batches: c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() results.resize(batch_len) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed if path_size_logit: + # we may clear these objects because it's either: + # - the first iteration and they contain no elements, thus no memory to leak + # - the internal objects were freed by the previous iteration + link_union_set.clear() cost_set.clear() gamma_set.clear() prob_set.clear() + link_union_set.resize(batch_len) cost_set.resize(batch_len) gamma_set.resize(batch_len) prob_set.resize(batch_len) @@ -302,7 +351,7 @@ cdef class RouteChoiceSet: # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
# Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch if path_size_logit: - link_union = new vector[long long]() + link_union_scratch = new vector[long long]() for i in prange(batch_len): origin_index = self.nodes_to_indices_view[c_ods[i].first] @@ -349,13 +398,13 @@ cdef class RouteChoiceSet: ) if path_size_logit: - link_union.clear() - freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union)) + link_union_scratch.clear() + freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) + deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) - del freq_pair.first - del freq_pair.second + del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) deref(results)[i] = route_set @@ -370,38 +419,50 @@ cdef class RouteChoiceSet: ) if path_size_logit: - del link_union + del link_union_scratch + + if where is not None: + table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) - table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures + if path_size_logit: + for j in range(batch_len): + del deref(link_union_set)[j] + del deref(cost_set)[j] + del deref(gamma_set)[j] + del deref(prob_set)[j] - # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures - if path_size_logit: for j in range(batch_len): - 
del deref(cost_set)[j] - del deref(gamma_set)[j] - del deref(prob_set)[j] + for route in deref(deref(results)[j]): + del route + del deref(results)[j] - for j in range(batch_len): - for route in deref(deref(results)[j]): - del route - - if where is not None: checkpoint.write(table) del table else: - break # There was only one batch anyway + pass # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in memory - # We're done with everything now, we can free the outer internal structures - if path_size_logit: - del cost_set - del gamma_set - del prob_set + # Here we decide if we wish to preserve our results for later saving/link loading + if where is not None: + # We're done with everything now, we can free the outer internal structures del results - - if where is None: - return table + if path_size_logit: + del link_union_set + del cost_set + del gamma_set + del prob_set else: - return + self.results = results + self.link_union_set = link_union_set + self.cost_set = cost_set + self.gamma_set = gamma_set + self.prob_set = prob_set + + # Copy the c_ods vector, it was provided by the auto Cython conversion and is allocated on the stack, + # we should copy it to keep it around + self.ods = new vector[pair[long long, long long]](c_ods) + + # self.link_union ?? 
This could be saved as a partial results from the computation above, although it isn't easy to get out rn @cython.initializedcheck(False) cdef void path_find( @@ -744,6 +805,92 @@ cdef class RouteChoiceSet: return prob_vec + def link_loading(self, double[:, :] matrix_view): + if self.ods == nullptr \ + or self.link_union_set == nullptr \ + or self.prob_set == nullptr: + raise ValueError("link loading requires Route Choice path_size_logit results") + + cdef: + vector[double] *loads + vector[double] *route_set_prob + + vector[long long] *link_union + vector[long long].const_iterator link_union_iter + + vector[long long] *links + vector[long long].const_iterator link_iter + + vector[double].const_iterator prob_iter + + RouteSet_t *route_set + double demand, load, prob + size_t length + long origin_index, dest_index + int i + + fprintf(stderr, "starting link loading\n") + with nogil, parallel(num_threads=1): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + fprintf(stderr, "core: %d\n", threadid()) + + for i in prange(self.ods.size()): + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) + + route_set = deref(self.results)[i] + fprintf(stderr, "got route set\n") + link_union = deref(self.link_union_set)[i] + fprintf(stderr, "got link union\n") + route_set_prob = deref(self.prob_set)[i] + fprintf(stderr, "got route set probsk\n") + + fprintf(stderr, "making new loads vector\n") + loads = new vector[double](link_union.size(), 0.0) + + fprintf(stderr, "starting route iteration\n") + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + load = demand * deref(route_prob_iter) + inc(route_prob_iter) + + if load == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
+ links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + # fprintf(stderr, "starting link iteration\n") + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + + fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + load + + inc(link_iter) + + with gil: + print(origin_index, dest_index, deref(loads)) + + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) @@ -837,6 +984,27 @@ cdef class RouteChoiceSet: return table + def get_results(self): # Cython doesn't like this type annotation... -> pa.Table: + """ + :Returns: + **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of compact link IDs for + each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. 
+ """ + if self.results == nullptr or self.ods == nullptr: + raise ValueError("Route Choice results not computed yet") + + table = libpa.pyarrow_wrap_table( + RouteChoiceSet.make_table_from_results( + deref(self.ods), + deref(self.results), + self.cost_set, + self.gamma_set, + self.prob_set + ) + ) + + return table + @cython.embedsignature(True) cdef class Checkpoint: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index b68f64d97..c02c4ad1c 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -30,6 +30,9 @@ def setUp(self) -> None: self.graph.set_graph("distance") self.graph.set_blocked_centroid_flows(False) + self.mat = self.project.matrices.get_matrix("demand_omx") + self.mat.computational_view() + def tearDown(self) -> None: self.project.close() @@ -169,8 +172,10 @@ def test_cost_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + table = rc.get_results().to_pandas() + breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From dbe42bc609e96201a7581f3f1a3d9f1b57f7d422 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 16:02:57 +1000 Subject: [PATCH 02/52] Rudimentary link loading and path file generation --- aequilibrae/paths/route_choice.pxd | 1 + aequilibrae/paths/route_choice.pyx | 123 ++++++++++++++++------------- 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index e173e0cc7..a12fd6c72 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -136,6 +136,7 @@ cdef class RouteChoiceSet: long 
long [:] ids_graph_view long long [:] compressed_link_ids long long num_nodes + long long num_links long long zones bint block_flows_through_centroids bint a_star diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 211d292af..d1b67f8a5 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -136,6 +136,7 @@ cdef class RouteChoiceSet: self.ids_graph_view = graph.compact_graph.id.values self.num_nodes = graph.compact_num_nodes + self.num_links = graph.compact_num_links self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows @@ -814,6 +815,8 @@ cdef class RouteChoiceSet: cdef: vector[double] *loads vector[double] *route_set_prob + vector[double] *collective_link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[vector[double] *] *link_loads = new vector[vector[double] *](self.ods.size()) # FIXME FREE ME vector[long long] *link_union vector[long long].const_iterator link_union_iter @@ -830,65 +833,77 @@ cdef class RouteChoiceSet: int i fprintf(stderr, "starting link loading\n") - with nogil, parallel(num_threads=1): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. - # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - fprintf(stderr, "core: %d\n", threadid()) + with nogil: + with parallel(num_threads=1): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
+ # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + fprintf(stderr, "core: %d\n", threadid()) + + for i in prange(self.ods.size()): + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) + + route_set = deref(self.results)[i] + fprintf(stderr, "got route set\n") + link_union = deref(self.link_union_set)[i] + fprintf(stderr, "got link union\n") + route_set_prob = deref(self.prob_set)[i] + fprintf(stderr, "got route set probsk\n") + + fprintf(stderr, "making new loads vector\n") + loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + + fprintf(stderr, "starting route iteration\n") + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + if prob == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
+ links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + # fprintf(stderr, "starting link iteration\n") + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + + fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob + + inc(link_iter) + + deref(link_loads)[i] = loads + with gil: + print("path file:", origin_index, dest_index, deref(loads)) + + for i in range(self.ods.size()): + loads = deref(link_loads)[i] + link_union = deref(self.link_union_set)[i] - for i in prange(self.ods.size()): origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] demand = matrix_view[origin_index, dest_index] - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) - route_set = deref(self.results)[i] - fprintf(stderr, "got route set\n") - link_union = deref(self.link_union_set)[i] - fprintf(stderr, "got link union\n") - route_set_prob = deref(self.prob_set)[i] - fprintf(stderr, "got route set probsk\n") - - fprintf(stderr, "making new loads vector\n") - loads = new vector[double](link_union.size(), 0.0) - - fprintf(stderr, "starting route iteration\n") - # We now iterate over all routes in the route_set, each route has an associated probability - route_prob_iter = route_set_prob.cbegin() - for route in deref(route_set): - load = demand * deref(route_prob_iter) - inc(route_prob_iter) - - if load == 0.0: - continue - - # For each link in 
the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. - links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved - sort(links.begin(), links.end()) - - # links and link_union are sorted, and links is a subset of link_union - link_union_iter = link_union.cbegin() - link_iter = links.cbegin() - - # fprintf(stderr, "starting link iteration\n") - while link_iter != links.cend(): - # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): - inc(link_union_iter) - - fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) - deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + load - - inc(link_iter) - - with gil: - print(origin_index, dest_index, deref(loads)) + for j in range(link_union.size()): + deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] + with gil: + print("link loads:", deref(collective_link_loads)) @cython.wraparound(False) From b12b2a910f2239fe9f63ade7410fd23b1869d9d7 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 
2024 17:08:28 +1000 Subject: [PATCH 03/52] Fix tests and segfaults --- aequilibrae/paths/route_choice.pyx | 15 ++++++++---- tests/aequilibrae/paths/test_route_choice.py | 24 ++++++++++++-------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index d1b67f8a5..ae648ec5c 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -162,29 +162,36 @@ cdef class RouteChoiceSet: del link_vec del route_set del self.results + self.results = nullptr if self.link_union_set != nullptr: for link_vec in deref(self.link_union_set): del link_vec - del self.link_union_vec + del self.link_union_set + self.link_union_set = nullptr if self.cost_set != nullptr: for double_vec in deref(self.cost_set): del double_vec - del self.cost_vec + del self.cost_set + self.cost_set = nullptr if self.gamma_set != nullptr: for double_vec in deref(self.gamma_set): del double_vec - del self.gamma_vec + del self.gamma_set + self.gamma_set = nullptr if self.prob_set != nullptr: for double_vec in deref(self.prob_set): del double_vec - del self.prob_vec + del self.prob_set + self.prob_set = nullptr if self.ods != nullptr: del self.ods + self.ods = prob_set = nullptr + @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index c02c4ad1c..2e632ac05 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -75,8 +75,9 @@ def test_route_choice_empty_path(self): rc = RouteChoiceSet(self.graph) a = 1 + rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs) self.assertFalse( - rc.batched([(a, a)], max_routes=0, max_depth=3, **kwargs), + rc.get_results(), "Route set from self to self should be empty", ) @@ -103,7 +104,8 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in 
np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), len(nodes), "Requested number of route sets not returned") @@ -121,7 +123,8 @@ def test_route_choice_duplicates_batched(self): max_routes = 20 with self.assertWarns(UserWarning): - results = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") self.assertEqual(len(gb), 1, "Duplicates not dropped") @@ -153,7 +156,8 @@ def test_round_trip(self): max_routes = 20 path = join(self.project.project_base_path, "batched results") - table = rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10) + table = rc.get_results().to_pandas() rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) @@ -164,7 +168,7 @@ def test_round_trip(self): .reset_index(drop=True) ) - table = table.to_pandas().sort_values(by=["origin id", "destination id"]).reset_index(drop=True) + table = table.sort_values(by=["origin id", "destination id"]).reset_index(drop=True) pd.testing.assert_frame_equal(table, new_table) @@ -175,7 +179,7 @@ def test_cost_results(self): rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = rc.get_results().to_pandas() - breakpoint() + # breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: @@ -186,8 +190,8 @@ def test_gamma_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, 
max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = rc.get_results().to_pandas() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: @@ -197,8 +201,8 @@ def test_prob_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - table = rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = table.to_pandas() + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + table = rc.get_results().to_pandas() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From 79ac2eabd27d06056938a9703a958eaa892eb286 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Fri, 1 Mar 2024 17:09:05 +1000 Subject: [PATCH 04/52] Scratch comments --- aequilibrae/paths/route_choice.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index ae648ec5c..908f85ddf 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -847,7 +847,6 @@ cdef class RouteChoiceSet: fprintf(stderr, "core: %d\n", threadid()) for i in prange(self.ods.size()): - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) route_set = deref(self.results)[i] fprintf(stderr, "got route set\n") @@ -890,14 +889,14 @@ cdef class RouteChoiceSet: while deref(link_iter) != deref(link_union_iter): inc(link_union_iter) - fprintf(stderr, "adding load of %f to link %d because link %d is in route\n", load, deref(link_union_iter), deref(link_iter)) + fprintf(stderr, "adding prob of %f to link %d because link %d is in route\n", prob, deref(link_union_iter), deref(link_iter)) deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob inc(link_iter) 
deref(link_loads)[i] = loads with gil: - print("path file:", origin_index, dest_index, deref(loads)) + print("path file:", deref(loads)) for i in range(self.ods.size()): loads = deref(link_loads)[i] @@ -906,11 +905,13 @@ cdef class RouteChoiceSet: origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] demand = matrix_view[origin_index, dest_index] + fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) for j in range(link_union.size()): deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] with gil: print("link loads:", deref(collective_link_loads)) + return deref(collective_link_loads) @cython.wraparound(False) From 3bf495bc78f81375a3840a590df0e45d61c4e5c3 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 09:10:07 +1000 Subject: [PATCH 05/52] Separate path file generation and link loading Add method to map compressed link IDs to network IDs --- aequilibrae/paths/graph.py | 46 ++++ aequilibrae/paths/route_choice.pxd | 14 ++ aequilibrae/paths/route_choice.pyx | 239 ++++++++++++------- tests/aequilibrae/paths/test_route_choice.py | 12 + 4 files changed, 227 insertions(+), 84 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 2a75572c1..02c2e826d 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,6 +4,7 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional +import functools import numpy as np import pandas as pd @@ -167,6 +168,12 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: self.__build_compressed_graph() self.compact_num_links = self.compact_graph.shape[0] + # The cache property should be recalculated when the graph has been reprepared + try: + del self.compressed_link_network_mapping + except AttributeError: + pass + def 
__build_compressed_graph(self): build_compressed_graph(self) @@ -505,6 +512,45 @@ def save_compressed_correspondence(self, path, mode_name, mode_id): node_path = join(path, f"nodes_to_indices_c{mode_name}_{mode_id}.feather") pd.DataFrame(self.nodes_to_indices, columns=["node_index"]).to_feather(node_path) + @functools.cached_property + def compressed_link_network_mapping(self): + """ + Two arrays providing a mapping of compressed id to link id. + + Uses sparse compression. Index ``idx`` by the by compressed id and compressed id + 1, the + network IDs are then in the range ``idx[id]:idx[id + 1]``. + + .. code-block:: python + + >>> idx, data = graph.compressed_link_network_mapping + >>> data[idx[id]:idx[id + 1]] # ==> Slice of network ID's corresponding to the compressed ID + + Links not in the compressed graph are not contained within the ``data`` array. + + :Returns: + **idx** (:obj:`np.array`): index array for ``data`` + **data** (:obj:`np.array`): array of link ids + """ + + # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, + # we skip them. 
+ filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] + gb = filtered.groupby(by="__compressed_id__", sort=True) + idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) + data = np.zeros(len(filtered), dtype=np.uint32) + + i = 0 + for compressed_id, df in gb: + idx[compressed_id] = i + values = df.link_id.values + for j in range(len(values)): + data[i + j] = values[j] + + i += len(values) + + idx[-1] = i + return idx, data + class Graph(GraphBase): def __init__(self, *args, **kwargs): diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index a12fd6c72..32c5f077c 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -148,6 +148,9 @@ cdef class RouteChoiceSet: vector[vector[double] *] *gamma_set vector[vector[double] *] *prob_set + unsigned int [:] mapping_idx + unsigned int [:] mapping_data + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -212,6 +215,17 @@ cdef class RouteChoiceSet: # cdef void link_loading(self, double[:, :] matrix_view) nogil + @staticmethod + cdef vector[vector[double] *] *compute_path_files( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &results, + vector[vector[long long] *] &link_union_set, + vector[vector[double] *] &prob_set + ) noexcept nogil + + cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil + cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil + @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( vector[pair[long long, long long]] &ods, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 908f85ddf..f186f0715 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -75,6 +75,7 @@ import itertools import pathlib import logging import warnings +from 
aequilibrae.matrix import AequilibraeMatrix cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. cimport pyarrow as pa @@ -140,6 +141,8 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows + self.mapping_idx, self.mapping_data = graph.compressed_link_network_mapping + def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a @@ -813,105 +816,173 @@ cdef class RouteChoiceSet: return prob_vec - def link_loading(self, double[:, :] matrix_view): + def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False): if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: raise ValueError("link loading requires Route Choice path_size_logit results") + if not isinstance(matrix, AequilibraeMatrix): + raise ValueError("`matrix` is not an AequilibraE matrix") + cdef: - vector[double] *loads - vector[double] *route_set_prob - vector[double] *collective_link_loads = new vector[double](self.num_links) # FIXME FREE ME - vector[vector[double] *] *link_loads = new vector[vector[double] *](self.ods.size()) # FIXME FREE ME + vector[vector[double] *] *path_files = nullptr + vector[double] *ll + + if generate_path_files: + path_files = RouteChoiceSet.compute_path_files( + deref(self.ods), + deref(self.results), + deref(self.link_union_set), + deref(self.prob_set) + ) + tmp = [] + for vec in deref(path_files): + tmp.append(deref(vec)) + print(tmp) + + def apply_link_loading_func(m): + if generate_path_files: + ll = self.apply_link_loading_from_path_files( + m, + deref(path_files), + ) + else: + ll = self.apply_link_loading(m) + return deref(ll) + + if len(matrix.view_names) == 1: + link_loads = apply_link_loading_func(matrix.matrix_view) + else: + link_loads = { + name: apply_link_loading_func(matrix.matrix_view[:, :, i]) + for i, name in 
enumerate(matrix.names) + } + return link_loads + + + @staticmethod + cdef vector[vector[double] *] *compute_path_files( + vector[pair[long long, long long]] &ods, + vector[RouteSet_t *] &results, + vector[vector[long long] *] &link_union_set, + vector[vector[double] *] &prob_set + ) noexcept nogil: + cdef: + vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME vector[long long] *link_union - vector[long long].const_iterator link_union_iter + vector[double] *loads + vector[double] *link - vector[long long] *links + vector[long long].const_iterator link_union_iter vector[long long].const_iterator link_iter - vector[double].const_iterator prob_iter + size_t link_loc + double prob + int i + + with parallel(num_threads=6): + # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. + # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch + + for i in prange(ods.size()): + link_union = link_union_set[i] + loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + + # We now iterate over all routes in the route_set, each route has an associated probability + route_prob_iter = prob_set[i].cbegin() + for route in deref(results[i]): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + if prob == 0.0: + continue + + # For each link in the route, we need to assign the appropriate demand * prob + # Because the link union is known to be sorted, if the links in the route are also sorted we can just step + # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us + # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. + # This is also incredibly cache efficient, the only downsides are that the code is harder to read + # and it requires sorting the route. 
NOTE: the sorting of routes is technically something that is already + # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted + # then used an N-way merge we could reuse the sorted routes and the sorted link union. + links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + sort(links.begin(), links.end()) + + # links and link_union are sorted, and links is a subset of link_union + link_union_iter = link_union.cbegin() + link_iter = links.cbegin() + + while link_iter != links.cend(): + # Find the next location for the current link in links + while deref(link_iter) != deref(link_union_iter): + inc(link_union_iter) + link_loc = link_union_iter - link_union.cbegin() + deref(loads)[link_loc] = deref(loads)[link_loc] + prob # += here results in all zeros? Odd + + inc(link_iter) + + deref(link_loads)[i] = loads + + return link_loads + + cdef vector[double] *apply_link_loading_from_path_files( + RouteChoiceSet self, + double[:, :] matrix_view, + vector[vector[double] *] &path_files + ) noexcept nogil: + cdef: + vector[double] *loads + vector[long long] *link_union + long origin_index, dest_index + double demand + + vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + + for i in range(self.ods.size()): + loads = path_files[i] + link_union = deref(self.link_union_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + for j in range(link_union.size()): + link = deref(link_union)[j] + deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] # += here results in all zeros? 
Odd + + return link_loads + + cdef vector[double] *apply_link_loading(self, double[:, :] matrix_view) noexcept nogil: + cdef: RouteSet_t *route_set - double demand, load, prob - size_t length + vector[double] *route_set_prob long origin_index, dest_index - int i + double demand, prob, load + + vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + + for i in range(self.ods.size()): + route_set = deref(self.results)[i] + route_set_prob = deref(self.prob_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + + load = prob * demand + for link in deref(route): + deref(link_loads)[link] = deref(link_loads)[link] + load # += here results in all zeros? Odd + + return link_loads - fprintf(stderr, "starting link loading\n") - with nogil: - with parallel(num_threads=1): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - fprintf(stderr, "core: %d\n", threadid()) - - for i in prange(self.ods.size()): - - route_set = deref(self.results)[i] - fprintf(stderr, "got route set\n") - link_union = deref(self.link_union_set)[i] - fprintf(stderr, "got link union\n") - route_set_prob = deref(self.prob_set)[i] - fprintf(stderr, "got route set probsk\n") - - fprintf(stderr, "making new loads vector\n") - loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME - - fprintf(stderr, "starting route iteration\n") - # We now iterate over all routes in the route_set, each route has an associated probability - route_prob_iter = route_set_prob.cbegin() - for route in deref(route_set): - prob = deref(route_prob_iter) - inc(route_prob_iter) - - if prob == 0.0: - continue - - # For each link in the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. 
- links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME - sort(links.begin(), links.end()) - - # links and link_union are sorted, and links is a subset of link_union - link_union_iter = link_union.cbegin() - link_iter = links.cbegin() - - # fprintf(stderr, "starting link iteration\n") - while link_iter != links.cend(): - # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): - inc(link_union_iter) - - fprintf(stderr, "adding prob of %f to link %d because link %d is in route\n", prob, deref(link_union_iter), deref(link_iter)) - deref(loads)[link_union_iter - link_union.cbegin()] = deref(loads)[link_union_iter - link_union.cbegin()] + prob - - inc(link_iter) - - deref(link_loads)[i] = loads - with gil: - print("path file:", deref(loads)) - - for i in range(self.ods.size()): - loads = deref(link_loads)[i] - link_union = deref(self.link_union_set)[i] - - origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] - dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] - demand = matrix_view[origin_index, dest_index] - fprintf(stderr, "od idx: %d, %d has demand: %f\n", origin_index, dest_index, demand) - - for j in range(link_union.size()): - deref(collective_link_loads)[deref(link_union)[j]] = deref(collective_link_loads)[deref(link_union)[j]] + demand * deref(loads)[j] - with gil: - print("link loads:", deref(collective_link_loads)) - return deref(collective_link_loads) @cython.wraparound(False) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 2e632ac05..a63ef6ba5 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -208,6 +208,18 @@ def test_prob_results(self): for od, df in gb: self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + def test_link_loading(self): + + 
np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + link_loads = rc.link_loading(self.mat) + link_loads2 = rc.link_loading(self.mat, generate_path_files=True) + + np.testing.assert_array_almost_equal(link_loads, link_loads2) + def generate_line_strings(project, graph, results): """Debug method""" From 61edffedc1bf5274a033333f814c3ee1d087737f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 10:41:07 +1000 Subject: [PATCH 06/52] Fix link ID ordering in compressed -> network mapping --- aequilibrae/paths/graph.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 02c2e826d..6ddf56a3f 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -532,6 +532,9 @@ def compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't + # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute + # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, # we skip them. filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] @@ -543,13 +546,25 @@ def compressed_link_network_mapping(self): for compressed_id, df in gb: idx[compressed_id] = i values = df.link_id.values - for j in range(len(values)): - data[i + j] = values[j] + a = df.a_node.values + b = df.b_node.values + + # In order to ensure that the link IDs come out in the correct order we must walk the links + # we do this assuming the `a` array is sorted. 
+ j = 0 + x = self.compact_graph.a_node.iat[compressed_id] + while True: + tmp = a.searchsorted(x) + if tmp < len(a) and a[tmp] == x: + x = b[tmp] + data[i + j] = values[tmp] + else: + break + j += 1 i += len(values) idx[-1] = i - return idx, data class Graph(GraphBase): From d1c494ed88a4aabc9916dd2cb56cd11ff0032fe1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:08:18 +1000 Subject: [PATCH 07/52] We don't need functools for this --- aequilibrae/paths/graph.py | 27 ++++++++++++++++++--------- aequilibrae/paths/route_choice.pyx | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index 6ddf56a3f..a9cfead21 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,7 +4,6 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional -import functools import numpy as np import pandas as pd @@ -96,6 +95,9 @@ def __init__(self, logger=None): self.dead_end_links = np.array([]) + self.compressed_link_network_mapping_idx = None + self.compressed_link_network_mapping_data = None + # Randomly generate a unique Graph ID randomly self._id = uuid.uuid4().hex @@ -168,11 +170,9 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: self.__build_compressed_graph() self.compact_num_links = self.compact_graph.shape[0] - # The cache property should be recalculated when the graph has been reprepared - try: - del self.compressed_link_network_mapping - except AttributeError: - pass + # The cache property should be recalculated when the graph has been re-prepared + self.compressed_link_network_mapping_idx = None + self.compressed_link_network_mapping_data = None def __build_compressed_graph(self): build_compressed_graph(self) @@ -512,10 +512,9 @@ def save_compressed_correspondence(self, path, mode_name, mode_id): node_path = join(path, f"nodes_to_indices_c{mode_name}_{mode_id}.feather") pd.DataFrame(self.nodes_to_indices, 
columns=["node_index"]).to_feather(node_path) - @functools.cached_property - def compressed_link_network_mapping(self): + def create_compressed_link_network_mapping(self): """ - Two arrays providing a mapping of compressed id to link id. + Create two arrays providing a mapping of compressed id to link id. Uses sparse compression. Index ``idx`` by the by compressed id and compressed id + 1, the network IDs are then in the range ``idx[id]:idx[id + 1]``. @@ -532,6 +531,11 @@ def compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ + # Cache the result, this isn't a huge computation but isn't worth doing twice + if self.compressed_link_network_mapping_idx is not None \ + and self.compressed_link_network_mapping_data is not None: + return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute @@ -566,6 +570,11 @@ def compressed_link_network_mapping(self): idx[-1] = i + self.compressed_link_network_mapping_idx = idx + self.compressed_link_network_mapping_data = data + + return idx, data + class Graph(GraphBase): def __init__(self, *args, **kwargs): diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index f186f0715..0d0f4529f 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -141,7 +141,7 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.mapping_idx, self.mapping_data = graph.compressed_link_network_mapping + self.mapping_idx, self.mapping_data = graph.create_compressed_link_network_mapping() def __dealloc__(self): """ From fbc04b784ca0a5f128421036d23306333c1aa97c Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:09:17 +1000 
Subject: [PATCH 08/52] Reverse routes during computation, map link IDs during output --- aequilibrae/paths/route_choice.pxd | 3 ++- aequilibrae/paths/route_choice.pyx | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index 32c5f077c..a36bb6f1a 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -118,6 +118,7 @@ cdef extern from "arrow/builder.h" namespace "arrow" nogil: libpa.CStatus Append(const uint32_t value) libpa.CStatus AppendValues(const vector[uint32_t] &values) libpa.CStatus AppendValues(vector[uint32_t].const_reverse_iterator values_begin, vector[uint32_t].const_reverse_iterator values_end) + libpa.CStatus AppendValues(const uint32_t *values, int64_t length, const uint8_t *valid_bytes = nullptr) cdef cppclass CDoubleBuilder" arrow::DoubleBuilder"(libpa.CArrayBuilder): CDoubleBuilder(libpa.CMemoryPool* pool) @@ -226,8 +227,8 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil - @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( + RouteChoiceSet self, vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 0d0f4529f..01e108e79 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -63,7 +63,7 @@ from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map from libcpp.utility cimport pair -from libcpp.algorithm cimport sort, lower_bound +from libcpp.algorithm cimport sort, lower_bound, reverse from 
cython.operator cimport dereference as deref, preincrement as inc from cython.parallel cimport parallel, prange, threadid cimport openmp @@ -433,7 +433,7 @@ cdef class RouteChoiceSet: del link_union_scratch if where is not None: - table = libpa.pyarrow_wrap_table(RouteChoiceSet.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures if path_size_logit: @@ -584,6 +584,8 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) + reverse(vec.begin(), vec.end()) + for connector in deref(vec): # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements # then add a single element. An incremental set hash function could be of use. However, the since of this set is @@ -661,6 +663,8 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) + reverse(vec.begin(), vec.end()) + for connector in deref(vec): thread_cost[connector] *= penatly @@ -989,8 +993,8 @@ cdef class RouteChoiceSet: @cython.embedsignature(True) @cython.boundscheck(False) @cython.initializedcheck(False) - @staticmethod cdef shared_ptr[libpa.CTable] make_table_from_results( + RouteChoiceSet self, vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, @@ -1018,6 +1022,7 @@ cdef class RouteChoiceSet: libpa.CResult[shared_ptr[libpa.CArray]] route_set_results int offset = 0 + size_t network_link_begin, network_link_end, link bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] @@ -1043,9 +1048,17 @@ cdef class RouteChoiceSet: d_col.Append(ods[i].second) 
offset_builder.Append(offset) - path_builder.AppendValues(route.crbegin(), route.crend()) - offset += route.size() + for link in deref(route): + # Translate the compressed link IDs in route to network link IDs, this is a 1:n mapping + network_link_begin = self.mapping_idx[link] + network_link_end = self.mapping_idx[link + 1] + path_builder.AppendValues( + &self.mapping_data[network_link_begin], + network_link_end - network_link_begin + ) + + offset += network_link_end - network_link_begin path_builder.Finish(&paths) @@ -1088,7 +1101,7 @@ cdef class RouteChoiceSet: raise ValueError("Route Choice results not computed yet") table = libpa.pyarrow_wrap_table( - RouteChoiceSet.make_table_from_results( + self.make_table_from_results( deref(self.ods), deref(self.results), self.cost_set, From 9a7ee100510bcf190dbb5cffeb7b914552353ce4 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:09:55 +1000 Subject: [PATCH 09/52] Fix tests --- tests/aequilibrae/paths/test_route_choice.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a63ef6ba5..a33086732 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -34,6 +34,7 @@ def setUp(self) -> None: self.mat.computational_view() def tearDown(self) -> None: + self.mat.close() self.project.close() def test_route_choice(self): @@ -50,7 +51,7 @@ def test_route_choice(self): results = rc.run(a, b, max_routes=0, max_depth=1) self.assertEqual(len(results), 1, "Depth of 1 didn't yield a lone route") self.assertListEqual( - results, [(1, 5, 8, 12, 24, 29, 52, 58)], "Initial route isn't the shortest A* route" + results, [(2, 6, 9, 13, 25, 30, 53, 59)], "Initial route isn't the shortest A* route" ) # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths @@ -184,7 +185,11 @@ def 
test_cost_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: for route, cost in zip(df["route set"].values, df["cost"].values): - np.testing.assert_almost_equal(self.graph.cost[route].sum(), cost, err_msg=f"Cost differs for OD {od}") + np.testing.assert_almost_equal( + self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), + cost, + err_msg=f", cost differs for OD {od}" + ) def test_gamma_results(self): np.random.seed(0) @@ -206,7 +211,7 @@ def test_prob_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: - self.assertAlmostEqual(1.0, sum(df["probability"].values), msg="Probability not close to 1.0") + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): From ea0853d6bda845ff0a0eda31bae288eb27b5f8e6 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:14:01 +1000 Subject: [PATCH 10/52] Fix windows compilation --- aequilibrae/paths/route_choice.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 01e108e79..172b2f2e0 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -884,7 +884,7 @@ cdef class RouteChoiceSet: size_t link_loc double prob - int i + long long i with parallel(num_threads=6): # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
From da984187ce51994628b9dd939a94392254b1fe31 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:16:04 +1000 Subject: [PATCH 11/52] Linting --- aequilibrae/paths/graph.py | 6 ++++-- tests/aequilibrae/paths/test_route_choice.py | 3 +-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index a9cfead21..e25ed8353 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -532,8 +532,10 @@ def create_compressed_link_network_mapping(self): """ # Cache the result, this isn't a huge computation but isn't worth doing twice - if self.compressed_link_network_mapping_idx is not None \ - and self.compressed_link_network_mapping_data is not None: + if ( + self.compressed_link_network_mapping_idx is not None + and self.compressed_link_network_mapping_data is not None + ): return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index a33086732..38decf374 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -188,7 +188,7 @@ def test_cost_results(self): np.testing.assert_almost_equal( self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), cost, - err_msg=f", cost differs for OD {od}" + err_msg=f", cost differs for OD {od}", ) def test_gamma_results(self): @@ -214,7 +214,6 @@ def test_prob_results(self): self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): - np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] From 459d4be031eb7278f29f79746bb25b78a5677243 Mon Sep 17 00:00:00 2001 From: Jake-Moss 
Date: Wed, 6 Mar 2024 12:26:48 +1000 Subject: [PATCH 12/52] Add ruff to pre-commit hooks --- .pre-commit-config.yaml | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0e3fbbc53..9a0759689 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,9 +1,18 @@ repos: -- repo: https://github.com/ambv/black - rev: 22.3.0 - hooks: +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.3.0 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] + # Run the formatter. + - id: ruff-format +- repo: https://github.com/ambv/black + rev: 22.3.0 + hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 \ No newline at end of file +- repo: https://github.com/pycqa/flake8 + rev: 4.0.1 + hooks: + - id: flake8 From 19f0d7e07150607d6db01ca6dd6ead557e520504 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 12:31:41 +1000 Subject: [PATCH 13/52] Update black pre-commit hook and drop flake8 --- .pre-commit-config.yaml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9a0759689..d8b2a859c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,10 +9,6 @@ repos: # Run the formatter. 
- id: ruff-format - repo: https://github.com/ambv/black - rev: 22.3.0 + rev: 24.1.1 hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 - hooks: - - id: flake8 From 192748fb5d13fa262dd5faf28b65ab1274cfbcd8 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 15:29:50 +1000 Subject: [PATCH 14/52] Translate link loads from compressed IDs to graph IDs when link Add decorators as well --- aequilibrae/paths/route_choice.pxd | 6 +-- aequilibrae/paths/route_choice.pyx | 59 ++++++++++++++++++++++++++---- 2 files changed, 54 insertions(+), 11 deletions(-) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice.pxd index a36bb6f1a..62df3e43f 100644 --- a/aequilibrae/paths/route_choice.pxd +++ b/aequilibrae/paths/route_choice.pxd @@ -135,6 +135,7 @@ cdef class RouteChoiceSet: double [:] lat_view double [:] lon_view long long [:] ids_graph_view + long long [:] graph_compressed_id_view long long [:] compressed_link_ids long long num_nodes long long num_links @@ -214,14 +215,13 @@ cdef class RouteChoiceSet: double theta ) noexcept nogil - # cdef void link_loading(self, double[:, :] matrix_view) nogil - @staticmethod cdef vector[vector[double] *] *compute_path_files( vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &results, vector[vector[long long] *] &link_union_set, - vector[vector[double] *] &prob_set + vector[vector[double] *] &prob_set, + unsigned int cores ) noexcept nogil cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice.pyx index 172b2f2e0..6f2904bdb 100644 --- a/aequilibrae/paths/route_choice.pyx +++ b/aequilibrae/paths/route_choice.pyx @@ -88,6 +88,7 @@ from libc.stdio cimport fprintf, printf, stderr # It would really be nice if these were modules. 
The 'include' syntax is long deprecated and adds a lot to compilation times include 'basic_path_finding.pyx' +include 'parallel_numpy.pyx' @cython.embedsignature(True) cdef class RouteChoiceSet: @@ -136,6 +137,7 @@ cdef class RouteChoiceSet: self.a_star = False self.ids_graph_view = graph.compact_graph.id.values + self.graph_compressed_id_view = graph.graph.__compressed_id__.values self.num_nodes = graph.compact_num_nodes self.num_links = graph.compact_num_links self.zones = graph.num_zones @@ -820,7 +822,8 @@ cdef class RouteChoiceSet: return prob_vec - def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False): + @cython.embedsignature(True) + def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: @@ -829,6 +832,8 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") + cores = cores if cores > 0 else openmp.omp_get_num_threads() + cdef: vector[vector[double] *] *path_files = nullptr vector[double] *ll @@ -838,7 +843,8 @@ cdef class RouteChoiceSet: deref(self.ods), deref(self.results), deref(self.link_union_set), - deref(self.prob_set) + deref(self.prob_set), + cores, ) tmp = [] for vec in deref(path_files): @@ -853,7 +859,17 @@ cdef class RouteChoiceSet: ) else: ll = self.apply_link_loading(m) - return deref(ll) + + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) + assign_link_loads_cython( + actual, + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. 
+ &deref(ll)[0], + self.graph_compressed_id_view, + cores + ) + del ll + return actual if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) @@ -865,14 +881,23 @@ cdef class RouteChoiceSet: return link_loads - + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) @staticmethod cdef vector[vector[double] *] *compute_path_files( vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &results, vector[vector[long long] *] &link_union_set, - vector[vector[double] *] &prob_set + vector[vector[double] *] &prob_set, + unsigned int cores ) noexcept nogil: + """ + Computes the path files for the provided vector of RouteSets. + + Returns vector of vectors of link loads corresponding to each link in it's link_union_set. + """ cdef: vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME vector[long long] *link_union @@ -886,7 +911,7 @@ cdef class RouteChoiceSet: double prob long long i - with parallel(num_threads=6): + with parallel(num_threads=cores): # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch @@ -932,11 +957,22 @@ cdef class RouteChoiceSet: return link_loads + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) cdef vector[double] *apply_link_loading_from_path_files( RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files ) noexcept nogil: + """ + Apply link loading from path files. + + If path files have already been computed then this is a more efficient manner for the link loading. + + Returns a vector of link loads indexed by compressed link ID. 
+ """ cdef: vector[double] *loads vector[long long] *link_union @@ -959,7 +995,16 @@ cdef class RouteChoiceSet: return link_loads + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) cdef vector[double] *apply_link_loading(self, double[:, :] matrix_view) noexcept nogil: + """ + Apply link loading. + + Returns a vector of link loads indexed by compressed link ID. + """ cdef: RouteSet_t *route_set vector[double] *route_set_prob @@ -987,8 +1032,6 @@ cdef class RouteChoiceSet: return link_loads - - @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) From 6bdb1ffe8b069ffbf6764abbac9c3baa66309561 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 16:39:31 +1000 Subject: [PATCH 15/52] Rename Cython file to avoid name clash --- aequilibrae/paths/{route_choice.pxd => route_choice_set.pxd} | 0 aequilibrae/paths/{route_choice.pyx => route_choice_set.pyx} | 0 setup.py | 4 ++-- tests/aequilibrae/paths/test_route_choice.py | 5 ++--- 4 files changed, 4 insertions(+), 5 deletions(-) rename aequilibrae/paths/{route_choice.pxd => route_choice_set.pxd} (100%) rename aequilibrae/paths/{route_choice.pyx => route_choice_set.pyx} (100%) diff --git a/aequilibrae/paths/route_choice.pxd b/aequilibrae/paths/route_choice_set.pxd similarity index 100% rename from aequilibrae/paths/route_choice.pxd rename to aequilibrae/paths/route_choice_set.pxd diff --git a/aequilibrae/paths/route_choice.pyx b/aequilibrae/paths/route_choice_set.pyx similarity index 100% rename from aequilibrae/paths/route_choice.pyx rename to aequilibrae/paths/route_choice_set.pyx diff --git a/setup.py b/setup.py index b68b463a6..e9a851217 100644 --- a/setup.py +++ b/setup.py @@ -61,8 +61,8 @@ ) ext_mod_bfs_le = Extension( - "aequilibrae.paths.route_choice", - [join("aequilibrae", "paths", "route_choice.pyx")], + "aequilibrae.paths.route_choice_set", + [join("aequilibrae", "paths", "route_choice_set.pyx")], 
extra_compile_args=compile_args, extra_link_args=link_args, define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 38decf374..5d310078d 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -9,13 +9,13 @@ import pyarrow as pa from aequilibrae import Graph, Project -from aequilibrae.paths.route_choice import RouteChoiceSet +from aequilibrae.paths.route_choice_set import RouteChoiceSet from ...data import siouxfalls_project # In these tests `max_depth` should be provided to prevent a runaway test case and just burning CI time -class TestRouteChoice(TestCase): +class TestRouteChoiceSet(TestCase): def setUp(self) -> None: os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] @@ -180,7 +180,6 @@ def test_cost_results(self): rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) table = rc.get_results().to_pandas() - # breakpoint() gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: From 27c6d85a4a173b7b3b2abea3265280795782e60f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 6 Mar 2024 17:11:12 +1000 Subject: [PATCH 16/52] Add wrapper object and begin API work --- aequilibrae/paths/__init__.py | 1 + aequilibrae/paths/route_choice.py | 175 +++++++++++++++++++ aequilibrae/paths/route_choice_set.pyx | 21 ++- tests/aequilibrae/paths/test_route_choice.py | 1 + 4 files changed, 191 insertions(+), 7 deletions(-) create mode 100644 aequilibrae/paths/route_choice.py diff --git a/aequilibrae/paths/__init__.py b/aequilibrae/paths/__init__.py index 61af0cc6e..42b21a0f6 100644 --- a/aequilibrae/paths/__init__.py +++ b/aequilibrae/paths/__init__.py @@ -8,6 +8,7 @@ from aequilibrae.paths.traffic_assignment import TrafficAssignment, TransitAssignment from aequilibrae.paths.vdf import VDF from aequilibrae.paths.graph import Graph, 
TransitGraph +from aequilibrae.paths.route_choice import RouteChoice from aequilibrae import global_logger diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py new file mode 100644 index 000000000..9de098905 --- /dev/null +++ b/aequilibrae/paths/route_choice.py @@ -0,0 +1,175 @@ +import numpy as np +import socket +from aequilibrae.context import get_active_project +from aequilibrae.paths.graph import Graph +from aequilibrae.paths.route_choice_set import RouteChoiceSet +from typing import Optional +import pyarrow as pa +import pathlib + +import logging + + +class RouteChoice: + all_algorithms = ["bfsle", "lp", "link-penalisation"] + default_paramaters = { + "beta": 1.0, + "theta": 1.0, + "penalty": 1.1, + "seed": 0, + "max_routes": 0, + "max_depth": 0, + } + + def __init__(self, graph: Graph, project=None): + self.paramaters = self.default_paramaters.copy() + + proj = project or get_active_project(must_exist=False) + self.project = proj + + self.logger = proj.logger if proj else logging.getLogger("aequilibrae") + + self.cores: int = 0 + self.graph = graph + self.__rc = RouteChoiceSet(graph) + + self.schema = RouteChoiceSet.schema + self.psl_schema = RouteChoiceSet.psl_schema + + self.compact_link_loads: Optional[np.array] = None + self.link_loads: Optional[np.array] = None + self.results: Optional[pa.Table] = None + self.where: Optional[pathlib.Path] = None + + def set_algorithm(self, algorithm: str): + """ + Chooses the assignment algorithm. + Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation' + + 'lp' is also accepted as an alternative to 'link-penalisation' + + :Arguments: + **algorithm** (:obj:`str`): Algorithm to be used + """ + algo_dict = {i: i for i in self.all_algorithms} + algo_dict["lp"] = "link-penalisation" + algo = algo_dict.get(algorithm.lower()) + + if algo is None: + raise AttributeError(f"Assignment algorithm not available. 
Choose from: {','.join(self.all_algorithms)}") + + self.algorithm = algo + self._config["Algorithm"] = algo + + def set_cores(self, cores: int) -> None: + """Allows one to set the number of cores to be used + + Inherited from :obj:`AssignmentResultsBase` + + :Arguments: + **cores** (:obj:`int`): Number of CPU cores to use + """ + if not self.classes: + raise RuntimeError("You need load transit classes before overwriting the number of cores") + + self.cores = cores + + def set_paramaters(self, par: dict): + """ + Sets the parameters for the route choice TODO, do we want link specific values? + + "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. + "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. + + Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + + - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is + largely dependent on the size of the paths within the network. For very small networks a value of 10 + is a recommended starting point. For large networks a good starting value is 5. Increase the value + until the number of desired routes is being consistently returned. + + - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, + it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, + specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. 
+ + + Parameter values can be scalars (same values for the entire network) or network field names + (link-specific values) - Examples: {'alpha': 0.15, 'beta': 4.0} or {'alpha': 'alpha', 'beta': 'beta'} + + + :Arguments: + **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF + """ + + if any(key not in self.default_paramaters for key in par.keys()): + raise ValueError("Invalid parameter provided") + + self.paramaters = self.default_paramaters | par + + def set_save_path_files(self, save_it: bool) -> None: + """Turn path saving on or off. + + :Arguments: + **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved + """ + self.save_path_files = save_it + + def set_save_routes(self, where: Optional[str] = None) -> None: + """ + Set save path for route choice resutls. Provide ``None`` to disable. + + **warning** enabling route saving will disable in memory results. Viewing the results will read the results + from disk first. + + :Arguments: + **save_it** (:obj:`bool`): Boolean to indicate whether routes should be saved + """ + self.where = pathlib.Path(where) if where is not None else None + + def info(self) -> dict: + """Returns information for the transit assignment procedure + + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID'. 
+ + The classes key is also a dictionary with all the user classes per transit class and their respective + matrix totals + + :Returns: + **info** (:obj:`dict`): Dictionary with summary information + """ + + matrix_totals = {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + + info = { + "Algorithm": self.algorithm, + "Matrix totals": matrix_totals, + "Computer name": socket.gethostname(), + "Procedure ID": self.procedure_id, + "Parameters": self.paramaters, + } + return info + + def log_specification(self): + self.logger.info("Route Choice specification") + self.logger.info(self._config) + + def results(self): + """Returns the results of the route choice procedure + + Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + + :Returns: + **results** (:obj:`pa.Table`): Table with the results of the route choice procedure + + """ + if self.results is None: + try: + self.results = self.__rc.get_results() + except RuntimeError as err: + if self.where is None: + raise ValueError("Route choice results not computed and read/save path not specificed") from err + self.results = pa.dataset.dataset( + self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema) + ) + + return self.results diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 6f2904bdb..3d0786658 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -52,7 +52,7 @@ routes aren't required small-ish things like the memcpy and banned link set copy """ -from aequilibrae import Graph +from aequilibrae.paths.graph import Graph from libc.math cimport INFINITY, pow, exp from libc.string cimport memcpy @@ -846,11 +846,14 @@ cdef class RouteChoiceSet: deref(self.prob_set), cores, ) + + # FIXME, write out path files tmp = [] for vec in deref(path_files): tmp.append(deref(vec)) 
print(tmp) + def apply_link_loading_func(m): if generate_path_files: ll = self.apply_link_loading_from_path_files( @@ -860,16 +863,20 @@ cdef class RouteChoiceSet: else: ll = self.apply_link_loading(m) + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. + compressed = &deref(ll)[0] + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) assign_link_loads_cython( actual, - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. - &deref(ll)[0], + compressed, self.graph_compressed_id_view, cores ) + compressed = np.array(compressed, copy=True) del ll - return actual + return actual.reshape(-1), compressed.reshape(-1) + if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) @@ -1137,11 +1144,11 @@ cdef class RouteChoiceSet: def get_results(self): # Cython doesn't like this type annotation... -> pa.Table: """ :Returns: - **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of compact link IDs for - each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. None if ``where`` was not None. + **route sets** (:obj:`pyarrow.Table`): Returns a table of OD pairs to lists of link IDs for + each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. 
""" if self.results == nullptr or self.ods == nullptr: - raise ValueError("Route Choice results not computed yet") + raise RuntimeError("Route Choice results not computed yet") table = libpa.pyarrow_wrap_table( self.make_table_from_results( diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 5d310078d..879dfda6a 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -10,6 +10,7 @@ from aequilibrae import Graph, Project from aequilibrae.paths.route_choice_set import RouteChoiceSet +from aequilibrae.paths.route_choice import RouteChoice from ...data import siouxfalls_project From a6bcf8607123c51e6722d186a0e7451351976610 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 12 Mar 2024 15:39:06 +1000 Subject: [PATCH 17/52] Cannot rely on the ordering of nodes when building the mapping --- aequilibrae/paths/graph.py | 3 ++- tests/aequilibrae/paths/test_route_choice.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index e25ed8353..f28834151 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -558,7 +558,8 @@ def create_compressed_link_network_mapping(self): # In order to ensure that the link IDs come out in the correct order we must walk the links # we do this assuming the `a` array is sorted. j = 0 - x = self.compact_graph.a_node.iat[compressed_id] + # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup + x = a[np.isin(a, b, invert=True, assume_unique=True)][0] while True: tmp = a.searchsorted(x) if tmp < len(a) and a[tmp] == x: diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 879dfda6a..c31c7b6a2 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -186,7 +186,7 @@ def test_cost_results(self): for od, df in gb: for route, cost in zip(df["route set"].values, df["cost"].values): np.testing.assert_almost_equal( - self.graph.cost[self.graph.graph.link_id.isin(route).values.nonzero()[0]].sum(), + self.graph.network.set_index("link_id").loc[route][self.graph.cost_field].sum(), cost, err_msg=f", cost differs for OD {od}", ) From ed03ca07de82f274a23df3031df6165ecf88d1b0 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 13 Mar 2024 11:38:06 +1000 Subject: [PATCH 18/52] Rename gamma -> path_overlap --- aequilibrae/paths/route_choice_set.pxd | 8 +-- aequilibrae/paths/route_choice_set.pyx | 72 ++++++++++---------- tests/aequilibrae/paths/test_route_choice.py | 4 +- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 62df3e43f..e80a41c6a 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -147,7 +147,7 @@ cdef class RouteChoiceSet: vector[RouteSet_t *] *results vector[vector[long long] *] *link_union_set vector[vector[double] *] *cost_set - vector[vector[double] *] *gamma_set + vector[vector[double] *] *path_overlap_set vector[vector[double] *] *prob_set unsigned int [:] mapping_idx @@ -200,7 +200,7 @@ cdef class RouteChoiceSet: cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil @staticmethod - cdef vector[double] *compute_gamma( + cdef vector[double] *compute_path_overlap( RouteSet_t *route_set, pair[vector[long 
long] *, vector[long long] *] &freq_set, vector[double] &total_cost, @@ -210,7 +210,7 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[double] *compute_prob( vector[double] &total_cost, - vector[double] &gamma_vec, + vector[double] &path_overlap_vec, double beta, double theta ) noexcept nogil @@ -232,7 +232,7 @@ cdef class RouteChoiceSet: vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, - vector[vector[double] *] *gamma_set, + vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 3d0786658..d6ed083f0 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -110,7 +110,7 @@ cdef class RouteChoiceSet: pa.field("destination id", pa.uint32(), nullable=False), pa.field("route set", route_set_dtype, nullable=False), pa.field("cost", pa.float64(), nullable=False), - pa.field("gamma", pa.float64(), nullable=False), + pa.field("path overlap", pa.float64(), nullable=False), pa.field("probability", pa.float64(), nullable=False), ]) @@ -119,7 +119,7 @@ cdef class RouteChoiceSet: results = nullptr link_union_set = nullptr cost_set = nullptr - gamma_set = nullptr + path_overlap_set = nullptr prob_set = nullptr ods = nullptr @@ -181,11 +181,11 @@ cdef class RouteChoiceSet: del self.cost_set self.cost_set = nullptr - if self.gamma_set != nullptr: - for double_vec in deref(self.gamma_set): + if self.path_overlap_set != nullptr: + for double_vec in deref(self.path_overlap_set): del double_vec - del self.gamma_set - self.gamma_set = nullptr + del self.path_overlap_set + self.path_overlap_set = nullptr if self.prob_set != nullptr: for double_vec in deref(self.prob_set): @@ -330,13 +330,13 @@ cdef class RouteChoiceSet: vector[long long] *link_union_scratch = nullptr vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] 
*cost_set = nullptr - vector[vector[double] *] *gamma_set = nullptr + vector[vector[double] *] *path_overlap_set = nullptr vector[vector[double] *] *prob_set = nullptr if path_size_logit: link_union_set = new vector[vector[long long] *](max_results_len) cost_set = new vector[vector[double] *](max_results_len) - gamma_set = new vector[vector[double] *](max_results_len) + path_overlap_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) self.deallocate_results() # We have be storing results from a previous run @@ -352,12 +352,12 @@ cdef class RouteChoiceSet: # - the internal objects were freed by the previous iteration link_union_set.clear() cost_set.clear() - gamma_set.clear() + path_overlap_set.clear() prob_set.clear() link_union_set.resize(batch_len) cost_set.resize(batch_len) - gamma_set.resize(batch_len) + path_overlap_set.resize(batch_len) prob_set.resize(batch_len) with nogil, parallel(num_threads=c_cores): @@ -415,8 +415,8 @@ cdef class RouteChoiceSet: freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) - deref(gamma_set)[i] = RouteChoiceSet.compute_gamma(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) - deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(gamma_set)[i]), beta, theta) + deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) + deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, theta) del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) deref(results)[i] = route_set @@ -435,14 +435,14 @@ cdef class RouteChoiceSet: del link_union_scratch if where is not None: - table = 
libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, gamma_set, prob_set)) + table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set)) # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner internal structures if path_size_logit: for j in range(batch_len): del deref(link_union_set)[j] del deref(cost_set)[j] - del deref(gamma_set)[j] + del deref(path_overlap_set)[j] del deref(prob_set)[j] for j in range(batch_len): @@ -462,13 +462,13 @@ cdef class RouteChoiceSet: if path_size_logit: del link_union_set del cost_set - del gamma_set + del path_overlap_set del prob_set else: self.results = results self.link_union_set = link_union_set self.cost_set = cost_set - self.gamma_set = gamma_set + self.path_overlap_set = path_overlap_set self.prob_set = prob_set # Copy the c_ods vector, it was provided by the auto Cython conversion and is allocated on the stack, @@ -748,7 +748,7 @@ cdef class RouteChoiceSet: @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef vector[double] *compute_gamma( + cdef vector[double] *compute_path_overlap( RouteSet_t *route_set, pair[vector[long long] *, vector[long long] *] &freq_set, vector[double] &total_cost, @@ -764,32 +764,32 @@ cdef class RouteChoiceSet: sum_{k in R}: delta_{a,k}: freq_set """ cdef: - vector[double] *gamma_vec + vector[double] *path_overlap_vec # Scratch objects vector[long long].const_iterator link_iter - double gamma + double path_overlap long long link, j size_t i - gamma_vec = new vector[double]() - gamma_vec.reserve(route_set.size()) + path_overlap_vec = new vector[double]() + path_overlap_vec.reserve(route_set.size()) j = 0 for route in deref(route_set): - gamma = 0.0 + path_overlap = 0.0 for link in deref(route): # We know the frequency table is ordered and contains every link in the union of the routes. 
# We want to find the index of the link, and use that to look up it's frequency link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) - gamma = gamma + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + path_overlap = path_overlap + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] - gamma_vec.push_back(gamma / total_cost[j]) + path_overlap_vec.push_back(path_overlap / total_cost[j]) j = j + 1 - return gamma_vec + return path_overlap_vec @cython.wraparound(False) @cython.embedsignature(True) @@ -798,7 +798,7 @@ cdef class RouteChoiceSet: @staticmethod cdef vector[double] *compute_prob( vector[double] &total_cost, - vector[double] &gamma_vec, + vector[double] &path_overlap_vec, double beta, double theta ) noexcept nogil: @@ -816,7 +816,7 @@ cdef class RouteChoiceSet: for i in range(total_cost.size()): inv_prob = 0.0 for j in range(total_cost.size()): - inv_prob = inv_prob + pow(gamma_vec[j] / gamma_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) @@ -1048,7 +1048,7 @@ cdef class RouteChoiceSet: vector[pair[long long, long long]] &ods, vector[RouteSet_t *] &route_sets, vector[vector[double] *] *cost_set, - vector[vector[double] *] *gamma_set, + vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ): cdef: @@ -1060,7 +1060,7 @@ cdef class RouteChoiceSet: # Custom imports, these are declared in route_choice.pxd *not* libarrow. 
CUInt32Builder *path_builder = new CUInt32Builder(pool) CDoubleBuilder *cost_col = nullptr - CDoubleBuilder *gamma_col = nullptr + CDoubleBuilder *path_overlap_col = nullptr CDoubleBuilder *prob_col = nullptr libpa.CInt32Builder *offset_builder = new libpa.CInt32Builder(pool) # Must be Int32 *not* UInt32 @@ -1073,19 +1073,19 @@ cdef class RouteChoiceSet: int offset = 0 size_t network_link_begin, network_link_end, link - bint psl = (cost_set != nullptr and gamma_set != nullptr and prob_set != nullptr) + bint psl = (cost_set != nullptr and path_overlap_set != nullptr and prob_set != nullptr) - # Origins, Destination, Route set, [Cost for route, Gamma for route, Probability for route] + # Origins, Destination, Route set, [Cost for route, Path_Overlap for route, Probability for route] columns.resize(6 if psl else 3) if psl: cost_col = new CDoubleBuilder(pool) - gamma_col = new CDoubleBuilder(pool) + path_overlap_col = new CDoubleBuilder(pool) prob_col = new CDoubleBuilder(pool) for i in range(ods.size()): cost_col.AppendValues(deref(deref(cost_set)[i])) - gamma_col.AppendValues(deref(deref(gamma_set)[i])) + path_overlap_col.AppendValues(deref(deref(path_overlap_set)[i])) prob_col.AppendValues(deref(deref(prob_set)[i])) for i in range(ods.size()): @@ -1123,7 +1123,7 @@ cdef class RouteChoiceSet: if psl: cost_col.Finish(&columns[3]) - gamma_col.Finish(&columns[4]) + path_overlap_col.Finish(&columns[4]) prob_col.Finish(&columns[5]) cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) @@ -1136,7 +1136,7 @@ cdef class RouteChoiceSet: if psl: del cost_col - del gamma_col + del path_overlap_col del prob_col return table @@ -1155,7 +1155,7 @@ cdef class RouteChoiceSet: deref(self.ods), deref(self.results), self.cost_set, - self.gamma_set, + self.path_overlap_set, self.prob_set ) ) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 
c31c7b6a2..538b70fc1 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -191,7 +191,7 @@ def test_cost_results(self): err_msg=f", cost differs for OD {od}", ) - def test_gamma_results(self): + def test_path_overlap_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] @@ -200,7 +200,7 @@ def test_gamma_results(self): gb = table.groupby(by=["origin id", "destination id"]) for od, df in gb: - self.assertTrue(all((df["gamma"] > 0) & (df["gamma"] <= 1))) + self.assertTrue(all((df["path overlap"] > 0) & (df["path overlap"] <= 1))) def test_prob_results(self): np.random.seed(0) From 74e525aa8995dbb0ba21d93c0f01ed8de095bf45 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 12 Mar 2024 15:39:50 +1000 Subject: [PATCH 19/52] Prevent deadend removal + graph compression introducing simple loops --- aequilibrae/paths/graph_building.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 4bd13c85e..3bb3ef4c9 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -325,6 +325,10 @@ def build_compressed_graph(graph): "link_id": np.arange(slink), } ) + + # Link compression can introduce new simple cycles into the graph + comp_lnk = comp_lnk[comp_lnk.a_node != comp_lnk.b_node] + max_link_id = link_id_max * 10 comp_lnk.link_id += max_link_id From 0b1ac041eaf1cab73aee35de67e9f952f9b035cb Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 13 Mar 2024 20:01:34 +1000 Subject: [PATCH 20/52] Move NetworkGraphIndices dataclass, add node mapping, extend API untested --- aequilibrae/paths/graph.py | 41 +++++- .../paths/results/assignment_results.py | 21 +-- aequilibrae/paths/route_choice.py | 126 +++++++++++++++++- aequilibrae/paths/route_choice_set.pyx | 2 +- 4 files changed, 163 insertions(+), 27 
deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index f28834151..a8ee6a2ce 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -4,6 +4,7 @@ from datetime import datetime from os.path import join from typing import List, Tuple, Optional +import dataclasses import numpy as np import pandas as pd @@ -12,6 +13,26 @@ from aequilibrae.context import get_logger +@dataclasses.dataclass +class NetworkGraphIndices: + network_ab_idx: np.array + network_ba_idx: np.array + graph_ab_idx: np.array + graph_ba_idx: np.array + + +def _get_graph_to_network_mapping(lids, direcs): + num_uncompressed_links = int(np.unique(lids).shape[0]) + indexing = np.zeros(int(lids.max()) + 1, np.uint64) + indexing[np.unique(lids)[:]] = np.arange(num_uncompressed_links) + + graph_ab_idx = direcs > 0 + graph_ba_idx = direcs < 0 + network_ab_idx = indexing[lids[graph_ab_idx]] + network_ba_idx = indexing[lids[graph_ba_idx]] + return NetworkGraphIndices(network_ab_idx, network_ba_idx, graph_ab_idx, graph_ba_idx) + + class GraphBase(ABC): # noqa: B024 """ Graph class. 
@@ -173,6 +194,7 @@ def prepare_graph(self, centroids: Optional[np.ndarray]) -> None: # The cache property should be recalculated when the graph has been re-prepared self.compressed_link_network_mapping_idx = None self.compressed_link_network_mapping_data = None + self.network_compressed_node_mapping = None def __build_compressed_graph(self): build_compressed_graph(self) @@ -535,8 +557,13 @@ def create_compressed_link_network_mapping(self): if ( self.compressed_link_network_mapping_idx is not None and self.compressed_link_network_mapping_data is not None + and self.network_compressed_node_mapping is not None ): - return self.compressed_link_network_mapping_idx, self.compressed_link_network_mapping_data + return ( + self.compressed_link_network_mapping_idx, + self.compressed_link_network_mapping_data, + self.network_compressed_node_mapping, + ) # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute @@ -548,6 +575,8 @@ def create_compressed_link_network_mapping(self): idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) data = np.zeros(len(filtered), dtype=np.uint32) + node_mapping = np.full(self.num_nodes, -1) + i = 0 for compressed_id, df in gb: idx[compressed_id] = i @@ -559,7 +588,8 @@ def create_compressed_link_network_mapping(self): # we do this assuming the `a` array is sorted. j = 0 # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup - x = a[np.isin(a, b, invert=True, assume_unique=True)][0] + + a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] while True: tmp = a.searchsorted(x) if tmp < len(a) and a[tmp] == x: @@ -569,14 +599,19 @@ def create_compressed_link_network_mapping(self): break j += 1 + b_node = x + node_mapping[a_node] = self.compact_graph["a_node"].iat[compressed_id] + node_mapping[b_node] = self.compact_graph["b_node"].iat[compressed_id] + i += len(values) idx[-1] = i self.compressed_link_network_mapping_idx = idx self.compressed_link_network_mapping_data = data + self.network_compressed_node_mapping = node_mapping - return idx, data + return idx, data, node_mapping class Graph(GraphBase): diff --git a/aequilibrae/paths/results/assignment_results.py b/aequilibrae/paths/results/assignment_results.py index 220b7033a..1c6da335a 100644 --- a/aequilibrae/paths/results/assignment_results.py +++ b/aequilibrae/paths/results/assignment_results.py @@ -1,10 +1,9 @@ -import dataclasses import multiprocessing as mp from abc import ABC, abstractmethod import numpy as np from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -from aequilibrae.paths.graph import Graph, TransitGraph, GraphBase +from aequilibrae.paths.graph import Graph, TransitGraph, GraphBase, _get_graph_to_network_mapping from aequilibrae.parameters import Parameters from aequilibrae import global_logger from pathlib import Path @@ -22,14 +21,6 @@ """ -@dataclasses.dataclass -class NetworkGraphIndices: - network_ab_idx: np.array - network_ba_idx: np.array - graph_ab_idx: np.array - graph_ba_idx: np.array - - class AssignmentResultsBase(ABC): """Assignment results base class for traffic and transit assignments.""" @@ -249,15 +240,7 @@ def total_flows(self) -> None: sum_axis1(self.total_link_loads, self.link_loads, self.cores) def get_graph_to_network_mapping(self): - num_uncompressed_links = int(np.unique(self.lids).shape[0]) - indexing = 
np.zeros(int(self.lids.max()) + 1, np.uint64) - indexing[np.unique(self.lids)[:]] = np.arange(num_uncompressed_links) - - graph_ab_idx = self.direcs > 0 - graph_ba_idx = self.direcs < 0 - network_ab_idx = indexing[self.lids[graph_ab_idx]] - network_ba_idx = indexing[self.lids[graph_ba_idx]] - return NetworkGraphIndices(network_ab_idx, network_ba_idx, graph_ab_idx, graph_ba_idx) + return _get_graph_to_network_mapping(self.lids, self.direcs) def get_load_results(self) -> AequilibraeData: """ diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 9de098905..467ba3bd5 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,11 +1,13 @@ import numpy as np import socket from aequilibrae.context import get_active_project -from aequilibrae.paths.graph import Graph +from aequilibrae.paths.graph import Graph, _get_graph_to_network_mapping from aequilibrae.paths.route_choice_set import RouteChoiceSet -from typing import Optional +from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData +from typing import Optional, Union, Tuple, List import pyarrow as pa import pathlib +import itertools import logging @@ -21,7 +23,7 @@ class RouteChoice: "max_depth": 0, } - def __init__(self, graph: Graph, project=None): + def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.paramaters = self.default_paramaters.copy() proj = project or get_active_project(must_exist=False) @@ -31,7 +33,8 @@ def __init__(self, graph: Graph, project=None): self.cores: int = 0 self.graph = graph - self.__rc = RouteChoiceSet(graph) + self.matrix = matrix + self.__rc = None self.schema = RouteChoiceSet.schema self.psl_schema = RouteChoiceSet.psl_schema @@ -41,6 +44,8 @@ def __init__(self, graph: Graph, project=None): self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None + self.nodes = Optional[Union[List[int], List[Tuple[int, int]]]] = None + def set_algorithm(self, algorithm: str): 
""" Chooses the assignment algorithm. @@ -113,6 +118,7 @@ def set_save_path_files(self, save_it: bool) -> None: **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved """ self.save_path_files = save_it + raise NotImplementedError() def set_save_routes(self, where: Optional[str] = None) -> None: """ @@ -126,6 +132,42 @@ def set_save_routes(self, where: Optional[str] = None) -> None: """ self.where = pathlib.Path(where) if where is not None else None + def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): + """ + Prepare OD pairs for batch computation. + + :Arguments: + **nodes** (:obj:`Union[list[int], list[tuple[int, int]]]`): List of node IDs to operate on. If a 1D list is + provided, OD pairs are taken to be all pair permutations of the list. If a list of pairs is provided + OD pairs are taken as is. All node IDs must be present in the compressed graph. To make a node ID + always appear in the compressed graph add it as a centroid. Duplicates will be dropped on execution. 
+ """ + if len(nodes) == 0: + raise ValueError("`nodes` list-like empty.") + + if isinstance(nodes[0], tuple): + # Selection of OD pairs + if any(len(x) != 2 for x in nodes): + raise ValueError("`nodes` list contains non-pair elements") + self.nodes = nodes + + elif isinstance(nodes[0], int): + self.nodes = list(itertools.permutations(nodes, r=2)) + + def execute_single(self, origin: int, destination: int): + if self.__rc is None: + self.__rc = RouteChoiceSet(self.graph) + + return self.__rc.run(origin, destination, **self.paramaters) + + def execute(self, path_size_logit: bool = False): + if self.__rc is None: + self.__rc = RouteChoiceSet(self.graph) + + return self.__rc.batched( + self.nodes, bfsle=self.algorithm == "bfsle", path_size_logit=path_size_logit, **self.paramaters + ) + def info(self) -> dict: """Returns information for the transit assignment procedure @@ -173,3 +215,79 @@ def results(self): ) return self.results + + def get_load_results( + self, which: str = "uncompressed" + ) -> Union[Tuple[AequilibraeData, AequilibraeData], Tuple[AequilibraeData]]: + """ + Translates the link loading results from the graph format into the network format. + + :Returns: + **dataset** (:obj:`tuple[AequilibraeData]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. 
+ """ + + if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: + raise ValueError("`which` argumnet must be one of ['uncompressed', 'compressed', 'both']") + + compressed = which == "both" or which == "compressed" + uncompressed = which == "both" or which == "uncompressed" + + fields = self.matrix.names + + tmp = self.__rc.link_loading(self.matrix, self.save_path_files) + if isinstance(tmp, dict): + self.link_loads = {k: v[0] for k, v in tmp.items()} + self.compact_link_loads = {k: v[1] for k, v in tmp.items()} + else: + self.link_loads = {fields[0]: tmp[0]} + self.compact_link_loads = {fields[0]: tmp[1]} + + # Get a mapping from the compressed graph to/from the network graph + m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + + # Create a data store with a row for each uncompressed link + + if uncompressed: + uncompressed_res = AequilibraeData.empty( + memory_mode=True, + entries=self.graph.num_links, + field_names=fields, + data_types=[np.float64] * len(fields), + fill=np.nan, + index=self.graph.graph.link_id.values, + ) + + for k, v in self.link_loads: + # Directional Flows + uncompressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + uncompressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + + # Tot Flow + uncompressed_res.data[k + "_tot"] = np.nan_to_num(uncompressed_res.data[k + "_ab"]) + np.nan_to_num( + uncompressed_res.data[k + "_ba"] + ) + + if compressed: + compressed_res = AequilibraeData.empty( + memory_mode=True, + entries=self.graph.compact_num_links, + field_names=fields, + data_types=[np.float64] * len(fields), + fill=np.nan, + index=self.graph.compact_graph.id.values, + ) + + for k, v in self.compact_link_loads: + # Directional Flows + compressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + compressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + + 
# Tot Flow + compressed_res.data[k + "_tot"] = np.nan_to_num(compressed_res.data[k + "_ab"]) + np.nan_to_num( + compressed_res.data[k + "_ba"] + ) + + return ((uncompressed_res,) if uncompressed else ()) + ((compressed_res,) if compressed else ()) + + def get_select_link_results(self) -> AequilibraeData: + raise NotImplementedError() diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index d6ed083f0..eeccbaabd 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -143,7 +143,7 @@ cdef class RouteChoiceSet: self.zones = graph.num_zones self.block_flows_through_centroids = graph.block_centroid_flows - self.mapping_idx, self.mapping_data = graph.create_compressed_link_network_mapping() + self.mapping_idx, self.mapping_data, _ = graph.create_compressed_link_network_mapping() def __dealloc__(self): """ From 861ea503bd74f69bb079799f3ecb1a0c4f81c126 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 09:35:49 +1000 Subject: [PATCH 21/52] Add link to bfsle paper, add American spelling --- aequilibrae/paths/route_choice.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 467ba3bd5..422565a4c 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -13,7 +13,7 @@ class RouteChoice: - all_algorithms = ["bfsle", "lp", "link-penalisation"] + all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { "beta": 1.0, "theta": 1.0, @@ -49,7 +49,11 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): def set_algorithm(self, algorithm: str): """ Chooses the assignment algorithm. - Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation' + Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. 
+ + BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, + Michael Balmer & Kay W. Axhausen (2013). + https://doi.org/10.1080/18128602.2012.671383 'lp' is also accepted as an alternative to 'link-penalisation' @@ -58,6 +62,7 @@ def set_algorithm(self, algorithm: str): """ algo_dict = {i: i for i in self.all_algorithms} algo_dict["lp"] = "link-penalisation" + algo_dict["link-penalization"] = "link-penalisation" algo = algo_dict.get(algorithm.lower()) if algo is None: @@ -81,7 +86,7 @@ def set_cores(self, cores: int) -> None: def set_paramaters(self, par: dict): """ - Sets the parameters for the route choice TODO, do we want link specific values? + Sets the parameters for the route choice. "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. @@ -97,11 +102,6 @@ def set_paramaters(self, par: dict): it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. 
- - Parameter values can be scalars (same values for the entire network) or network field names - (link-specific values) - Examples: {'alpha': 0.15, 'beta': 4.0} or {'alpha': 'alpha', 'beta': 'beta'} - - :Arguments: **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF """ From 68c2229011306578eaa0427c0470fbed3c8a5a83 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 11:18:24 +1000 Subject: [PATCH 22/52] Fix lots of small errors in wrapper class --- aequilibrae/paths/route_choice.py | 132 ++++++++++++++++++------------ 1 file changed, 78 insertions(+), 54 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 422565a4c..d37ceb7fc 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,15 +1,17 @@ -import numpy as np +import itertools +import logging +import pathlib import socket +from typing import List, Optional, Tuple, Union +from uuid import uuid4 + +import numpy as np +import pandas as pd +import pyarrow as pa from aequilibrae.context import get_active_project +from aequilibrae.matrix import AequilibraeMatrix from aequilibrae.paths.graph import Graph, _get_graph_to_network_mapping from aequilibrae.paths.route_choice_set import RouteChoiceSet -from aequilibrae.matrix import AequilibraeMatrix, AequilibraeData -from typing import Optional, Union, Tuple, List -import pyarrow as pa -import pathlib -import itertools - -import logging class RouteChoice: @@ -25,6 +27,7 @@ class RouteChoice: def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.paramaters = self.default_paramaters.copy() + self.procedure_id = uuid4().hex proj = project or get_active_project(must_exist=False) self.project = proj @@ -43,8 +46,11 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self.link_loads: Optional[np.array] = None self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None + 
self.save_path_files: bool = False + + self.nodes: Optional[Union[List[int], List[Tuple[int, int]]]] = None - self.nodes = Optional[Union[List[int], List[Tuple[int, int]]]] = None + self._config = {} def set_algorithm(self, algorithm: str): """ @@ -79,12 +85,9 @@ def set_cores(self, cores: int) -> None: :Arguments: **cores** (:obj:`int`): Number of CPU cores to use """ - if not self.classes: - raise RuntimeError("You need load transit classes before overwriting the number of cores") - self.cores = cores - def set_paramaters(self, par: dict): + def set_paramaters(self, **kwargs): """ Sets the parameters for the route choice. @@ -103,13 +106,13 @@ def set_paramaters(self, par: dict): specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. :Arguments: - **par** (:obj:`dict`): Dictionary with all parameters for the chosen VDF + **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm """ - if any(key not in self.default_paramaters for key in par.keys()): + if any(key not in self.default_paramaters for key in kwargs.keys()): raise ValueError("Invalid parameter provided") - self.paramaters = self.default_paramaters | par + self.paramaters = self.default_paramaters | kwargs def set_save_path_files(self, save_it: bool) -> None: """Turn path saving on or off. 
@@ -154,18 +157,31 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): elif isinstance(nodes[0], int): self.nodes = list(itertools.permutations(nodes, r=2)) - def execute_single(self, origin: int, destination: int): + def execute_single(self, origin: int, destination: int, path_size_logit: bool = False): if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) - return self.__rc.run(origin, destination, **self.paramaters) + self.results = None + return self.__rc.run( + origin, + destination, + bfsle=self.algorithm == "bfsle", + path_size_logit=path_size_logit, + cores=self.cores, + **self.paramaters, + ) def execute(self, path_size_logit: bool = False): if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) + self.results = None return self.__rc.batched( - self.nodes, bfsle=self.algorithm == "bfsle", path_size_logit=path_size_logit, **self.paramaters + self.nodes, + bfsle=self.algorithm == "bfsle", + path_size_logit=path_size_logit, + cores=self.cores, + **self.paramaters, ) def info(self) -> dict: @@ -195,7 +211,7 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def results(self): + def get_results(self): """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. @@ -217,17 +233,19 @@ def results(self): return self.results def get_load_results( - self, which: str = "uncompressed" - ) -> Union[Tuple[AequilibraeData, AequilibraeData], Tuple[AequilibraeData]]: + self, + which: str = "uncompressed", + clamp: bool = True, + ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], Tuple[pd.DataFrame]]: """ Translates the link loading results from the graph format into the network format. :Returns: - **dataset** (:obj:`tuple[AequilibraeData]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. 
+ **dataset** (:obj:`tuple[pd.DataFrame]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. """ if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: - raise ValueError("`which` argumnet must be one of ['uncompressed', 'compressed', 'both']") + raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") compressed = which == "both" or which == "compressed" uncompressed = which == "both" or which == "uncompressed" @@ -242,52 +260,58 @@ def get_load_results( self.link_loads = {fields[0]: tmp[0]} self.compact_link_loads = {fields[0]: tmp[1]} + if clamp: + for v in itertools.chain(self.link_loads.values(), self.compact_link_loads.values()): + v[(v < 1e-15)] = 0.0 + # Get a mapping from the compressed graph to/from the network graph m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + m_compact = _get_graph_to_network_mapping( + self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values + ) + lids = np.unique(self.graph.graph.link_id.values) + compact_lids = np.unique(self.graph.compact_graph.link_id.values) # Create a data store with a row for each uncompressed link - if uncompressed: - uncompressed_res = AequilibraeData.empty( - memory_mode=True, - entries=self.graph.num_links, - field_names=fields, - data_types=[np.float64] * len(fields), - fill=np.nan, - index=self.graph.graph.link_id.values, + uncompressed_df = pd.DataFrame( + {"link_id": lids} + | {k + dir: np.zeros(lids.shape) for k in self.link_loads.keys() for dir in ["_ab", "_ba"]} ) - - for k, v in self.link_loads: + for k, v in self.link_loads.items(): # Directional Flows - uncompressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - uncompressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + uncompressed_df[k + "_ab"].values[m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) + 
uncompressed_df[k + "_ba"].values[m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) # Tot Flow - uncompressed_res.data[k + "_tot"] = np.nan_to_num(uncompressed_res.data[k + "_ab"]) + np.nan_to_num( - uncompressed_res.data[k + "_ba"] + uncompressed_df[k + "_tot"] = np.nan_to_num(uncompressed_df[k + "_ab"].values) + np.nan_to_num( + uncompressed_df[k + "_ba"].values ) if compressed: - compressed_res = AequilibraeData.empty( - memory_mode=True, - entries=self.graph.compact_num_links, - field_names=fields, - data_types=[np.float64] * len(fields), - fill=np.nan, - index=self.graph.compact_graph.id.values, + compressed_df = pd.DataFrame( + {"link_id": compact_lids} + | { + k + dir: np.zeros(compact_lids.shape) + for k in self.compact_link_loads.keys() + for dir in ["_ab", "_ba"] + } ) - - for k, v in self.compact_link_loads: - # Directional Flows - compressed_res.data[k + "_ab"][m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - compressed_res.data[k + "_ba"][m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) + for k, v in self.compact_link_loads.items(): + compressed_df[k + "_ab"].values[m_compact.network_ab_idx] = np.nan_to_num(v[m_compact.graph_ab_idx]) + compressed_df[k + "_ba"].values[m_compact.network_ba_idx] = np.nan_to_num(v[m_compact.graph_ba_idx]) # Tot Flow - compressed_res.data[k + "_tot"] = np.nan_to_num(compressed_res.data[k + "_ab"]) + np.nan_to_num( - compressed_res.data[k + "_ba"] + compressed_df[k + "_tot"] = np.nan_to_num(compressed_df[k + "_ab"].values) + np.nan_to_num( + compressed_df[k + "_ba"].values ) - return ((uncompressed_res,) if uncompressed else ()) + ((compressed_res,) if compressed else ()) + if uncompressed and not compressed: + return uncompressed_df + elif not uncompressed and compressed: + return compressed_df + else: + return uncompressed_df, compressed_df - def get_select_link_results(self) -> AequilibraeData: + def get_select_link_results(self) -> pd.DataFrame: raise NotImplementedError() From 
f0cd2cf7137dfc6515bed0432074f95f2332ec8e Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 11:56:26 +1000 Subject: [PATCH 23/52] Merges set algorithm and set parameters. Better docs --- aequilibrae/paths/route_choice.py | 125 +++++++++++++++++++----------- 1 file changed, 79 insertions(+), 46 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index d37ceb7fc..715b77be7 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -16,13 +16,11 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] + default_paramaters = { - "beta": 1.0, - "theta": 1.0, - "penalty": 1.1, - "seed": 0, - "max_routes": 0, - "max_depth": 0, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0}, + "link-penalisation": {"penalty": 1.1}, + "bfsle": {"beta": 1.0, "theta": 1.0}, } def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): @@ -52,10 +50,10 @@ def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): self._config = {} - def set_algorithm(self, algorithm: str): + def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: """ - Chooses the assignment algorithm. - Options are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. + Chooses the assignment algorithm and set parameters. + Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). @@ -63,8 +61,25 @@ def set_algorithm(self, algorithm: str): 'lp' is also accepted as an alternative to 'link-penalisation' + Setting the parameters for the route choice: + + `beta`, `theta`, and `seed` are BFSLE specific parameters. + `penalty` is a link penalisation specific parameter. 
+ + Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + + - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is + largely dependent on the size of the paths within the network. For very small networks a value of 10 + is a recommended starting point. For large networks a good starting value is 5. Increase the value + until the number of desired routes is being consistently returned. + + - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, + it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, + specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used + **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm """ algo_dict = {i: i for i in self.all_algorithms} algo_dict["lp"] = "link-penalisation" @@ -74,9 +89,16 @@ def set_algorithm(self, algorithm: str): if algo is None: raise AttributeError(f"Assignment algorithm not available. Choose from: {','.join(self.all_algorithms)}") + defaults = self.default_paramaters["generic"] | self.default_paramaters[algo].keys() + for key in kwargs.keys(): + if key not in defaults: + raise ValueError(f"Invalid parameter `{key}` provided for algorithm `{algo}`") + self.algorithm = algo self._config["Algorithm"] = algo + self.paramaters = defaults | kwargs + def set_cores(self, cores: int) -> None: """Allows one to set the number of cores to be used @@ -87,33 +109,6 @@ def set_cores(self, cores: int) -> None: """ self.cores = cores - def set_paramaters(self, **kwargs): - """ - Sets the parameters for the route choice. - - "beta", "theta", and "seed" are BFSLE specific parameters and will have no effect on link penalisation. - "penalty" is a link penalisation specific parameter and will have no effect on BFSLE. 
- - Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. - - - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is - largely dependent on the size of the paths within the network. For very small networks a value of 10 - is a recommended starting point. For large networks a good starting value is 5. Increase the value - until the number of desired routes is being consistently returned. - - - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, - it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, - specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. - - :Arguments: - **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm - """ - - if any(key not in self.default_paramaters for key in kwargs.keys()): - raise ValueError("Invalid parameter provided") - - self.paramaters = self.default_paramaters | kwargs - def set_save_path_files(self, save_it: bool) -> None: """Turn path saving on or off. @@ -135,7 +130,7 @@ def set_save_routes(self, where: Optional[str] = None) -> None: """ self.where = pathlib.Path(where) if where is not None else None - def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): + def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: """ Prepare OD pairs for batch computation. @@ -157,7 +152,23 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]): elif isinstance(nodes[0], int): self.nodes = list(itertools.permutations(nodes, r=2)) - def execute_single(self, origin: int, destination: int, path_size_logit: bool = False): + def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: + """ + Generate route choice sets between `origin` and `destination`, potentially performing an assignment. 
+ + Does not require preparation. + + Node IDs must be present in the compressed graph. To make a node ID always appear in the compressed + graph add it as a centroid. + + :Arguments: + **origin** (:obj:`int`): Origin node ID. + **destination** (:obj:`int`): Destination node ID. + **perform_assignment** (:obj:`bool`): Whether or not to perform an assignment. Default `False`. + + :Returns: + ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. + """ if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) @@ -166,20 +177,36 @@ def execute_single(self, origin: int, destination: int, path_size_logit: bool = origin, destination, bfsle=self.algorithm == "bfsle", - path_size_logit=path_size_logit, + path_size_logit=perform_assignment, cores=self.cores, **self.paramaters, ) - def execute(self, path_size_logit: bool = False): + def execute(self, perform_assignment: bool = False) -> None: + """ + Generate route choice sets between the previously supplied nodes, potentially performing an assignment. + + Node IDs must be present in the compressed graph. To make a node ID always appear in the compressed + graph add it as a centroid. + + To access results see `RouteChoice.get_results()`. + + :Arguments: + **perform_assignment** (:obj:`bool`): Whether or not to perform an assignment. Default `False`. + """ + if self.nodes is None: + raise ValueError( + "to perform batch route choice generation you must first prepare with the selected nodes. 
See `RouteChoice.prepare()`" + ) + if self.__rc is None: self.__rc = RouteChoiceSet(self.graph) self.results = None - return self.__rc.batched( + self.__rc.batched( self.nodes, bfsle=self.algorithm == "bfsle", - path_size_logit=path_size_logit, + path_size_logit=perform_assignment, cores=self.cores, **self.paramaters, ) @@ -211,14 +238,13 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def get_results(self): + def get_results(self) -> pa.Table: """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure - """ if self.results is None: try: @@ -236,12 +262,19 @@ def get_load_results( self, which: str = "uncompressed", clamp: bool = True, - ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], Tuple[pd.DataFrame]]: + ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: """ Translates the link loading results from the graph format into the network format. + :Arguments: + **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. + **clamp** (:obj:`bool`): Whether or not to treat values `< 1e-15` as `0.0`. + :Returns: - **dataset** (:obj:`tuple[pd.DataFrame]`): Tuple of uncompressed and compressed AequilibraE data with the link loading results. + **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + A tuple of uncompressed and compressed DataFrames with the link loading results. 
Or + the requested link loading result.s + """ if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: From 2c188164eecae9f2f553198cb708553859e9228f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 15:44:19 +1000 Subject: [PATCH 24/52] Add example docs and various bug fixes --- aequilibrae/paths/route_choice.py | 35 +++-- aequilibrae/paths/route_choice_set.pyx | 12 +- .../trip_distribution/plot_route_choice.py | 124 ++++++++++++++++++ 3 files changed, 160 insertions(+), 11 deletions(-) create mode 100644 docs/source/examples/trip_distribution/plot_route_choice.py diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 715b77be7..22eeee3f1 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -23,7 +23,7 @@ class RouteChoice: "bfsle": {"beta": 1.0, "theta": 1.0}, } - def __init__(self, graph: Graph, matrix: AequilibraeMatrix, project=None): + def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): self.paramaters = self.default_paramaters.copy() self.procedure_id = uuid4().hex @@ -55,7 +55,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. - BFSLE implemenation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, + BFSLE implementation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). https://doi.org/10.1080/18128602.2012.671383 @@ -89,7 +89,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: if algo is None: raise AttributeError(f"Assignment algorithm not available. 
Choose from: {','.join(self.all_algorithms)}") - defaults = self.default_paramaters["generic"] | self.default_paramaters[algo].keys() + defaults = self.default_paramaters["generic"] | self.default_paramaters[algo] for key in kwargs.keys(): if key not in defaults: raise ValueError(f"Invalid parameter `{key}` provided for algorithm `{algo}`") @@ -120,7 +120,7 @@ def set_save_path_files(self, save_it: bool) -> None: def set_save_routes(self, where: Optional[str] = None) -> None: """ - Set save path for route choice resutls. Provide ``None`` to disable. + Set save path for route choice results. Provide ``None`` to disable. **warning** enabling route saving will disable in memory results. Viewing the results will read the results from disk first. @@ -128,7 +128,11 @@ def set_save_routes(self, where: Optional[str] = None) -> None: :Arguments: **save_it** (:obj:`bool`): Boolean to indicate whether routes should be saved """ - self.where = pathlib.Path(where) if where is not None else None + if where is not None: + where = pathlib.Path(where) + if not where.exists(): + raise ValueError(f"Path does not exist `{where}`") + self.where = where def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: """ @@ -149,7 +153,7 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: raise ValueError("`nodes` list contains non-pair elements") self.nodes = nodes - elif isinstance(nodes[0], int): + elif isinstance(nodes[0], (int, np.unsignedinteger)): self.nodes = list(itertools.permutations(nodes, r=2)) def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: @@ -179,6 +183,7 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool bfsle=self.algorithm == "bfsle", path_size_logit=perform_assignment, cores=self.cores, + where=str(self.where) if self.where is not None else None, **self.paramaters, ) @@ -208,6 +213,7 @@ def execute(self, perform_assignment: bool = False) 
-> None: bfsle=self.algorithm == "bfsle", path_size_logit=perform_assignment, cores=self.cores, + where=str(self.where) if self.where is not None else None, **self.paramaters, ) @@ -223,7 +229,11 @@ def info(self) -> dict: **info** (:obj:`dict`): Dictionary with summary information """ - matrix_totals = {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + matrix_totals = ( + {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} + if self.matrix is not None + else None + ) info = { "Algorithm": self.algorithm, @@ -238,11 +248,13 @@ def log_specification(self): self.logger.info("Route Choice specification") self.logger.info(self._config) - def get_results(self) -> pa.Table: + def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: """Returns the results of the route choice procedure Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + If `save_routes` was specified then a Pyarrow dataset is returned. The call is responsible for reading this dataset. 
+ :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure """ @@ -251,7 +263,7 @@ def get_results(self) -> pa.Table: self.results = self.__rc.get_results() except RuntimeError as err: if self.where is None: - raise ValueError("Route choice results not computed and read/save path not specificed") from err + raise ValueError("Route choice results not computed and read/save path not specified") from err self.results = pa.dataset.dataset( self.where, format="parquet", partitioning=pa.dataset.HivePartitioning(self.schema) ) @@ -280,6 +292,11 @@ def get_load_results( if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") + if self.matrix is None: + raise ValueError( + "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." + ) + compressed = which == "both" or which == "compressed" uncompressed = which == "both" or which == "uncompressed" diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index eeccbaabd..b8caf774a 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -217,7 +217,15 @@ cdef class RouteChoiceSet: Represents paths from ``origin`` to ``destination``. 
""" self.batched([(origin, destination)], *args, **kwargs) - return [tuple(x) for x in self.get_results().column("route set").to_pylist()] + where = kwargs.get("where", None) + if where is not None: + schema = self.psl_schema if kwargs.get("path_size_logit", False) else self.schema + results = pa.dataset.dataset( + where, format="parquet", partitioning=pa.dataset.HivePartitioning(schema) + ).to_table() + else: + results = self.get_results() + return [tuple(x) for x in results.column("route set").to_pylist()] # Bounds checking doesn't really need to be disabled here but the warning is annoying @cython.boundscheck(False) @@ -315,7 +323,7 @@ cdef class RouteChoiceSet: warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs") if where is not None: - checkpoint = Checkpoint(where, self.schema, partition_cols=["origin id"]) + checkpoint = Checkpoint(where, self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"]) batches = list(Checkpoint.batches(list(set_ods))) max_results_len = max(len(batch) for batch in batches) else: diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py new file mode 100644 index 000000000..5f285cb22 --- /dev/null +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -0,0 +1,124 @@ +""" +.. _example_usage_route_choice: + +Route Choice +================= + +In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La Serena Metropolitan Area in Chile. 
+"""
+
+# Imports
+from uuid import uuid4
+from tempfile import gettempdir
+from os.path import join
+from aequilibrae.utils.create_example import create_example
+
+# We create the example project inside our temp folder
+fldr = join(gettempdir(), uuid4().hex)
+
+project = create_example(fldr, "coquimbo")
+
+# %%
+import logging
+import sys
+
+# Once the project opens, we can tell the logger to direct all messages to the terminal as well
+logger = project.logger
+stdout_handler = logging.StreamHandler(sys.stdout)
+formatter = logging.Formatter("%(asctime)s;%(levelname)s ; %(message)s")
+stdout_handler.setFormatter(formatter)
+logger.addHandler(stdout_handler)
+
+# %%
+# Route Choice
+# ---------------

+# %%
+import numpy as np
+
+# %%
+# Let's build all graphs
+project.network.build_graphs()
+# We get warnings that several fields in the project are filled with NaNs.
+# This is true, but we won't use those fields.
+
+# %%
+# We grab the graph for cars
+graph = project.network.graphs["c"]
+
+# we also see what graphs are available
+project.network.graphs.keys()
+
+# let's say we want to minimize the distance
+graph.set_graph("distance")
+
+# But let's say we only want a skim matrix for nodes 28-40, and 49-90 (inclusive), these happen to be a selection of western centroids.
+graph.prepare_graph(np.array(list(range(28, 41)) + list(range(49, 91))))
+
+# %%
+# Mock demand matrix
+# ~~~~~~~~~~~~~~~~~~
+# We'll create a mock demand matrix with demand `1` for every zone. 
+from aequilibrae.matrix import AequilibraeMatrix
+
+names_list = ["demand", "5x demand"]
+
+mat = AequilibraeMatrix()
+mat.create_empty(zones=graph.num_zones, matrix_names=names_list, memory_only=True)
+mat.index = graph.centroids[:]
+mat.matrices[:, :, 0] = np.full((graph.num_zones, graph.num_zones), 1.0)
+mat.matrices[:, :, 1] = np.full((graph.num_zones, graph.num_zones), 5.0)
+mat.computational_view()
+
+# %%
+# Route Choice class
+# ~~~~~~~~~~~~~~~~~~
+# Here we'll construct and use the Route Choice class to generate our route sets
+from aequilibrae.paths import RouteChoice

+# %%
+# This object construction might take a minute depending on the size of the graph due to the construction of the compressed link to network link mapping that's required.
+# This is a one time operation per graph and is cached.
+# We need to supply a Graph and optionally an AequilibraeMatrix, if the matrix is not provided link loading cannot be performed.
+rc = RouteChoice(graph, mat)
+
+# %%
+# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE based on the paper
+# "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013).
+# https://doi.org/10.1080/18128602.2012.671383
+# It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results.
+
+# rc.set_choice_set_generation("link-penalisation", max_routes=5, penalty=1.1)
+rc.set_choice_set_generation("bfsle", max_routes=5, beta=1.1, theta=1.1)
+
+# %%
+# All parameters are optional, the defaults are:
+print(rc.default_paramaters)
+
+# %%
+# We can now perform a computation for a single OD pair if we'd like. Here we do one between the first and last centroid as well as an assignment. 
+results = rc.execute_single(28, 90, perform_assignment=True) +print(results[0]) + +# %% +# Because we asked it to also perform an assignment we can access the various results from that +# The default return is a Pyarrow Table but Pandas is nicer for viewing. +rc.get_results().to_pandas() + +# %% +# To perform a batch operation we need to prepare the object first. We can either provide a list of tuple of the OD pairs we'd like to use, or we can provided a 1D list +# and the generation will be run on all permutations. +rc.prepare(graph.centroids[:5]) # You can inspect the result with rc.nodes + +# %% +# Now we can perform a batch computation with an assignment +rc.execute(perform_assignment=True) +rc.get_results().to_pandas() + +# %% +# Since we provided a matrix initially we can also perform link loading based on our assignment results. +# We can specify which link loading we want, either just uncompressed, just compressed, or both. +rc.get_load_results(which="both") + +# %% +project.close() From 09c7294b816f9f1b10484af92fe051ecbbe57e69 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 19 Mar 2024 15:46:21 +1000 Subject: [PATCH 25/52] Make deadlock case and error, needs a real fix --- aequilibrae/paths/route_choice_set.pyx | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index b8caf774a..56217bbd6 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -289,6 +289,10 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") + + if where is not None and cores != 1: + raise NotImplementedError("current implementation suffers from a deadlock when using multithreading and writing to disk") + cdef: long long origin_index, dest_index, i unsigned int c_max_routes = max_routes From 757535fe123341f9d45edd005ad32fe24509aa86 Mon Sep 17 00:00:00 2001 From: 
Jake-Moss Date: Tue, 19 Mar 2024 15:54:52 +1000 Subject: [PATCH 26/52] Enforce single thread for tests --- tests/aequilibrae/paths/test_route_choice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 538b70fc1..781277c29 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -160,7 +160,7 @@ def test_round_trip(self): path = join(self.project.project_base_path, "batched results") rc.batched(nodes, max_routes=max_routes, max_depth=10) table = rc.get_results().to_pandas() - rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path) + rc.batched(nodes, max_routes=max_routes, max_depth=10, where=path, cores=1) dataset = pa.dataset.dataset(path, format="parquet", partitioning=pa.dataset.HivePartitioning(rc.schema)) new_table = ( From 4f6fc869b0af3ae7b8dcbe4a0444e6cd7af1f6aa Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:07:11 +1000 Subject: [PATCH 27/52] Fix the "deadlock", code wasn't deadlocking but it was running away --- aequilibrae/paths/route_choice.py | 10 +++++++--- aequilibrae/paths/route_choice_set.pxd | 2 ++ aequilibrae/paths/route_choice_set.pyx | 26 +++++++++++++++++++------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 22eeee3f1..75858c58d 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -18,7 +18,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100}, "link-penalisation": {"penalty": 1.1}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -66,16 +66,20 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: `beta`, 
`theta`, and `seed` are BFSLE specific parameters. `penalty` is a link penalisation specific parameter. - Setting `max_depth`, while not required, is strongly recommended to prevent runaway algorithms. + Setting `max_depth` or `max_misses`, while not required, is strongly recommended to prevent runaway algorithms. + `max_misses` is the maximum number of duplicate routes found per OD pair. If it is exceeded then the route set + is returned with fewer than `max_routes`. It has a default value of `100`. - - When using BFSLE `max_depth` corresponds to the maximum height of the graph of graphs. It's value is largely dependent on the size of the paths within the network. For very small networks a value of 10 is a recommended starting point. For large networks a good starting value is 5. Increase the value - until the number of desired routes is being consistently returned. + until the number of desired routes is being consistently returned. If it is exceeded then the route set + is returned with fewer than `max_routes`. - - When using LP, `max_depth` corresponds to the maximum number of iterations performed. While not enforced, it should be higher than `max_routes`. It's value is dependent on the magnitude of the cost field, specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. + If it is exceeded then the route set is returned with fewer than `max_routes`. 
:Arguments: **algorithm** (:obj:`str`): Algorithm to be used diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index e80a41c6a..534a854e1 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -170,6 +170,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, @@ -184,6 +185,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 56217bbd6..eb00fa745 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -237,6 +237,7 @@ cdef class RouteChoiceSet: ods: List[Tuple[int, int]], max_routes: int = 0, max_depth: int = 0, + max_misses: int = 100, seed: int = 0, cores: int = 0, a_star: bool = True, @@ -259,6 +260,7 @@ cdef class RouteChoiceSet: **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. Must be non-negative. Default of ``0`` for unlimited. + **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. Terminates if exceeded. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. 
@@ -289,14 +291,11 @@ cdef class RouteChoiceSet: if self.nodes_to_indices_view[d] == -1: raise ValueError(f"Destination {d} is not present within the compact graph") - - if where is not None and cores != 1: - raise NotImplementedError("current implementation suffers from a deadlock when using multithreading and writing to disk") - cdef: long long origin_index, dest_index, i unsigned int c_max_routes = max_routes unsigned int c_max_depth = max_depth + unsigned int c_max_misses = max_misses unsigned int c_seed = seed unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() @@ -399,6 +398,7 @@ cdef class RouteChoiceSet: dest_index, c_max_routes, c_max_depth, + c_max_misses, cost_matrix[threadid()], predecessors_matrix[threadid()], conn_matrix[threadid()], @@ -413,6 +413,7 @@ cdef class RouteChoiceSet: dest_index, c_max_routes, c_max_depth, + c_max_misses, cost_matrix[threadid()], predecessors_matrix[threadid()], conn_matrix[threadid()], @@ -539,6 +540,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long long [:] thread_predecessors, long long [:] thread_conn, @@ -558,6 +560,8 @@ cdef class RouteChoiceSet: unordered_set[long long] *banned unordered_set[long long] *new_banned vector[long long] *vec + pair[RouteSet_t.iterator, bool] status + unsigned int miss_count = 0 long long p, connector max_routes = max_routes if max_routes != 0 else UINT_MAX @@ -615,8 +619,9 @@ cdef class RouteChoiceSet: next_queue.push_back(new_banned) # The deduplication of routes occurs here - route_set.insert(vec) - if route_set.size() >= max_routes: + status = route_set.insert(vec) + miss_count += not status.second + if miss_count > max_misses or route_set.size() >= max_routes: break queue.swap(next_queue) @@ -642,6 +647,7 @@ cdef class RouteChoiceSet: long dest_index, unsigned int max_routes, unsigned int max_depth, + unsigned int max_misses, double [:] thread_cost, long 
long [:] thread_predecessors, long long [:] thread_conn, @@ -656,6 +662,8 @@ cdef class RouteChoiceSet: # Scratch objects vector[long long] *vec long long p, connector + pair[RouteSet_t.iterator, bool] status + unsigned int miss_count = 0 max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -682,7 +690,11 @@ cdef class RouteChoiceSet: for connector in deref(vec): thread_cost[connector] *= penatly - route_set.insert(vec) + # To prevent runaway algorithms if we find a n duplicate routes we should stop + status = route_set.insert(vec) + miss_count += not status.second + if miss_count > max_misses: + break else: break From 0aec7ecf15809ce68ef2f82757cf99a4cf35a8eb Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:45:19 +1000 Subject: [PATCH 28/52] Limit pyarrow IO threads, Cython += is funky --- aequilibrae/paths/route_choice_set.pyx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index eb00fa745..72293bc9c 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -316,6 +316,8 @@ cdef class RouteChoiceSet: # self.a_star = a_star + pa.set_io_thread_count(cores) + if self.a_star: _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64) # Dummy array to allow slicing else: @@ -620,7 +622,7 @@ cdef class RouteChoiceSet: # The deduplication of routes occurs here status = route_set.insert(vec) - miss_count += not status.second + miss_count = miss_count + (not status.second) if miss_count > max_misses or route_set.size() >= max_routes: break @@ -692,7 +694,7 @@ cdef class RouteChoiceSet: # To prevent runaway algorithms if we find a n duplicate routes we should stop status = route_set.insert(vec) - miss_count += not status.second + miss_count = miss_count + (not status.second) if miss_count > max_misses: break else: From 
7f8879b6bcc3398591a48039692e431def96b148 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 11:49:39 +1000 Subject: [PATCH 29/52] Pyarrow IO threads must be > 0, give tests from more freedom --- aequilibrae/paths/route_choice_set.pyx | 2 +- tests/aequilibrae/paths/test_route_choice.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 72293bc9c..f7d82be40 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -316,7 +316,7 @@ cdef class RouteChoiceSet: # self.a_star = a_star - pa.set_io_thread_count(cores) + pa.set_io_thread_count(c_cores) if self.a_star: _reached_first_matrix = np.zeros((c_cores, 1), dtype=np.int64) # Dummy array to allow slicing diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 781277c29..57ee9a22b 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -106,7 +106,7 @@ def test_route_choice_batched(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] max_routes = 20 - rc.batched(nodes, max_routes=max_routes, max_depth=10) + rc.batched(nodes, max_routes=max_routes, max_depth=10, max_misses=200) results = rc.get_results() gb = results.to_pandas().groupby(by="origin id") From ec2127280a736021032db7758e35b5953651c785 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 20 Mar 2024 17:31:07 +1000 Subject: [PATCH 30/52] Better type checks and some tests --- aequilibrae/paths/route_choice.py | 15 +++-- tests/aequilibrae/paths/test_route_choice.py | 67 ++++++++++++++++++++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 75858c58d..40c726947 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -151,14 
+151,19 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: if len(nodes) == 0: raise ValueError("`nodes` list-like empty.") - if isinstance(nodes[0], tuple): - # Selection of OD pairs - if any(len(x) != 2 for x in nodes): - raise ValueError("`nodes` list contains non-pair elements") + if all( + isinstance(pair, tuple) + and len(pair) == 2 + and isinstance(pair[0], (int, np.unsignedinteger)) + and isinstance(pair[1], (int, np.unsignedinteger)) + for pair in nodes + ): self.nodes = nodes - elif isinstance(nodes[0], (int, np.unsignedinteger)): + elif len(nodes) > 1 and all(isinstance(x, (int, np.unsignedinteger)) for x in nodes): self.nodes = list(itertools.permutations(nodes, r=2)) + else: + raise ValueError(f"{type(nodes)} or {type(nodes[0])} for not valid types for the `prepare` method") def execute_single(self, origin: int, destination: int, perform_assignment: bool = False) -> List[Tuple[int]]: """ diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 57ee9a22b..8d5542f24 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -225,6 +225,73 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(link_loads, link_loads2) +class TestRouteChoice(TestCase): + def setUp(self) -> None: + os.environ["PATH"] = os.path.join(gettempdir(), "temp_data") + ";" + os.environ["PATH"] + + proj_path = os.path.join(gettempdir(), "test_route_choice" + uuid.uuid4().hex) + os.mkdir(proj_path) + zipfile.ZipFile(join(dirname(siouxfalls_project), "sioux_falls_single_class.zip")).extractall(proj_path) + + self.project = Project() + self.project.open(proj_path) + self.project.network.build_graphs(fields=["distance"], modes=["c"]) + self.graph = self.project.network.graphs["c"] # type: Graph + self.graph.set_graph("distance") + self.graph.set_blocked_centroid_flows(False) + + self.mat = self.project.matrices.get_matrix("demand_omx") + 
self.mat.computational_view() + + def test_prepare(self): + rc = RouteChoice(self.graph, self.mat) + + with self.assertRaises(ValueError): + rc.prepare([]) + + with self.assertRaises(ValueError): + rc.prepare(["1", "2"]) + + with self.assertRaises(ValueError): + rc.prepare([("1", "2")]) + + with self.assertRaises(ValueError): + rc.prepare([1]) + + rc.prepare([1, 2]) + self.assertListEqual(rc.nodes, [(1, 2), (2, 1)]) + rc.prepare([(1, 2)]) + self.assertListEqual(rc.nodes, [(1, 2)]) + + def test_set_save_routes(self): + rc = RouteChoice(self.graph, self.mat) + + with self.assertRaises(ValueError): + rc.set_save_routes("/non-existent-path") + + def test_set_choice_set_generation(self): + rc = RouteChoice(self.graph, self.mat) + + rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.assertDictEqual( + rc.paramaters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + ) + + rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) + self.assertDictEqual( + rc.paramaters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + ) + + with self.assertRaises(ValueError): + rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) + + with self.assertRaises(ValueError): + rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) + + with self.assertRaises(AttributeError): + rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + + def generate_line_strings(project, graph, results): """Debug method""" import geopandas as gpd From 6ad539b02d26cae99a5675393f38f908fa1ac4ac Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:11:39 +1000 Subject: [PATCH 31/52] Fix segfault and infinite loop due to miss count --- aequilibrae/paths/route_choice_set.pxd | 2 +- aequilibrae/paths/route_choice_set.pyx | 331 ++++++++++++++----------- setup.cfg | 6 +- 3 files changed, 197 insertions(+), 142 deletions(-) diff 
--git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 534a854e1..0044a28c6 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -196,7 +196,7 @@ cdef class RouteChoiceSet: ) noexcept nogil @staticmethod - cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) noexcept nogil @staticmethod cdef vector[double] *compute_cost(RouteSet_t *route_sets, double[:] cost_view) noexcept nogil diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index f7d82be40..e846b0311 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1,14 +1,43 @@ # cython: language_level=3str +from aequilibrae.paths.graph import Graph + +from cython.operator cimport dereference as deref +from cython.operator cimport preincrement as inc +from cython.parallel cimport parallel, prange, threadid +from libc.limits cimport UINT_MAX +from libc.math cimport INFINITY, exp, pow +from libc.stdlib cimport abort +from libc.string cimport memcpy +from libcpp cimport nullptr +from libcpp.algorithm cimport lower_bound, reverse, sort +from libcpp.unordered_map cimport unordered_map +from libcpp.unordered_set cimport unordered_set +from libcpp.utility cimport pair +from libcpp.vector cimport vector +from openmp cimport omp_get_num_threads + +import itertools +import logging +import pathlib +import warnings +from typing import List, Tuple + +import numpy as np +import pyarrow as pa +from aequilibrae.matrix import AequilibraeMatrix + +cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. 
+cimport pyarrow as pa +cimport pyarrow.lib as libpa + """This module aims to implemented the BFS-LE algorithm as described in Rieser-Schüssler, Balmer, and Axhausen, 'Route Choice Sets for Very High-Resolution Data'. https://doi.org/10.1080/18128602.2012.671383 -A rough overview of the algorithm is as follows. - 1. Prepare the initial graph, this is depth 0 with no links removed. - 2. Find a short path, P. If P is not empty add P to the path set. - 3. For all links p in P, remove p from E, compounding with the previously removed links. - 4. De-duplicate the sub-graphs, we only care about unique sub-graphs. - 5. Go to 2. +A rough overview of the algorithm is as follows. 1. Prepare the initial graph, this is depth 0 with no links removed. + 2. Find a short path, P. If P is not empty add P to the path set. 3. For all links p in P, remove p from E, + compounding with the previously removed links. 4. De-duplicate the sub-graphs, we only care about unique + sub-graphs. 5. Go to 2. Details: The general idea of the algorithm is pretty simple, as is the implementation. The caveats here is that there is a lot of cpp interop and memory management. A description of the purpose of variables is in order: @@ -26,20 +55,20 @@ independent and should only use commutative operations. The comparator is the sa removed link sets we've seen before. This allows us to detected duplicated graphs. rng: A custom imported version of std::linear_congruential_engine. libcpp doesn't provide one so we do. It should be -significantly faster than the std::mersenne_twister_engine without sacrificing much. We don't need amazing RNG, just -ok and fast. This is only used to shuffle the queue. +significantly faster than the std::mersenne_twister_engine without sacrificing much. We don't need amazing RNG, just ok +and fast. This is only used to shuffle the queue. -queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these so a -vector is best. 
We maintain two queues, one that we are currently iterating over, and one that we can add to, building -up with all the newly removed link sets. These two are swapped at the end of an iteration, next_queue is then +queue, next_queue: These are vectors of pointers to sets of removed links. We never need to push to the front of these +so a vector is best. We maintain two queues, one that we are currently iterating over, and one that we can add to, +building up with all the newly removed link sets. These two are swapped at the end of an iteration, next_queue is then cleared. These store sets of removed links. banned, next_banned: `banned` is the iterator variable for `queue`. `banned` is copied into `next_banned` where another -link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is immediately -deallocated. Otherwise it's placed into `next_queue`. +link can be added without mutating `banned`. If we've already seen this set of removed links `next_banned` is +immediately deallocated. Otherwise it's placed into `next_queue`. -vec: `vec` is a scratch variable to store pointers to new vectors, or rather, paths while we are building them. Each time a path -is found a new one is allocated, built, and stored in the route_set. +vec: `vec` is a scratch variable to store pointers to new vectors, or rather, paths while we are building them. Each +time a path is found a new one is allocated, built, and stored in the route_set. p, connector: Scratch variables for iteration. 
@@ -52,44 +81,12 @@ routes aren't required small-ish things like the memcpy and banned link set copy """ -from aequilibrae.paths.graph import Graph - -from libc.math cimport INFINITY, pow, exp -from libc.string cimport memcpy -from libc.limits cimport UINT_MAX -from libc.stdlib cimport abort -from libcpp cimport nullptr -from libcpp.vector cimport vector -from libcpp.unordered_set cimport unordered_set -from libcpp.unordered_map cimport unordered_map -from libcpp.utility cimport pair -from libcpp.algorithm cimport sort, lower_bound, reverse -from cython.operator cimport dereference as deref, preincrement as inc -from cython.parallel cimport parallel, prange, threadid -cimport openmp - -import numpy as np -import pyarrow as pa -from typing import List, Tuple -import itertools -import pathlib -import logging -import warnings -from aequilibrae.matrix import AequilibraeMatrix - -cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. -cimport pyarrow as pa -cimport pyarrow.lib as libpa -import pyarrow.dataset -import pyarrow.parquet as pq -from libcpp.memory cimport shared_ptr - -from libc.stdio cimport fprintf, printf, stderr - -# It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation times +# It would really be nice if these were modules. The 'include' syntax is long deprecated and adds a lot to compilation +# times include 'basic_path_finding.pyx' include 'parallel_numpy.pyx' + @cython.embedsignature(True) cdef class RouteChoiceSet: """ @@ -197,24 +194,24 @@ cdef class RouteChoiceSet: del self.ods self.ods = prob_set = nullptr - @cython.embedsignature(True) def run(self, origin: int, destination: int, *args, **kwargs): - """ - Compute the a route set for a single OD pair. + """Compute the a route set for a single OD pair. 
Often the returned list's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all unique possible paths have been found then a smaller set will be returned. - Thin wrapper around ``RouteChoiceSet.batched``. Additional arguments are forwarded to ``RouteChoiceSet.batched``. + Additional arguments are forwarded to ``RouteChoiceSet.batched``. :Arguments: - **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a centroid. - **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to choose a centroid. + **origin** (:obj:`int`): Origin node ID. Must be present within compact graph. Recommended to choose a + centroid. + **destination** (:obj:`int`): Destination node ID. Must be present within compact graph. Recommended to + choose a centroid. + + :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of + compact link IDs. Represents paths from ``origin`` to ``destination``. - :Returns: - **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of compact link IDs. - Represents paths from ``origin`` to ``destination``. """ self.batched([(origin, destination)], *args, **kwargs) where = kwargs.get("where", None) @@ -248,24 +245,29 @@ cdef class RouteChoiceSet: beta: float = 1.0, theta: float = 1.0, ): - """ - Compute the a route set for a list of OD pairs. + """Compute the a route set for a list of OD pairs. - Often the returned list for each OD pair's length is ``max_routes``, however, it may be limited by ``max_depth`` or if all - unique possible paths have been found then a smaller set will be returned. + Often the returned list for each OD pair's length is ``max_routes``, however, it may be limited by ``max_depth`` + or if all unique possible paths have been found then a smaller set will be returned. 
:Arguments: - **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination node ID must be - present within compact graph. Recommended to choose a centroids. - **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of ``0`` for unlimited. - **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link penalisation. - Must be non-negative. Default of ``0`` for unlimited. - **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. Terminates if exceeded. + **ods** (:obj:`list[tuple[int, int]]`): List of OD pairs ``(origin, destination)``. Origin and destination + node ID must be present within compact graph. Recommended to choose a centroids. + **max_routes** (:obj:`int`): Maximum size of the generated route set. Must be non-negative. Default of + ``0`` for unlimited. + **max_depth** (:obj:`int`): Maximum depth BFSLE can explore, or maximum number of iterations for link + penalisation. Must be non-negative. Default of ``0`` for unlimited. + **max_misses** (:obj:`int`): Maximum number of collective duplicate routes found for a single OD pair. + Terminates if exceeded. **seed** (:obj:`int`): Seed used for rng. Must be non-negative. Default of ``0``. - **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. Default of ``0`` for all available. - **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. - **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. + **cores** (:obj:`int`): Number of cores to use when parallelising over OD pairs. Must be non-negative. + Default of ``0`` for all available. + **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link + penalisation. 
Default ``True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible + with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. + """ cdef: long long o, d @@ -297,15 +299,19 @@ cdef class RouteChoiceSet: unsigned int c_max_depth = max_depth unsigned int c_max_misses = max_misses unsigned int c_seed = seed - unsigned int c_cores = cores if cores > 0 else openmp.omp_get_num_threads() + unsigned int c_cores = cores if cores > 0 else omp_get_num_threads() vector[pair[long long, long long]] c_ods - # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this memory + # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this + # memory double [:, :] cost_matrix = np.empty((c_cores, self.cost_view.shape[0]), dtype=float) long long [:, :] predecessors_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) long long [:, :] conn_matrix = np.empty((c_cores, self.num_nodes + 1), dtype=np.int64) - long long [:, :] b_nodes_matrix = np.broadcast_to(self.b_nodes_view, (c_cores, self.b_nodes_view.shape[0])).copy() + long long [:, :] b_nodes_matrix = np.broadcast_to( + self.b_nodes_view, + (c_cores, self.b_nodes_view.shape[0]) + ).copy() # This matrix is never read from, it exists to allow using the Dijkstra's method without changing the # interface. 
@@ -328,7 +334,10 @@ cdef class RouteChoiceSet: warnings.warn(f"Duplicate OD pairs found, dropping {len(ods) - len(set_ods)} OD pairs") if where is not None: - checkpoint = Checkpoint(where, self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"]) + checkpoint = Checkpoint( + where, + self.psl_schema if path_size_logit else self.schema, partition_cols=["origin id"] + ) batches = list(Checkpoint.batches(list(set_ods))) max_results_len = max(len(batch) for batch in batches) else: @@ -340,7 +349,6 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set pair[vector[long long] *, vector[long long] *] freq_pair - vector[long long] *link_union_scratch = nullptr vector[vector[long long] *] *link_union_set = nullptr vector[vector[double] *] *cost_set = nullptr vector[vector[double] *] *path_overlap_set = nullptr @@ -357,7 +365,9 @@ cdef class RouteChoiceSet: for batch in batches: c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() - results.resize(batch_len) # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not needed + # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not + # needed + results.resize(batch_len) if path_size_logit: # we may clear these objects because it's either: @@ -374,11 +384,6 @@ cdef class RouteChoiceSet: prob_set.resize(batch_len) with nogil, parallel(num_threads=c_cores): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - if path_size_logit: - link_union_scratch = new vector[long long]() - for i in prange(batch_len): origin_index = self.nodes_to_indices_view[c_ods[i].first] dest_index = self.nodes_to_indices_view[c_ods[i].second] @@ -426,13 +431,23 @@ cdef class RouteChoiceSet: ) if path_size_logit: - link_union_scratch.clear() - freq_pair = RouteChoiceSet.compute_frequency(route_set, deref(link_union_scratch)) + freq_pair = RouteChoiceSet.compute_frequency(route_set) deref(link_union_set)[i] = freq_pair.first deref(cost_set)[i] = RouteChoiceSet.compute_cost(route_set, self.cost_view) - deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap(route_set, freq_pair, deref(deref(cost_set)[i]), self.cost_view) - deref(prob_set)[i] = RouteChoiceSet.compute_prob(deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, theta) - del freq_pair.second # While we need the unique sorted links (.first), we don't need the frequencies (.second) + deref(path_overlap_set)[i] = RouteChoiceSet.compute_path_overlap( + route_set, + freq_pair, + deref(deref(cost_set)[i]), + self.cost_view + ) + deref(prob_set)[i] = RouteChoiceSet.compute_prob( + deref(deref(cost_set)[i]), + deref(deref(path_overlap_set)[i]), + beta, + theta + ) + # While we need the unique sorted links (.first), we don't need the frequencies (.second) + del freq_pair.second deref(results)[i] = route_set @@ -446,13 +461,13 @@ cdef class RouteChoiceSet: self.b_nodes_view, ) - if path_size_logit: - del link_union_scratch - if where is not None: - table = libpa.pyarrow_wrap_table(self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set)) + table = libpa.pyarrow_wrap_table( + self.make_table_from_results(c_ods, deref(results), cost_set, path_overlap_set, prob_set) + ) - # Once we've made the table all results have been copied into some pyarrow structure, we can free our inner 
internal structures + # Once we've made the table all results have been copied into some pyarrow structure, we can free our + # inner internal structures if path_size_logit: for j in range(batch_len): del deref(link_union_set)[j] @@ -468,7 +483,9 @@ cdef class RouteChoiceSet: checkpoint.write(table) del table else: - pass # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in memory + # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in + # memory + pass # Here we decide if we wish to preserve our results for later saving/link loading if where is not None: @@ -490,8 +507,6 @@ cdef class RouteChoiceSet: # we should copy it to keep it around self.ods = new vector[pair[long long, long long]](c_ods) - # self.link_union ?? This could be saved as a partial results from the computation above, although it isn't easy to get out rn - @cython.initializedcheck(False) cdef void path_find( RouteChoiceSet self, @@ -569,13 +584,14 @@ cdef class RouteChoiceSet: max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX - queue.push_back(new unordered_set[long long]()) # Start with no edges banned + queue.push_back(new unordered_set[long long]()) # Start with no edges banned route_set = new RouteSet_t() rng.seed(seed) - # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned edges to consider + # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned + # edges to consider for depth in range(max_depth): - if route_set.size() >= max_routes or queue.size() == 0: + if miss_count > max_misses or route_set.size() >= max_routes or queue.size() == 0: break # If we could potentially fill the route_set after this depth, shuffle the queue @@ -583,13 +599,23 @@ cdef class RouteChoiceSet: shuffle(queue.begin(), queue.end(), rng) for banned in queue: 
- # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and reverse them as well + # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and + # reverse them as well memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + RouteChoiceSet.path_find( + self, + origin_index, + dest_index, + thread_cost, + thread_predecessors, + thread_conn, + thread_b_nodes, + _thread_reached_first + ) # Mark this set of banned links as seen removed_links.insert(banned) @@ -597,7 +623,8 @@ cdef class RouteChoiceSet: # If the destination is reachable we must build the path and readd if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know + # how long it'll be p = dest_index while p != origin_index: connector = thread_conn[p] @@ -607,14 +634,16 @@ cdef class RouteChoiceSet: reverse(vec.begin(), vec.end()) for connector in deref(vec): - # This is one area for potential improvement. Here we construct a new set from the old one, copying all the elements - # then add a single element. An incremental set hash function could be of use. However, the since of this set is - # directly dependent on the current depth and as the route set size grows so incredibly fast the depth will rarely get - # high enough for this to matter. - # Copy the previously banned links, then for each vector in the path we add one and push it onto our queue + # This is one area for potential improvement. 
Here we construct a new set from the old one, + # copying all the elements then add a single element. An incremental set hash function could be + # of use. However, the since of this set is directly dependent on the current depth and as the + # route set size grows so incredibly fast the depth will rarely get high enough for this to + # matter. Copy the previously banned links, then for each vector in the path we add one and + # push it onto our queue new_banned = new unordered_set[long long](deref(banned)) new_banned.insert(connector) - # If we've already seen this set of removed links before we already know what the path is and its in our route set + # If we've already seen this set of removed links before we already know what the path is and + # its in our route set if removed_links.find(new_banned) != removed_links.end(): del new_banned else: @@ -676,11 +705,21 @@ cdef class RouteChoiceSet: if route_set.size() >= max_routes: break - RouteChoiceSet.path_find(self, origin_index, dest_index, thread_cost, thread_predecessors, thread_conn, thread_b_nodes, _thread_reached_first) + RouteChoiceSet.path_find( + self, + origin_index, + dest_index, + thread_cost, + thread_predecessors, + thread_conn, + thread_b_nodes, + _thread_reached_first + ) if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how long it'll be + # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how + # long it'll be p = dest_index while p != origin_index: connector = thread_conn[p] @@ -707,17 +746,18 @@ cdef class RouteChoiceSet: @cython.boundscheck(False) @cython.initializedcheck(False) @staticmethod - cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set, vector[long long] &link_union) noexcept nogil: + cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) 
noexcept nogil: cdef: vector[long long] *keys vector[long long] *counts + vector[long long] link_union + vector[long long].const_iterator union_iter + vector[long long] *route # Scratch objects size_t length, count long long link, i - link_union.clear() - keys = new vector[long long]() counts = new vector[long long]() @@ -731,8 +771,8 @@ cdef class RouteChoiceSet: sort(link_union.begin(), link_union.end()) - union_iter = link_union.begin() - while union_iter != link_union.end(): + union_iter = link_union.cbegin() + while union_iter != link_union.cend(): count = 0 link = deref(union_iter) while link == deref(union_iter): @@ -809,7 +849,8 @@ cdef class RouteChoiceSet: # We want to find the index of the link, and use that to look up it's frequency link_iter = lower_bound(freq_set.first.begin(), freq_set.first.end(), link) - path_overlap = path_overlap + cost_view[link] / deref(freq_set.second)[link_iter - freq_set.first.begin()] + path_overlap = path_overlap + cost_view[link] \ + / deref(freq_set.second)[link_iter - freq_set.first.begin()] path_overlap_vec.push_back(path_overlap / total_cost[j]) @@ -838,11 +879,13 @@ cdef class RouteChoiceSet: prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) - # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will lead to NaN results + # Beware when refactoring the below, the scale of the costs may cause floating point errors. 
Large costs will + # lead to NaN results for i in range(total_cost.size()): inv_prob = 0.0 for j in range(total_cost.size()): - inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) * exp(-theta * (total_cost[j] - total_cost[i])) + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ + * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) @@ -858,7 +901,7 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") - cores = cores if cores > 0 else openmp.omp_get_num_threads() + cores = cores if cores > 0 else omp_get_num_threads() cdef: vector[vector[double] *] *path_files = nullptr @@ -879,7 +922,6 @@ cdef class RouteChoiceSet: tmp.append(deref(vec)) print(tmp) - def apply_link_loading_func(m): if generate_path_files: ll = self.apply_link_loading_from_path_files( @@ -889,7 +931,8 @@ cdef class RouteChoiceSet: else: ll = self.apply_link_loading(m) - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without transferring owner ship. + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without + # transferring owner ship. compressed = &deref(ll)[0] actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) @@ -903,7 +946,6 @@ cdef class RouteChoiceSet: del ll return actual.reshape(-1), compressed.reshape(-1) - if len(matrix.view_names) == 1: link_loads = apply_link_loading_func(matrix.matrix_view) else: @@ -945,9 +987,6 @@ cdef class RouteChoiceSet: long long i with parallel(num_threads=cores): - # The link union needs to be allocated per thread as scratch space, as its of unknown length we can't allocated a matrix of them. 
- # Additionally getting them to be reused between batches is complicated, instead we just get a new one each batch - for i in prange(ods.size()): link_union = link_union_set[i] loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME @@ -961,15 +1000,18 @@ cdef class RouteChoiceSet: if prob == 0.0: continue - # For each link in the route, we need to assign the appropriate demand * prob - # Because the link union is known to be sorted, if the links in the route are also sorted we can just step - # along both arrays simultaneously, skipping elements in the link_union when appropriate. This allows us - # to operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense formulation. - # This is also incredibly cache efficient, the only downsides are that the code is harder to read - # and it requires sorting the route. NOTE: the sorting of routes is technically something that is already - # computed, during the computation of the link frequency we merge and sort all links, if we instead sorted - # then used an N-way merge we could reuse the sorted routes and the sorted link union. - links = new vector[long long](deref(route)) # we copy the links in case the routes haven't already been saved # FIXME FREE ME + # For each link in the route, we need to assign the appropriate demand * prob Because the link union + # is known to be sorted, if the links in the route are also sorted we can just step along both + # arrays simultaneously, skipping elements in the link_union when appropriate. This allows us to + # operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense + # formulation. This is also incredibly cache efficient, the only downsides are that the code is + # harder to read and it requires sorting the route. 
NOTE: the sorting of routes is technically + # something that is already computed, during the computation of the link frequency we merge and sort + # all links, if we instead sorted then used an N-way merge we could reuse the sorted routes and the + # sorted link union. + + # We copy the links in case the routes haven't already been saved # FIXME FREE ME + links = new vector[long long](deref(route)) sort(links.begin(), links.end()) # links and link_union are sorted, and links is a subset of link_union @@ -1024,7 +1066,7 @@ cdef class RouteChoiceSet: for j in range(link_union.size()): link = deref(link_union)[j] - deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] # += here results in all zeros? Odd + deref(link_loads)[link] = deref(link_loads)[link] + demand * deref(loads)[j] return link_loads @@ -1117,8 +1159,9 @@ cdef class RouteChoiceSet: for i in range(ods.size()): route_set = route_sets[i] - # Instead of construction a "list of lists" style object for storing the route sets we instead will construct one big array of link ids - # with a corresponding offsets array that indicates where each new row (path) starts. + # Instead of construction a "list of lists" style object for storing the route sets we instead will + # construct one big array of link ids with a corresponding offsets array that indicates where each new row + # (path) starts. 
for route in deref(route_set): o_col.Append(ods[i].first) d_col.Append(ods[i].second) @@ -1141,7 +1184,13 @@ cdef class RouteChoiceSet: offset_builder.Append(offset) # Mark the end of the array in offsets offset_builder.Finish(&offsets) - route_set_results = libpa.CListArray.FromArraysAndType(route_set_dtype, deref(offsets.get()), deref(paths.get()), pool, shared_ptr[libpa.CBuffer]()) + route_set_results = libpa.CListArray.FromArraysAndType( + route_set_dtype, + deref(offsets.get()), + deref(paths.get()), + pool, + shared_ptr[libpa.CBuffer]() + ) o_col.Finish(&columns[0]) d_col.Finish(&columns[1]) @@ -1152,7 +1201,9 @@ cdef class RouteChoiceSet: path_overlap_col.Finish(&columns[4]) prob_col.Finish(&columns[5]) - cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema(RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema) + cdef shared_ptr[libpa.CSchema] schema = libpa.pyarrow_unwrap_schema( + RouteChoiceSet.psl_schema if psl else RouteChoiceSet.schema + ) cdef shared_ptr[libpa.CTable] table = libpa.CTable.MakeFromArrays(schema, columns) del path_builder @@ -1195,7 +1246,7 @@ cdef class Checkpoint: A small wrapper class to write a dataset partition by partition """ - def __init__(self, where, schema, partition_cols = None): + def __init__(self, where, schema, partition_cols=None): """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" self.where = pathlib.Path(where) self.schema = schema diff --git a/setup.cfg b/setup.cfg index eb3eee575..0651c05d8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -2,4 +2,8 @@ universal = 1 [metadata] -license_files = [LICENSE.TXT] \ No newline at end of file +license_files = [LICENSE.TXT] + +[pycodestyle] +max-line-length = 120 +ignore = E225 \ No newline at end of file From 5b5f6aed4372ff1377f4c8a0007a30ba66de8ec1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:12:31 +1000 Subject: [PATCH 32/52] Spelling, remove clamping, make algorithm positional or keyword 
arg --- aequilibrae/paths/route_choice.py | 21 ++++++++------------ tests/aequilibrae/paths/test_route_choice.py | 4 ++-- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 40c726947..56c4b0751 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -24,7 +24,7 @@ class RouteChoice: } def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): - self.paramaters = self.default_paramaters.copy() + self.parameters = self.default_paramaters.copy() self.procedure_id = uuid4().hex proj = project or get_active_project(must_exist=False) @@ -50,7 +50,7 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self._config = {} - def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: + def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: """ Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. 
@@ -101,7 +101,7 @@ def set_choice_set_generation(self, algorithm: str, **kwargs) -> None: self.algorithm = algo self._config["Algorithm"] = algo - self.paramaters = defaults | kwargs + self.parameters = defaults | kwargs def set_cores(self, cores: int) -> None: """Allows one to set the number of cores to be used @@ -154,8 +154,8 @@ def prepare(self, nodes: Union[List[int], List[Tuple[int, int]]]) -> None: if all( isinstance(pair, tuple) and len(pair) == 2 - and isinstance(pair[0], (int, np.unsignedinteger)) - and isinstance(pair[1], (int, np.unsignedinteger)) + and isinstance(pair[0], (int, np.integer)) + and isinstance(pair[1], (int, np.integer)) for pair in nodes ): self.nodes = nodes @@ -193,7 +193,7 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool path_size_logit=perform_assignment, cores=self.cores, where=str(self.where) if self.where is not None else None, - **self.paramaters, + **self.parameters, ) def execute(self, perform_assignment: bool = False) -> None: @@ -223,7 +223,7 @@ def execute(self, perform_assignment: bool = False) -> None: path_size_logit=perform_assignment, cores=self.cores, where=str(self.where) if self.where is not None else None, - **self.paramaters, + **self.parameters, ) def info(self) -> dict: @@ -249,7 +249,7 @@ def info(self) -> dict: "Matrix totals": matrix_totals, "Computer name": socket.gethostname(), "Procedure ID": self.procedure_id, - "Parameters": self.paramaters, + "Parameters": self.parameters, } return info @@ -289,7 +289,6 @@ def get_load_results( :Arguments: **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. - **clamp** (:obj:`bool`): Whether or not to treat values `< 1e-15` as `0.0`. 
:Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): @@ -319,10 +318,6 @@ def get_load_results( self.link_loads = {fields[0]: tmp[0]} self.compact_link_loads = {fields[0]: tmp[1]} - if clamp: - for v in itertools.chain(self.link_loads.values(), self.compact_link_loads.values()): - v[(v < 1e-15)] = 0.0 - # Get a mapping from the compressed graph to/from the network graph m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) m_compact = _get_graph_to_network_mapping( diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 8d5542f24..7ed57c67c 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -274,12 +274,12 @@ def test_set_choice_set_generation(self): rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - rc.paramaters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} ) rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( - rc.paramaters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + rc.parameters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} ) with self.assertRaises(ValueError): From 858e60cd54e7ca531a7bd7b5b1c513b8600e67da Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:20:05 +1000 Subject: [PATCH 33/52] Forget import --- aequilibrae/paths/route_choice_set.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index e846b0311..ef7b2126d 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1,6 +1,7 @@ # cython: 
language_level=3str from aequilibrae.paths.graph import Graph +from aequilibrae.matrix import AequilibraeMatrix from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc @@ -25,7 +26,8 @@ from typing import List, Tuple import numpy as np import pyarrow as pa -from aequilibrae.matrix import AequilibraeMatrix +import pyarrow.dataset +import pyarrow.parquet as pq cimport numpy as np # Numpy *must* be cimport'd BEFORE pyarrow.lib, there's nothing quite like Cython. cimport pyarrow as pa From a8c72db884771e089c553303b1f856ffebb87bb6 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:21:45 +1000 Subject: [PATCH 34/52] Skip 3.9 builds --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8fab0fcf1..8abeb35de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,10 @@ max-complexity = 20 [tool.cibuildwheel] # While we test this facility we we will only build for 3.10 #build = ["cp310-*"] -build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] +build = ["cp310-*", "cp311-*", "cp312-*"] # We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows -skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] test-skip = "" archs = ["auto"] From bb738647e82737d47b790a18889ab8fbc147baac Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:23:01 +1000 Subject: [PATCH 35/52] Revert "Skip 3.9 builds" This reverts commit a8c72db884771e089c553303b1f856ffebb87bb6. 
--- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8abeb35de..8fab0fcf1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,10 +59,10 @@ max-complexity = 20 [tool.cibuildwheel] # While we test this facility we we will only build for 3.10 #build = ["cp310-*"] -build = ["cp310-*", "cp311-*", "cp312-*"] +build = ["cp39-*","cp310-*", "cp311-*", "cp312-*"] # We do not build wheels for Python 3.6 or 3.7, or for 32-bit in either Linux or Windows -skip = ["cp36-*", "cp37-*", "cp38-*", "cp39-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] +skip = ["cp36-*", "cp37-*", "cp38-*", "*-win32", "*-manylinux_i686", "*-musllinux*", "*-s390x-*", "*-ppc64le-*"] test-skip = "" archs = ["auto"] From 2c69ecd89f3e24f06306f28344b3eac5f4f4969b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 14:28:54 +1000 Subject: [PATCH 36/52] Drop 3.8 from unit tests --- .github/workflows/unit_tests.yml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index b8f65c694..fa8fb93f0 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -31,7 +31,7 @@ jobs: runs-on: ${{ matrix.os}} strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9', '3.10', '3.11', '3.12'] os: [windows-latest, ubuntu-latest] max-parallel: 20 diff --git a/setup.py b/setup.py index e9a851217..804ac8f38 100644 --- a/setup.py +++ b/setup.py @@ -123,7 +123,6 @@ license_files=("LICENSE.TXT",), classifiers=[ "Programming Language :: Python", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", From f55eac93e6c051330f66e56bfdf1af0334d019bf Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 27 Mar 2024 16:31:17 +1000 Subject: [PATCH 
37/52] Don't run off the end of the vector --- aequilibrae/paths/route_choice_set.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index ef7b2126d..fe8ccfc71 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -777,7 +777,7 @@ cdef class RouteChoiceSet: while union_iter != link_union.cend(): count = 0 link = deref(union_iter) - while link == deref(union_iter): + while link == deref(union_iter) and union_iter != link_union.cend(): count = count + 1 inc(union_iter) @@ -1022,7 +1022,7 @@ cdef class RouteChoiceSet: while link_iter != links.cend(): # Find the next location for the current link in links - while deref(link_iter) != deref(link_union_iter): + while deref(link_iter) != deref(link_union_iter) and link_iter != links.cend(): inc(link_union_iter) link_loc = link_union_iter - link_union.cbegin() From 0c7f1db435e25c107a724fa04d98bc6f691b529f Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 23 Apr 2024 17:13:55 +1000 Subject: [PATCH 38/52] Remove FIXMEs, update docs strings, spelling errors --- aequilibrae/paths/route_choice.py | 11 +- aequilibrae/paths/route_choice_set.pxd | 11 +- aequilibrae/paths/route_choice_set.pyx | 153 +++++++++++-------- tests/aequilibrae/paths/test_route_choice.py | 5 +- 4 files changed, 105 insertions(+), 75 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 56c4b0751..e41002047 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -229,7 +229,7 @@ def execute(self, perform_assignment: bool = False) -> None: def info(self) -> dict: """Returns information for the transit assignment procedure - Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID'. + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', and 'Parameters'. 
The classes key is also a dictionary with all the user classes per transit class and their respective matrix totals @@ -260,9 +260,10 @@ def log_specification(self): def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: """Returns the results of the route choice procedure - Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). Represents paths from ``origin`` to ``destination``. + Returns a table of OD pairs to lists of link IDs for each OD pair provided (as columns). + Represents paths from ``origin`` to ``destination``. - If `save_routes` was specified then a Pyarrow dataset is returned. The call is responsible for reading this dataset. + If `save_routes` was specified then a Pyarrow dataset is returned. The caller is responsible for reading this dataset. :Returns: **results** (:obj:`pa.Table`): Table with the results of the route choice procedure @@ -292,8 +293,8 @@ def get_load_results( :Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): - A tuple of uncompressed and compressed DataFrames with the link loading results. Or - the requested link loading result.s + A tuple of uncompressed and compressed link loading results as DataFrames. Or + the requested link loading results. """ diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 0044a28c6..a8c317796 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -43,7 +43,7 @@ cdef extern from "" namespace "std" nogil: cdef extern from "" namespace "std" nogil: pair[T, U] make_pair[T, U](T&& t, U&& u) -# To define our own hashing functions we have to write a little cpp. The string is inlined directly into route_choice.cpp +# To define our own hashing functions we have to write a little C++. 
The string is inlined directly into route_choice.cpp # To make Cython aware of our hash types we also must declare them with the right signatures # # OrderedVectorPointerHasher: This hash function is for hashing the routes, thus it should be order *DEPENDENT*. @@ -55,8 +55,8 @@ cdef extern from "" namespace "std" nogil: # New hash functions and their use in authentication and set equality # https://doi.org/10.1016/0022-0000(81)90033-7 # -# PointerDereferenceEqualTo: Because we are storing and hashing the pointers to objects to avoid unnessecary copies we must -# define our own comparitor to resolve hash collisions. Without this equaility operator the bare pointers are compared. +# PointerDereferenceEqualTo: Because we are storing and hashing the pointers to objects to avoid unnecessary copies we must +# define our own comparator to resolve hash collisions. Without this equality operator the bare pointers are compared. cdef extern from * nogil: """ // Source: https://stackoverflow.com/a/72073933 @@ -104,7 +104,7 @@ cdef extern from * nogil: bool operator()(const T& lhs, const T& rhs) const -# For typing (haha) convenince, the types names are getting long +# For typing (haha) convenience, the types names are getting long ctypedef unordered_set[vector[long long] *, OrderedVectorPointerHasher, PointerDereferenceEqualTo[vector[long long] *]] RouteSet_t ctypedef unordered_set[unordered_set[long long] *, UnorderedSetPointerHasher, PointerDereferenceEqualTo[unordered_set[long long] *]] LinkSet_t ctypedef vector[pair[unordered_set[long long] *, vector[long long] *]] RouteMap_t @@ -153,6 +153,8 @@ cdef class RouteChoiceSet: unsigned int [:] mapping_idx unsigned int [:] mapping_data + cdef void deallocate(RouteChoiceSet self) nogil + cdef void path_find( RouteChoiceSet self, long origin_index, @@ -228,6 +230,7 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] 
*apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil + cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores) cdef shared_ptr[libpa.CTable] make_table_from_results( RouteChoiceSet self, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index fe8ccfc71..5dbdba814 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -16,7 +16,7 @@ from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport unordered_set from libcpp.utility cimport pair from libcpp.vector cimport vector -from openmp cimport omp_get_num_threads +from openmp cimport omp_get_max_threads import itertools import logging @@ -147,14 +147,12 @@ cdef class RouteChoiceSet: def __dealloc__(self): """ C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a - partially deallocated state already. + partially deallocated state already. Do not call any other Python method. """ - self.deallocate_results() + self.deallocate() - def deallocate_results(self): - """ - Deallocate stored results, existing extracted results are not invalidated. - """ + cdef void deallocate(RouteChoiceSet self) nogil: + """__dealloc__ cannot be called from normal code.""" cdef: RouteSet_t *route_set vector[long long] *link_vec @@ -212,8 +210,7 @@ cdef class RouteChoiceSet: choose a centroid. :Returns: **route set** (:obj:`list[tuple[int, ...]]): Returns a list of unique variable length tuples of - compact link IDs. Represents paths from ``origin`` to ``destination``. - + link IDs. Represents paths from ``origin`` to ``destination``. 
""" self.batched([(origin, destination)], *args, **kwargs) where = kwargs.get("where", None) @@ -269,7 +266,6 @@ cdef class RouteChoiceSet: **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible with ``bfsle=True``. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. - """ cdef: long long o, d @@ -301,7 +297,7 @@ cdef class RouteChoiceSet: unsigned int c_max_depth = max_depth unsigned int c_max_misses = max_misses unsigned int c_seed = seed - unsigned int c_cores = cores if cores > 0 else omp_get_num_threads() + unsigned int c_cores = cores if cores > 0 else omp_get_max_threads() vector[pair[long long, long long]] c_ods @@ -362,10 +358,10 @@ cdef class RouteChoiceSet: path_overlap_set = new vector[vector[double] *](max_results_len) prob_set = new vector[vector[double] *](max_results_len) - self.deallocate_results() # We have be storing results from a previous run + self.deallocate() # We may be storing results from a previous run for batch in batches: - c_ods = batch # Convert the batch to a cpp vector, this isn't strictly efficient but is nicer + c_ods = batch # Convert the batch to a C++ vector, this isn't strictly efficient but is nicer batch_len = c_ods.size() # We know we've allocated enough size to store all max length batch but we resize to a smaller size when not # needed @@ -485,8 +481,8 @@ cdef class RouteChoiceSet: checkpoint.write(table) del table else: - # where is None ==> len(batches) == 1, i.e. there was only one batch and we should keep everything in - # memory + # where is None implies len(batches) == 1, i.e. 
there was only one batch and we should keep everything + # in memory pass # Here we decide if we wish to preserve our results for later saving/link loading @@ -625,7 +621,7 @@ cdef class RouteChoiceSet: # If the destination is reachable we must build the path and readd if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know + # Walk the predecessors tree to find our path, we build it up in a C++ vector because we can't know # how long it'll be p = dest_index while p != origin_index: @@ -689,6 +685,7 @@ cdef class RouteChoiceSet: double penatly, unsigned int seed ) noexcept nogil: + """Link penalisation algorithm for choice set generation.""" cdef: RouteSet_t *route_set @@ -720,7 +717,7 @@ cdef class RouteChoiceSet: if thread_predecessors[dest_index] >= 0: vec = new vector[long long]() - # Walk the predecessors tree to find our path, we build it up in a cpp vector because we can't know how + # Walk the predecessors tree to find our path, we build it up in a C++ vector because we can't know how # long it'll be p = dest_index while p != origin_index: @@ -728,12 +725,12 @@ cdef class RouteChoiceSet: p = thread_predecessors[p] vec.push_back(connector) - reverse(vec.begin(), vec.end()) - for connector in deref(vec): thread_cost[connector] *= penatly - # To prevent runaway algorithms if we find a n duplicate routes we should stop + reverse(vec.begin(), vec.end()) + + # To prevent runaway algorithms if we find N duplicate routes we should stop status = route_set.insert(vec) miss_count = miss_count + (not status.second) if miss_count > max_misses: @@ -749,6 +746,11 @@ cdef class RouteChoiceSet: @cython.initializedcheck(False) @staticmethod cdef pair[vector[long long] *, vector[long long] *] compute_frequency(RouteSet_t *route_set) noexcept nogil: + """ + Compute a frequency map for each route. 
+ + Each node at index i in the first returned vector has frequency at index i in the second vector. + """ cdef: vector[long long] *keys vector[long long] *counts @@ -792,6 +794,7 @@ cdef class RouteChoiceSet: @cython.initializedcheck(False) @staticmethod cdef vector[double] *compute_cost(RouteSet_t *route_set, double[:] cost_view) noexcept nogil: + """Compute the cost each route.""" cdef: vector[double] *cost_vec @@ -823,6 +826,8 @@ cdef class RouteChoiceSet: double[:] cost_view ) noexcept nogil: """ + Compute the path overlap figure based on the route cost and frequency. + Notation changes: i: j a: link @@ -871,6 +876,7 @@ cdef class RouteChoiceSet: double beta, double theta ) noexcept nogil: + """Compute a probability for each route in the route set based on the path overlap.""" cdef: # Scratch objects vector[double] *prob_vec @@ -895,6 +901,9 @@ cdef class RouteChoiceSet: @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): + """ + Apply link loading to the network using the demand matrix and the previously computed route sets. 
+ """ if self.ods == nullptr \ or self.link_union_set == nullptr \ or self.prob_set == nullptr: @@ -903,11 +912,12 @@ cdef class RouteChoiceSet: if not isinstance(matrix, AequilibraeMatrix): raise ValueError("`matrix` is not an AequilibraE matrix") - cores = cores if cores > 0 else omp_get_num_threads() + cores = cores if cores > 0 else omp_get_max_threads() cdef: vector[vector[double] *] *path_files = nullptr vector[double] *ll + vector[double] *vec if generate_path_files: path_files = RouteChoiceSet.compute_path_files( @@ -918,46 +928,49 @@ cdef class RouteChoiceSet: cores, ) - # FIXME, write out path files - tmp = [] - for vec in deref(path_files): - tmp.append(deref(vec)) - print(tmp) - - def apply_link_loading_func(m): - if generate_path_files: - ll = self.apply_link_loading_from_path_files( - m, - deref(path_files), - ) - else: - ll = self.apply_link_loading(m) - - # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without - # transferring owner ship. 
- compressed = &deref(ll)[0] - - actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) - assign_link_loads_cython( - actual, - compressed, - self.graph_compressed_id_view, - cores - ) - compressed = np.array(compressed, copy=True) - del ll - return actual.reshape(-1), compressed.reshape(-1) + # # FIXME, write out path files + # tmp = [] + # for vec in deref(path_files): + # tmp.append(deref(vec)) + # print(tmp) if len(matrix.view_names) == 1: - link_loads = apply_link_loading_func(matrix.matrix_view) + link_loads = self.apply_link_loading_func(matrix.matrix_view, path_files, generate_path_files, cores) else: link_loads = { - name: apply_link_loading_func(matrix.matrix_view[:, :, i]) + name: self.apply_link_loading_func(matrix.matrix_view[:, :, i], path_files, generate_path_files, cores) for i, name in enumerate(matrix.names) } + if generate_path_files: + for vec in deref(path_files): + del vec + del path_files + return link_loads + cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores): + """Helper function for self.link_loading. Cannot free a pointer captured in a local scope by a lambda.""" + if generate_path_files: + ll = self.apply_link_loading_from_path_files(m, deref(pf)) + else: + ll = self.apply_link_loading(m) + + # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without + # transferring ownership. 
+ compressed = &deref(ll)[0] + + actual = np.zeros((self.graph_compressed_id_view.shape[0], 1), dtype=np.float64) + assign_link_loads_cython( + actual, + compressed, + self.graph_compressed_id_view, + cores + ) + compressed = np.array(compressed, copy=True) + del ll + return actual.reshape(-1), compressed.reshape(-1) + @cython.boundscheck(False) @cython.wraparound(False) @cython.embedsignature(True) @@ -976,10 +989,10 @@ cdef class RouteChoiceSet: Returns vector of vectors of link loads corresponding to each link in it's link_union_set. """ cdef: - vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) # FIXME FREE ME + vector[vector[double] *] *link_loads = new vector[vector[double] *](ods.size()) vector[long long] *link_union vector[double] *loads - vector[double] *link + vector[long long] *links vector[long long].const_iterator link_union_iter vector[long long].const_iterator link_iter @@ -991,7 +1004,7 @@ cdef class RouteChoiceSet: with parallel(num_threads=cores): for i in prange(ods.size()): link_union = link_union_set[i] - loads = new vector[double](link_union.size(), 0.0) # FIXME FREE ME + loads = new vector[double](link_union.size(), 0.0) # We now iterate over all routes in the route_set, each route has an associated probability route_prob_iter = prob_set[i].cbegin() @@ -1006,13 +1019,14 @@ cdef class RouteChoiceSet: # is known to be sorted, if the links in the route are also sorted we can just step along both # arrays simultaneously, skipping elements in the link_union when appropriate. This allows us to # operate on the link loads as a sparse map and avoid blowing up memory usage when using a dense - # formulation. This is also incredibly cache efficient, the only downsides are that the code is - # harder to read and it requires sorting the route. 
NOTE: the sorting of routes is technically - # something that is already computed, during the computation of the link frequency we merge and sort - # all links, if we instead sorted then used an N-way merge we could reuse the sorted routes and the - # sorted link union. + # formulation. This is also more cache efficient, the only downsides are that the code is + # harder to read and it requires sorting the route. + + # NOTE: the sorting of routes is technically something that is already computed, during the + # computation of the link frequency we merge and sort all links, if we instead sorted then used an + # N-way merge we could reuse the sorted routes and the sorted link union. - # We copy the links in case the routes haven't already been saved # FIXME FREE ME + # We copy the links in case the routes haven't already been saved links = new vector[long long](deref(route)) sort(links.begin(), links.end()) @@ -1030,6 +1044,8 @@ cdef class RouteChoiceSet: inc(link_iter) + del links + deref(link_loads)[i] = loads return link_loads @@ -1056,7 +1072,7 @@ cdef class RouteChoiceSet: long origin_index, dest_index double demand - vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[double] *link_loads = new vector[double](self.num_links) for i in range(self.ods.size()): loads = path_files[i] @@ -1088,7 +1104,7 @@ cdef class RouteChoiceSet: long origin_index, dest_index double demand, prob, load - vector[double] *link_loads = new vector[double](self.num_links) # FIXME FREE ME + vector[double] *link_loads = new vector[double](self.num_links) for i in range(self.ods.size()): route_set = deref(self.results)[i] @@ -1121,6 +1137,15 @@ cdef class RouteChoiceSet: vector[vector[double] *] *path_overlap_set, vector[vector[double] *] *prob_set ): + """ + Construct an Arrow table from C++ stdlib structures. + + Note: this function directly utilises the Arrow C++ API, the Arrow Cython API is not sufficient. 
+ See `route_choice_set.pxd` for Cython declarations. + + Returns a shared pointer to a Arrow CTable. This should be wrapped in a Python table before use. + Compressed link IDs are expanded to full network link IDs. + """ cdef: shared_ptr[libpa.CArray] paths shared_ptr[libpa.CArray] offsets @@ -1162,7 +1187,7 @@ cdef class RouteChoiceSet: route_set = route_sets[i] # Instead of construction a "list of lists" style object for storing the route sets we instead will - # construct one big array of link ids with a corresponding offsets array that indicates where each new row + # construct one big array of link IDs with a corresponding offsets array that indicates where each new row # (path) starts. for route in deref(route_set): o_col.Append(ods[i].first) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 7ed57c67c..ca3d31529 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -8,7 +8,7 @@ import numpy as np import pyarrow as pa -from aequilibrae import Graph, Project +from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice @@ -55,7 +55,8 @@ def test_route_choice(self): results, [(2, 6, 9, 13, 25, 30, 53, 59)], "Initial route isn't the shortest A* route" ) - # A depth of 2 should yield the same initial route plus the length of that route more routes minus duplicates and unreachable paths + # A depth of 2 should yield the same initial route plus the length of that route more routes minus + # duplicates and unreachable paths results2 = rc.run(a, b, max_routes=0, max_depth=2, **kwargs) self.assertTrue(results[0] in results2, "Initial route isn't present in a lower depth") From fa5b2e68da5bb138f31cd8ee8f6c8e49e2dabaa1 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 10:00:41 +1000 Subject: [PATCH 39/52] Add test with known results --- 
tests/aequilibrae/paths/test_route_choice.py | 48 ++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index ca3d31529..8eca8b14e 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -11,6 +11,7 @@ from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice +from aequilibrae.matrix import AequilibraeMatrix from ...data import siouxfalls_project @@ -225,6 +226,53 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(link_loads, link_loads2) + def test_known_results(self): + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all zeros", "single one"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) + + for od in nodes: + mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 + + mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 + + link_loads = rc.link_loading(mat) + + with self.subTest(matrix="all zeros"): + u, c = link_loads["all zeros"] + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + + with self.subTest(matrix="single one"): + u, c = link_loads["single one"] + link = self.graph.graph[ + (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) + ] + + lid = link.link_id.values[0] + c_lid = link.__compressed_id__.values[0] + + self.assertAlmostEqual(u[lid - 
1], 1.0) + self.assertAlmostEqual(c[c_lid], 1.0) + + u[lid - 1] = 0.0 + c[c_lid] = 0.0 + + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + class TestRouteChoice(TestCase): def setUp(self) -> None: From 5d0c731306f16d7202dd4ffb16fa89a89933b1cf Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 17:42:25 +1000 Subject: [PATCH 40/52] Move graph index building to Cython for free 1.5x --- aequilibrae/paths/graph.py | 62 +---------------------- aequilibrae/paths/graph_building.pyx | 73 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 60 deletions(-) diff --git a/aequilibrae/paths/graph.py b/aequilibrae/paths/graph.py index a8ee6a2ce..495c7ff72 100644 --- a/aequilibrae/paths/graph.py +++ b/aequilibrae/paths/graph.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from aequilibrae.paths.graph_building import build_compressed_graph +from aequilibrae.paths.graph_building import build_compressed_graph, create_compressed_link_network_mapping from aequilibrae.context import get_logger @@ -553,65 +553,7 @@ def create_compressed_link_network_mapping(self): **data** (:obj:`np.array`): array of link ids """ - # Cache the result, this isn't a huge computation but isn't worth doing twice - if ( - self.compressed_link_network_mapping_idx is not None - and self.compressed_link_network_mapping_data is not None - and self.network_compressed_node_mapping is not None - ): - return ( - self.compressed_link_network_mapping_idx, - self.compressed_link_network_mapping_data, - self.network_compressed_node_mapping, - ) - - # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't - # bother redoing sorting it. This method would be faster using a Cython module but it's a one time compute - - # Some links are completely removed from the network, they are assigned ID `self.compact_graph.id.max() + 1`, - # we skip them. 
- filtered = self.graph[self.graph.__compressed_id__ != self.compact_graph.id.max() + 1] - gb = filtered.groupby(by="__compressed_id__", sort=True) - idx = np.zeros(self.compact_num_links + 1, dtype=np.uint32) - data = np.zeros(len(filtered), dtype=np.uint32) - - node_mapping = np.full(self.num_nodes, -1) - - i = 0 - for compressed_id, df in gb: - idx[compressed_id] = i - values = df.link_id.values - a = df.a_node.values - b = df.b_node.values - - # In order to ensure that the link IDs come out in the correct order we must walk the links - # we do this assuming the `a` array is sorted. - j = 0 - # Find the missing a_node, this is the starting of the chain. We cannot rely on the node ordering to do a simple lookup - - a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] - while True: - tmp = a.searchsorted(x) - if tmp < len(a) and a[tmp] == x: - x = b[tmp] - data[i + j] = values[tmp] - else: - break - j += 1 - - b_node = x - node_mapping[a_node] = self.compact_graph["a_node"].iat[compressed_id] - node_mapping[b_node] = self.compact_graph["b_node"].iat[compressed_id] - - i += len(values) - - idx[-1] = i - - self.compressed_link_network_mapping_idx = idx - self.compressed_link_network_mapping_data = data - self.network_compressed_node_mapping = node_mapping - - return idx, data, node_mapping + return create_compressed_link_network_mapping(self) class Graph(GraphBase): diff --git a/aequilibrae/paths/graph_building.pyx b/aequilibrae/paths/graph_building.pyx index 3bb3ef4c9..724643de8 100644 --- a/aequilibrae/paths/graph_building.pyx +++ b/aequilibrae/paths/graph_building.pyx @@ -381,3 +381,76 @@ def build_compressed_graph(graph): # If will refer all the links that have no correlation to an element beyond the last link # This element will always be zero during assignment graph.graph.__compressed_id__ = graph.graph.__compressed_id__.fillna(graph.compact_graph.id.max() + 1).astype(np.int64) + + +@cython.embedsignature(True) +@cython.boundscheck(False) 
+@cython.initializedcheck(False) +def create_compressed_link_network_mapping(graph): + # Cache the result, this isn't a huge computation but isn't worth doing twice + if ( + graph.compressed_link_network_mapping_idx is not None + and graph.compressed_link_network_mapping_data is not None + and graph.network_compressed_node_mapping is not None + ): + return ( + graph.compressed_link_network_mapping_idx, + graph.compressed_link_network_mapping_data, + graph.network_compressed_node_mapping, + ) + + cdef: + long long i, j, a_node, x, b_node, tmp, compressed_id + long long[:] b + long long[:] values + np.uint32_t[:] idx + np.uint32_t[:] data + np.int32_t[:] node_mapping + + # This method requires that graph.graph is sorted on the a_node IDs, since that's done already we don't + # bother redoing sorting it. + + # Some links are completely removed from the network, they are assigned ID `graph.compact_graph.id.max() + 1`, + # we skip them. + filtered = graph.graph[graph.graph.__compressed_id__ != graph.compact_graph.id.max() + 1] + gb = filtered.groupby(by="__compressed_id__", sort=True) + idx = np.zeros(graph.compact_num_links + 1, dtype=np.uint32) + data = np.zeros(len(filtered), dtype=np.uint32) + + node_mapping = np.full(graph.num_nodes, -1, dtype=np.int32) + + i = 0 + for compressed_id, df in gb: + idx[compressed_id] = i + values = df.link_id.values + a = df.a_node.values + b = df.b_node.values + + # In order to ensure that the link IDs come out in the correct order we must walk the links + # we do this assuming the `a` array is sorted. + j = 0 + # Find the missing a_node, this is the starting of the chain. 
We cannot rely on the node ordering to do a simple lookup + + a_node = x = a[np.isin(a, b, invert=True, assume_unique=True)][0] + while True: + tmp = a.searchsorted(x) + if tmp < len(a) and a[tmp] == x: + x = b[tmp] + data[i + j] = values[tmp] + else: + break + j += 1 + + b_node = x + node_mapping[a_node] = graph.compact_graph["a_node"].iat[compressed_id] + node_mapping[b_node] = graph.compact_graph["b_node"].iat[compressed_id] + + i += len(values) + + idx[-1] = i + + graph.compressed_link_network_mapping_idx = idx + graph.compressed_link_network_mapping_data = data + graph.network_compressed_node_mapping = node_mapping + + return idx, data, node_mapping From e290069abc9eb92e3b093627db1e94e93be299db Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 30 Apr 2024 17:43:07 +1000 Subject: [PATCH 41/52] Add select link support with sparse matrices --- aequilibrae/matrix/sparse_matrix.pxd | 13 ++ aequilibrae/matrix/sparse_matrix.pyx | 74 +++++++ aequilibrae/paths/route_choice.py | 205 ++++++++++++------- aequilibrae/paths/route_choice_set.pxd | 11 +- aequilibrae/paths/route_choice_set.pyx | 127 ++++++++++-- setup.py | 12 +- tests/aequilibrae/paths/test_route_choice.py | 8 +- 7 files changed, 351 insertions(+), 99 deletions(-) create mode 100644 aequilibrae/matrix/sparse_matrix.pxd create mode 100644 aequilibrae/matrix/sparse_matrix.pyx diff --git a/aequilibrae/matrix/sparse_matrix.pxd b/aequilibrae/matrix/sparse_matrix.pxd new file mode 100644 index 000000000..53c5b611b --- /dev/null +++ b/aequilibrae/matrix/sparse_matrix.pxd @@ -0,0 +1,13 @@ +from libcpp.vector cimport vector + +cdef class Sparse: + pass + +cdef class COO(Sparse): + cdef: + vector[size_t] *row + vector[size_t] *col + vector[double] *data + readonly object shape + + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx new file mode 100644 index 000000000..7c393834e --- /dev/null +++ 
b/aequilibrae/matrix/sparse_matrix.pyx @@ -0,0 +1,74 @@ +from libcpp.vector cimport vector +from libcpp cimport nullptr +from cython.operator cimport dereference as d + +import scipy.sparse +import numpy as np + +cdef class Sparse: + """ + A class to implement sparse matrix operations such as reading, writing, and indexing + """ + + def __cinit__(self): + """C level init. For C memory allocation and initialisation. Called exactly once per object.""" + pass + + def __init__(self): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + pass + + def __dealloc__(self): + """ + C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a + partially deallocated state already. + """ + pass + + +cdef class COO(Sparse): + """ + A class to implement sparse matrix operations such as reading, writing, and indexing + """ + + def __cinit__(self): + """C level init. For C memory allocation and initialisation. Called exactly once per object.""" + + self.row = new vector[size_t]() + self.col = new vector[size_t]() + self.data = new vector[double]() + + def __init__(self, shape=None): + """Python level init, may be called multiple times, for things that can't be done in __cinit__.""" + + self.shape = shape + + def __dealloc__(self): + """ + C level deallocation. For freeing memory allocated by this object. *Must* have GIL, `self` may be in a + partially deallocated state already. 
+ """ + + del self.row + self.row = nullptr + + del self.col + self.col = nullptr + + del self.data + self.data = nullptr + + def to_scipy(self, shape=None): + row = &d(self.row)[0] + col = &d(self.col)[0] + data = &d(self.data)[0] + + if shape is None: + shape = self.shape + + return scipy.sparse.coo_matrix((data, (row, col)), dtype=np.float64, shape=shape) + + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: + self.row.push_back(i) + self.col.push_back(j) + self.data.push_back(v) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index e41002047..8b836a42d 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -1,9 +1,11 @@ import itertools +import warnings import logging import pathlib import socket -from typing import List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union, Dict from uuid import uuid4 +from functools import cached_property import numpy as np import pandas as pd @@ -35,13 +37,16 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self.cores: int = 0 self.graph = graph self.matrix = matrix - self.__rc = None self.schema = RouteChoiceSet.schema self.psl_schema = RouteChoiceSet.psl_schema - self.compact_link_loads: Optional[np.array] = None - self.link_loads: Optional[np.array] = None + self.compact_link_loads: Optional[Dict[str, np.array]] = None + self.link_loads: Optional[Dict[str, np.array]] = None + + self.sl_compact_link_loads: Optional[Dict[str, np.array]] = None + self.sl_link_loads: Optional[Dict[str, np.array]] = None + self.results: Optional[pa.Table] = None self.where: Optional[pathlib.Path] = None self.save_path_files: bool = False @@ -49,6 +54,11 @@ def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, pro self.nodes: Optional[Union[List[int], List[Tuple[int, int]]]] = None self._config = {} + self._selected_links = {} + + @cached_property + def __rc(self) -> 
RouteChoiceSet: + return RouteChoiceSet(self.graph) def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: """ @@ -182,8 +192,6 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool :Returns: ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. """ - if self.__rc is None: - self.__rc = RouteChoiceSet(self.graph) self.results = None return self.__rc.run( @@ -213,9 +221,6 @@ def execute(self, perform_assignment: bool = False) -> None: "to perform batch route choice generation you must first prepare with the selected nodes. See `RouteChoice.prepare()`" ) - if self.__rc is None: - self.__rc = RouteChoiceSet(self.graph) - self.results = None self.__rc.batched( self.nodes, @@ -229,7 +234,8 @@ def execute(self, perform_assignment: bool = False) -> None: def info(self) -> dict: """Returns information for the transit assignment procedure - Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', and 'Parameters'. + Dictionary contains keys 'Algorithm', 'Matrix totals', 'Computer name', 'Procedure ID', 'Parameters', and + 'Select links'. The classes key is also a dictionary with all the user classes per transit class and their respective matrix totals @@ -250,6 +256,7 @@ def info(self) -> dict: "Computer name": socket.gethostname(), "Procedure ID": self.procedure_id, "Parameters": self.parameters, + "Select links": self._selected_links, } return info @@ -280,93 +287,139 @@ def get_results(self) -> Union[pa.Table, pa.dataset.Dataset]: return self.results - def get_load_results( - self, - which: str = "uncompressed", - clamp: bool = True, - ) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: + def get_load_results(self) -> Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]: """ Translates the link loading results from the graph format into the network format. 
- :Arguments: - **which** (:obj:`str`): Which results to return: only `"uncompressed"`, only `"compressed"` or `"both"`. - :Returns: **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): - A tuple of uncompressed and compressed link loading results as DataFrames. Or - the requested link loading results. - + A tuple of uncompressed and compressed link loading results as DataFrames. + Columns are the matrix name concatenated direction. """ - if not isinstance(which, str) or which not in ["uncompressed", "compressed", "both"]: - raise ValueError("`which` argument must be one of ['uncompressed', 'compressed', 'both']") - if self.matrix is None: raise ValueError( "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." ) - compressed = which == "both" or which == "compressed" - uncompressed = which == "both" or which == "uncompressed" - - fields = self.matrix.names - tmp = self.__rc.link_loading(self.matrix, self.save_path_files) - if isinstance(tmp, dict): - self.link_loads = {k: v[0] for k, v in tmp.items()} - self.compact_link_loads = {k: v[1] for k, v in tmp.items()} - else: - self.link_loads = {fields[0]: tmp[0]} - self.compact_link_loads = {fields[0]: tmp[1]} + self.link_loads = {k: v[0] for k, v in tmp.items()} + self.compact_link_loads = {k: v[1] for k, v in tmp.items()} - # Get a mapping from the compressed graph to/from the network graph + # Create a data store with a row for each uncompressed link m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + lids = np.unique(self.graph.graph.link_id.values) + uncompressed_df = self.__link_loads_to_df(m, lids, self.link_loads) + m_compact = _get_graph_to_network_mapping( self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values ) - - lids = np.unique(self.graph.graph.link_id.values) compact_lids = np.unique(self.graph.compact_graph.link_id.values) - # Create a data 
store with a row for each uncompressed link - if uncompressed: - uncompressed_df = pd.DataFrame( - {"link_id": lids} - | {k + dir: np.zeros(lids.shape) for k in self.link_loads.keys() for dir in ["_ab", "_ba"]} - ) - for k, v in self.link_loads.items(): - # Directional Flows - uncompressed_df[k + "_ab"].values[m.network_ab_idx] = np.nan_to_num(v[m.graph_ab_idx]) - uncompressed_df[k + "_ba"].values[m.network_ba_idx] = np.nan_to_num(v[m.graph_ba_idx]) - - # Tot Flow - uncompressed_df[k + "_tot"] = np.nan_to_num(uncompressed_df[k + "_ab"].values) + np.nan_to_num( - uncompressed_df[k + "_ba"].values - ) + compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.compact_link_loads) - if compressed: - compressed_df = pd.DataFrame( - {"link_id": compact_lids} - | { - k + dir: np.zeros(compact_lids.shape) - for k in self.compact_link_loads.keys() - for dir in ["_ab", "_ba"] - } - ) - for k, v in self.compact_link_loads.items(): - compressed_df[k + "_ab"].values[m_compact.network_ab_idx] = np.nan_to_num(v[m_compact.graph_ab_idx]) - compressed_df[k + "_ba"].values[m_compact.network_ba_idx] = np.nan_to_num(v[m_compact.graph_ba_idx]) + return uncompressed_df, compressed_df - # Tot Flow - compressed_df[k + "_tot"] = np.nan_to_num(compressed_df[k + "_ab"].values) + np.nan_to_num( - compressed_df[k + "_ba"].values - ) + def __link_loads_to_df(self, mapping, lids, link_loads): + df = pd.DataFrame( + {"link_id": lids} | {k + dir: np.zeros(lids.shape) for k in link_loads.keys() for dir in ["_ab", "_ba"]} + ) + for k, v in link_loads.items(): + # Directional Flows + df[k + "_ab"].values[mapping.network_ab_idx] = np.nan_to_num(v[mapping.graph_ab_idx]) + df[k + "_ba"].values[mapping.network_ba_idx] = np.nan_to_num(v[mapping.graph_ba_idx]) - if uncompressed and not compressed: - return uncompressed_df - elif not uncompressed and compressed: - return compressed_df - else: - return uncompressed_df, compressed_df + # Tot Flow + df[k + "_tot"] = np.nan_to_num(df[k + 
"_ab"].values) + np.nan_to_num(df[k + "_ba"].values)
+
+        return df
+
+    def set_select_links(self, links: Dict[str, List[Tuple[int, int]]]):
+        """
+        Set the selected links. Checks if the links and directions are valid. Translates the link IDs and
+        directions into the unique compressed link IDs used in the compact graph.
+
+        Supply `links=None` to disable select link analysis.
+
+        :Arguments:
+            **links** (:obj:`Union[None, Dict[str, List[Tuple[int, int]]]]`): mapping from a link set name to the
+                link IDs and directions to be used in select link analysis.
+        """
+        self._selected_links = {}
+
+        if links is None:
+            del self._config["select_links"]
+            return
+
+        max_id = self.graph.compact_graph.id.max() + 1
+
+        for name, link_set in links.items():
+            if len(name.split(" ")) != 1:
+                warnings.warn("Input string name has a space in it. Replacing with _")
+                name = str.join("_", name.split(" "))
+
+            link_ids = []
+            for link, dir in link_set:
+                if dir == 0:
+                    query = (self.graph.graph["link_id"] == link) & (
+                        (self.graph.graph["direction"] == -1) | (self.graph.graph["direction"] == 1)
+                    )
+                else:
+                    query = (self.graph.graph["link_id"] == link) & (self.graph.graph["direction"] == dir)
+                if not query.any():
+                    raise ValueError(f"link_id or direction {(link, dir)} is not present within graph.")
+                # Check for duplicate compressed link ids in the current link set
+                for comp_id in self.graph.graph[query]["__compressed_id__"].values:
+                    if comp_id == max_id:
+                        raise ValueError(
+                            f"link ID {link} and direction {dir} is not present in compressed graph. "
+                            "It may have been removed during dead-end removal."
+ ) + elif comp_id in link_ids: + warnings.warn( + "Two input links map to the same compressed link in the network" + f", removing superfluous link {link} and direction {dir} with compressed id {comp_id}" + ) + else: + link_ids.append(comp_id) + self._selected_links[name] = link_ids + self._config["select_links"] = str(links) def get_select_link_results(self) -> pd.DataFrame: - raise NotImplementedError() + """ + Get the select link loading results. + + :Returns: + **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + A tuple of uncompressed and compressed select link loading results as DataFrames. + Columns are the matrix name concatenated with the select link set and direction. + """ + + if self.matrix is None: + raise ValueError( + "AequilibraE matrix was not initially provided. To perform link loading set the `RouteChoice.matrix` attribute." + ) + + tmp = self.__rc.select_link_loading(self.matrix, self._selected_links) + + self.sl_link_loads = {} + self.sl_compact_link_loads = {} + self.sl_od_matrix = {} + for name, sl_res in tmp.items(): + for sl_name, res in sl_res.items(): + mat, (u, c) = res + self.sl_od_matrix[name + "_" + sl_name] = mat + self.sl_link_loads[name + "_" + sl_name] = u + self.sl_compact_link_loads[name + "_" + sl_name] = c + + # Create a data store with a row for each uncompressed link + m = _get_graph_to_network_mapping(self.graph.graph.link_id.values, self.graph.graph.direction.values) + lids = np.unique(self.graph.graph.link_id.values) + uncompressed_df = self.__link_loads_to_df(m, lids, self.sl_link_loads) + + m_compact = _get_graph_to_network_mapping( + self.graph.compact_graph.link_id.values, self.graph.compact_graph.direction.values + ) + compact_lids = np.unique(self.graph.compact_graph.link_id.values) + compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.sl_compact_link_loads) + + return uncompressed_df, compressed_df diff --git a/aequilibrae/paths/route_choice_set.pxd 
b/aequilibrae/paths/route_choice_set.pxd index a8c317796..c4a016194 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -1,5 +1,7 @@ # cython: language_level=3str from aequilibrae.paths.results import PathResults +from aequilibrae.matrix.sparse_matrix cimport COO + from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set from libcpp.unordered_map cimport unordered_map @@ -230,7 +232,14 @@ cdef class RouteChoiceSet: cdef vector[double] *apply_link_loading(RouteChoiceSet self, double[:, :] matrix_view) noexcept nogil cdef vector[double] *apply_link_loading_from_path_files(RouteChoiceSet self, double[:, :] matrix_view, vector[vector[double] *] &path_files) noexcept nogil - cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores) + cdef apply_link_loading_func(RouteChoiceSet self, vector[double] *ll, int cores) + + cdef vector[double] *apply_select_link_loading( + RouteChoiceSet self, + COO sparse_mat, + double[:, :] matrix_view, + unordered_set[long] &select_link_set + ) noexcept nogil cdef shared_ptr[libpa.CTable] make_table_from_results( RouteChoiceSet self, diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 5dbdba814..af258b1da 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -2,6 +2,7 @@ from aequilibrae.paths.graph import Graph from aequilibrae.matrix import AequilibraeMatrix +from aequilibrae.matrix.sparse_matrix cimport COO from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc @@ -916,7 +917,6 @@ cdef class RouteChoiceSet: cdef: vector[vector[double] *] *path_files = nullptr - vector[double] *ll vector[double] *vec if generate_path_files: @@ -934,13 +934,15 @@ cdef class RouteChoiceSet: # tmp.append(deref(vec)) # print(tmp) - if len(matrix.view_names) == 1: - link_loads = 
self.apply_link_loading_func(matrix.matrix_view, path_files, generate_path_files, cores) - else: - link_loads = { - name: self.apply_link_loading_func(matrix.matrix_view[:, :, i], path_files, generate_path_files, cores) - for i, name in enumerate(matrix.names) - } + link_loads = {} + for i, name in enumerate(matrix.names): + m = matrix.matrix_view if len(matrix.view_names) == 1 else matrix.matrix_view[:, :, i] + + ll = self.apply_link_loading_from_path_files(m, deref(path_files)) \ + if generate_path_files else self.apply_link_loading(m) + + link_loads[name] = self.apply_link_loading_func(ll, cores) + del ll if generate_path_files: for vec in deref(path_files): @@ -949,13 +951,8 @@ cdef class RouteChoiceSet: return link_loads - cdef apply_link_loading_func(RouteChoiceSet self, double[:, :] m, vector[vector[double] *] *pf, bint generate_path_files, int cores): - """Helper function for self.link_loading. Cannot free a pointer captured in a local scope by a lambda.""" - if generate_path_files: - ll = self.apply_link_loading_from_path_files(m, deref(pf)) - else: - ll = self.apply_link_loading(m) - + cdef apply_link_loading_func(RouteChoiceSet self, vector[double] *ll, int cores): + """Helper function for link_loading.""" # This incantation creates a 2d (ll.size() x 1) memory view object around the underlying vector data without # transferring ownership. compressed = &deref(ll)[0] @@ -968,7 +965,6 @@ cdef class RouteChoiceSet: cores ) compressed = np.array(compressed, copy=True) - del ll return actual.reshape(-1), compressed.reshape(-1) @cython.boundscheck(False) @@ -1062,8 +1058,6 @@ cdef class RouteChoiceSet: """ Apply link loading from path files. - If path files have already been computed then this is a more efficient manner for the link loading. - Returns a vector of link loads indexed by compressed link ID. 
""" cdef: @@ -1101,6 +1095,7 @@ cdef class RouteChoiceSet: cdef: RouteSet_t *route_set vector[double] *route_set_prob + vector[double].const_iterator route_prob_iter long origin_index, dest_index double demand, prob, load @@ -1125,6 +1120,102 @@ cdef class RouteChoiceSet: return link_loads + @cython.embedsignature(True) + def select_link_loading(RouteChoiceSet self, matrix, select_links: Dict[str, List[long]], cores: int = 0): + """ + Apply link loading to the network using the demand matrix and the previously computed route sets. + """ + if self.ods == nullptr \ + or self.link_union_set == nullptr \ + or self.prob_set == nullptr: + raise ValueError("select link loading requires Route Choice path_size_logit results") + + if not isinstance(matrix, AequilibraeMatrix): + raise ValueError("`matrix` is not an AequilibraE matrix") + + cores = cores if cores > 0 else omp_get_max_threads() + + cdef: + unordered_set[long] select_link_set + vector[double] *ll + + link_loads = {} + + for i, name in enumerate(matrix.names): + matrix_ll = {} + m = matrix.matrix_view if len(matrix.view_names) == 1 else matrix.matrix_view[:, :, i] + for (k, v) in select_links.items(): + select_link_set = v + + coo = COO((self.zones, self.zones)) + + ll = self.apply_select_link_loading(coo, m, select_link_set) + res = self.apply_link_loading_func(ll, cores) + del ll + + matrix_ll[k] = (coo, res) + link_loads[name] = matrix_ll + + return link_loads + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.embedsignature(True) + @cython.initializedcheck(False) + cdef vector[double] *apply_select_link_loading( + RouteChoiceSet self, + COO sparse_mat, + double[:, :] matrix_view, + unordered_set[long] &select_link_set + ) noexcept nogil: + """ + Apply select link loading. + + Returns a vector of link loads indexed by compressed link ID. 
+ """ + cdef: + RouteSet_t *route_set + vector[double] *route_set_prob + vector[double].const_iterator route_prob_iter + long origin_index, dest_index, o, d + double demand, prob, load + + vector[double] *link_loads = new vector[double](self.num_links) + + bool link_present = False + + # For each OD pair, if a route contains one or more links in a select link set, add that ODs demand to + # a sparse matrix of Os to Ds + + # For each route, if it contains one or more links in a select link set, apply the link loading for + # that route + + for i in range(self.ods.size()): + route_set = deref(self.results)[i] + route_set_prob = deref(self.prob_set)[i] + + origin_index = self.nodes_to_indices_view[deref(self.ods)[i].first] + dest_index = self.nodes_to_indices_view[deref(self.ods)[i].second] + demand = matrix_view[origin_index, dest_index] + + route_prob_iter = route_set_prob.cbegin() + for route in deref(route_set): + prob = deref(route_prob_iter) + inc(route_prob_iter) + load = prob * demand + + for link in deref(route): + if select_link_set.find(link) != select_link_set.end(): + sparse_mat.append(origin_index, dest_index, load) + link_present = True + break + + if link_present: + for link in deref(route): + deref(link_loads)[link] = deref(link_loads)[link] + load # += here results in all zeros? 
Odd + + return link_loads + @cython.wraparound(False) @cython.embedsignature(True) @cython.boundscheck(False) diff --git a/setup.py b/setup.py index 804ac8f38..17e693d9b 100644 --- a/setup.py +++ b/setup.py @@ -82,6 +82,16 @@ language="c++", ) +ext_mod_sparse_matrix = Extension( + "aequilibrae.matrix.sparse_matrix", + [join("aequilibrae", "matrix", "sparse_matrix.pyx")], + extra_compile_args=compile_args, + extra_link_args=link_args, + define_macros=[("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")], + include_dirs=include_dirs, + language="c++", +) + with open("requirements.txt", "r") as fl: install_requirements = [x.strip() for x in fl.readlines()] @@ -130,7 +140,7 @@ ], cmdclass={"build_ext": build_ext}, ext_modules=cythonize( - [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building], + [ext_mod_aon, ext_mod_ipf, ext_mod_put, ext_mod_bfs_le, ext_mod_graph_building, ext_mod_sparse_matrix], compiler_directives={"language_level": "3str"}, ), ) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 8eca8b14e..f0db90b89 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -221,10 +221,12 @@ def test_link_loading(self): nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - link_loads = rc.link_loading(self.mat) - link_loads2 = rc.link_loading(self.mat, generate_path_files=True) + n = self.mat.names[0] - np.testing.assert_array_almost_equal(link_loads, link_loads2) + ll = rc.link_loading(self.mat)[n] + ll2 = rc.link_loading(self.mat, generate_path_files=True)[n] + + np.testing.assert_array_almost_equal(ll, ll2) def test_known_results(self): np.random.seed(0) From 63cc837f49735cd5c796ebebb1b007c7bf809bec Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 1 May 2024 17:25:15 +1000 Subject: [PATCH 42/52] Add select link 
tests and fix bug --- aequilibrae/paths/route_choice_set.pyx | 3 +- tests/aequilibrae/paths/test_route_choice.py | 127 +++++++++++++------ 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index af258b1da..a55e8e1e5 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -1182,7 +1182,7 @@ cdef class RouteChoiceSet: vector[double] *link_loads = new vector[double](self.num_links) - bool link_present = False + bool link_present # For each OD pair, if a route contains one or more links in a select link set, add that ODs demand to # a sparse matrix of Os to Ds @@ -1204,6 +1204,7 @@ cdef class RouteChoiceSet: inc(route_prob_iter) load = prob * demand + link_present = False for link in deref(route): if select_link_set.find(link) != select_link_set.end(): sparse_mat.append(origin_index, dest_index, load) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index f0db90b89..36710f67b 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -27,7 +27,7 @@ def setUp(self) -> None: self.project = Project() self.project.open(proj_path) - self.project.network.build_graphs(fields=["distance"], modes=["c"]) + self.project.network.build_graphs(fields=["distance", "free_flow_time"], modes=["c"]) self.graph = self.project.network.graphs["c"] # type: Graph self.graph.set_graph("distance") self.graph.set_blocked_centroid_flows(False) @@ -229,51 +229,106 @@ def test_link_loading(self): np.testing.assert_array_almost_equal(ll, ll2) def test_known_results(self): - np.random.seed(0) - rc = RouteChoiceSet(self.graph) - nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + for cost in ["distance", "free_flow_time"]: + with 
self.subTest(cost=cost): + self.graph.set_graph(cost) - mat = AequilibraeMatrix() - mat.create_empty( - memory_only=True, - zones=self.graph.num_zones, - matrix_names=["all zeros", "single one"], - ) - mat.index = self.graph.centroids[:] - mat.computational_view() - mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) - mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all zeros", "single one"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :, 0] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + mat.matrix_view[:, :, 1] = np.zeros((self.graph.num_zones, self.graph.num_zones)) - for od in nodes: - mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 + for od in nodes: + mat.matrix_view[:, :, 0][od[0] - 1, od[1] - 1] = 0.0 - mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 + mat.matrix_view[:, :, 1][nodes[0][0] - 1, nodes[0][1] - 1] = 1.0 - link_loads = rc.link_loading(mat) + link_loads = rc.link_loading(mat) + table = rc.get_results().to_pandas() - with self.subTest(matrix="all zeros"): - u, c = link_loads["all zeros"] - np.testing.assert_allclose(u, 0.0) - np.testing.assert_allclose(c, 0.0) + with self.subTest(matrix="all zeros"): + u, c = link_loads["all zeros"] + np.testing.assert_allclose(u, 0.0) + np.testing.assert_allclose(c, 0.0) + + with self.subTest(matrix="single one"): + u, c = link_loads["single one"] + link = self.graph.graph[ + (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) + ] + + lid = link.link_id.values[0] + c_lid = 
link.__compressed_id__.values[0] + t = table[table["route set"].apply(lambda x, lid=lid: lid in set(x))] + v = t.probability.sum() + + self.assertAlmostEqual(u[lid - 1], v, places=6) + self.assertAlmostEqual(c[c_lid], v, places=6) + + def test_select_link(self): + for cost in ["distance", "free_flow_time"]: + with self.subTest(cost=cost): + self.graph.set_graph(cost) + + np.random.seed(0) + rc = RouteChoiceSet(self.graph) + nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) + + mat = AequilibraeMatrix() + mat.create_empty( + memory_only=True, + zones=self.graph.num_zones, + matrix_names=["all ones"], + ) + mat.index = self.graph.centroids[:] + mat.computational_view() + mat.matrix_view[:, :] = np.full((self.graph.num_zones, self.graph.num_zones), 1.0) + + table = rc.get_results().to_pandas() + + # Shortest routes between 20-4, and 21-2 share links 23 and 26. Link 26 also appears in between 10-8 and 17-9 + # 20-4 also shares 11 with 5-3 + ods = [(20, 4), (21, 2), (10, 8), (17, 9)] + sl_link_loads = rc.select_link_loading( + mat, + { + "sl1": self.graph.graph.set_index("link_id").loc[[23, 26]].__compressed_id__.to_list(), + "sl2": self.graph.graph.set_index("link_id").loc[[11]].__compressed_id__.to_list(), + }, + ) - with self.subTest(matrix="single one"): - u, c = link_loads["single one"] - link = self.graph.graph[ - (self.graph.graph.a_node == nodes[0][0] - 1) & (self.graph.graph.b_node == nodes[0][1] - 1) - ] + m, (u, c) = sl_link_loads["all ones"]["sl1"] + m2, (u2, c2) = sl_link_loads["all ones"]["sl2"] + m = m.to_scipy() + m2 = m2.to_scipy() + self.assertSetEqual(set(zip(*(m > 0.0001).nonzero())), {(o - 1, d - 1) for o, d in ods}) + self.assertSetEqual(set(zip(*(m2 > 0.0001).nonzero())), {(20 - 1, 4 - 1), (5 - 1, 3 - 1)}) - lid = link.link_id.values[0] - c_lid = link.__compressed_id__.values[0] + t1 = table[(table.probability > 0.0) & table["route 
set"].apply(lambda x: bool(set(x) & {23, 26}))] + t2 = table[(table.probability > 0.0) & table["route set"].apply(lambda x: 11 in set(x))] + sl1_link_union = np.unique(np.hstack(t1["route set"].values)) + sl2_link_union = np.unique(np.hstack(t2["route set"].values)) - self.assertAlmostEqual(u[lid - 1], 1.0) - self.assertAlmostEqual(c[c_lid], 1.0) + np.testing.assert_equal(u.nonzero()[0] + 1, sl1_link_union) + np.testing.assert_equal(u2.nonzero()[0] + 1, sl2_link_union) - u[lid - 1] = 0.0 - c[c_lid] = 0.0 + np.testing.assert_allclose(u, c) + np.testing.assert_allclose(u2, c2) - np.testing.assert_allclose(u, 0.0) - np.testing.assert_allclose(c, 0.0) + self.assertAlmostEqual(u.sum(), (t1["route set"].apply(len) * t1.probability).sum()) + self.assertAlmostEqual(u2.sum(), (t2["route set"].apply(len) * t2.probability).sum()) class TestRouteChoice(TestCase): From 8a222d2320ad0be3076ad054a3614cb55a1b7a76 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 1 May 2024 17:25:30 +1000 Subject: [PATCH 43/52] Add sparse matrix writing --- aequilibrae/matrix/sparse_matrix.pyx | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx index 7c393834e..3ee7cbb5f 100644 --- a/aequilibrae/matrix/sparse_matrix.pyx +++ b/aequilibrae/matrix/sparse_matrix.pyx @@ -4,6 +4,7 @@ from cython.operator cimport dereference as d import scipy.sparse import numpy as np +import openmatrix as omx cdef class Sparse: """ @@ -25,6 +26,13 @@ cdef class Sparse: """ pass + def to_disk(self, path, name: str): + f = omx.open_file(path, "a") + try: + f[name] = self.to_scipy().tocsr().toarray() + finally: + f.close() + cdef class COO(Sparse): """ @@ -58,7 +66,7 @@ cdef class COO(Sparse): del self.data self.data = nullptr - def to_scipy(self, shape=None): + def to_scipy(self, shape=None, dtype=np.float64): row = &d(self.row)[0] col = &d(self.col)[0] data = &d(self.data)[0] @@ -66,7 +74,7 @@ cdef class COO(Sparse): 
if shape is None: shape = self.shape - return scipy.sparse.coo_matrix((data, (row, col)), dtype=np.float64, shape=shape) + return scipy.sparse.coo_matrix((data, (row, col)), dtype=dtype, shape=shape) cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: self.row.push_back(i) From c16e3970573bc7bf96383d982b95bb28cbc3daf3 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 7 May 2024 11:35:57 +1000 Subject: [PATCH 44/52] Update docs, add small api tests --- .../trip_distribution/plot_route_choice.py | 51 +++++++++---- tests/aequilibrae/paths/test_route_choice.py | 73 +++++++++++++------ 2 files changed, 90 insertions(+), 34 deletions(-) diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py index 5f285cb22..a8d1990f7 100644 --- a/docs/source/examples/trip_distribution/plot_route_choice.py +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -1,10 +1,11 @@ -""" -.. _example_usage_route_choice: +""".. _example_usage_route_choice: Route Choice ================= -In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La Serena Metropolitan Area in Chile. +In this example, we show how to perform route choice set generation using BFSLE and Link penalisation, for a city in La +Serena Metropolitan Area in Chile. + """ # Imports @@ -52,7 +53,8 @@ # let's say we want to minimize the distance graph.set_graph("distance") -# But let's say we only want a skim matrix for nodes 28-40, and 49-60 (inclusive), these happen to be a selection of western centroids. +# But let's say we only want a skim matrix for nodes 28-40, and 49-60 (inclusive), these happen to be a selection of +# western centroids. 
graph.prepare_graph(np.array(list(range(28, 41)) + list(range(49, 91)))) # %% @@ -77,13 +79,15 @@ from aequilibrae.paths import RouteChoice # %% -# This object construct might take a minute depending on the size of the graph due to the construction of the compressed link to network link mapping that's required. -# This is a one time operation per graph and is cached. -# We need to supply a Graph and optionally a AequilibraeMatrix, if the matrix is not provided link loading cannot be preformed. +# This object construct might take a minute depending on the size of the graph due to the construction of the compressed +# link to network link mapping that's required. This is a one time operation per graph and is cached. We need to +# supply a Graph and optionally a AequilibraeMatrix, if the matrix is not provided link loading cannot be preformed. rc = RouteChoice(graph, mat) # %% -# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE based on the paper + +# Here we'll set the parameters of our set generation. There are two algorithms available: Link penalisation, or BFSLE +# based on the paper # "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). # https://doi.org/10.1080/18128602.2012.671383 # It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results. @@ -96,7 +100,8 @@ print(rc.default_paramaters) # %% -# We can now perform a computation for single OD pair if we'd like. Here we do one between the first and last centroid as well an an assignment. +# We can now perform a computation for single OD pair if we'd like. Here we do one between the first and last centroid +# as well an an assignment. results = rc.execute_single(28, 90, perform_assignment=True) print(results[0]) @@ -106,8 +111,8 @@ rc.get_results().to_pandas() # %% -# To perform a batch operation we need to prepare the object first. 
We can either provide a list of tuple of the OD pairs we'd like to use, or we can provided a 1D list -# and the generation will be run on all permutations. +# To perform a batch operation we need to prepare the object first. We can either provide a list of tuple of the OD +# pairs we'd like to use, or we can provided a 1D list and the generation will be run on all permutations. rc.prepare(graph.centroids[:5]) # You can inspect the result with rc.nodes # %% @@ -117,8 +122,28 @@ # %% # Since we provided a matrix initially we can also perform link loading based on our assignment results. -# We can specify which link loading we want, either just uncompressed, just compressed, or both. -rc.get_load_results(which="both") +rc.get_load_results() + +# %% +# Select link analysis +# ~~~~~~~~~~~~~~~~~~ +# We can also enable select link analysis by providing the links and the directions that we are interested in +rc.set_select_links({"sl1": [(5372, 1), (5374, 1)], "sl2": [(23845, 0)]}) + +# %% +# We can get then the results in a Pandas data frame for both the network and compressed graph. +u_sl, c_sl = rc.get_select_link_results() +u_sl + +# %% +# We can also access the OD matrices for this link loading. These matrices are sparse and can be converted to +# scipy.sparse matrices for ease of use. They're stored in a dictionary where the key is the matrix name concatenated +# wit the select link set name via an underscore. These matrices are constructed during `get_select_link_results`. 
+list(rc.sl_od_matrix.keys()) + +# %% +od_matrix = rc.sl_od_matrix["demand_sl1"] +od_matrix.to_scipy() # %% project.close() diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 36710f67b..59df125d8 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -349,53 +349,84 @@ def setUp(self) -> None: self.mat = self.project.matrices.get_matrix("demand_omx") self.mat.computational_view() - def test_prepare(self): - rc = RouteChoice(self.graph, self.mat) + self.rc = RouteChoice(self.graph, self.mat) + def test_prepare(self): with self.assertRaises(ValueError): - rc.prepare([]) + self.rc.prepare([]) with self.assertRaises(ValueError): - rc.prepare(["1", "2"]) + self.rc.prepare(["1", "2"]) with self.assertRaises(ValueError): - rc.prepare([("1", "2")]) + self.rc.prepare([("1", "2")]) with self.assertRaises(ValueError): - rc.prepare([1]) + self.rc.prepare([1]) - rc.prepare([1, 2]) - self.assertListEqual(rc.nodes, [(1, 2), (2, 1)]) - rc.prepare([(1, 2)]) - self.assertListEqual(rc.nodes, [(1, 2)]) + self.rc.prepare([1, 2]) + self.assertListEqual(self.rc.nodes, [(1, 2), (2, 1)]) + self.rc.prepare([(1, 2)]) + self.assertListEqual(self.rc.nodes, [(1, 2)]) def test_set_save_routes(self): - rc = RouteChoice(self.graph, self.mat) + self.rc = RouteChoice(self.graph, self.mat) with self.assertRaises(ValueError): - rc.set_save_routes("/non-existent-path") + self.rc.set_save_routes("/non-existent-path") def test_set_choice_set_generation(self): - rc = RouteChoice(self.graph, self.mat) - - rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} ) - 
rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) + self.rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( - rc.parameters, {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, + {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0}, ) with self.assertRaises(ValueError): - rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) with self.assertRaises(ValueError): - rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) with self.assertRaises(AttributeError): - rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + self.rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) + + def test_link_results(self): + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + + self.rc.set_select_links({"sl1": [(23, 1), (26, 1)], "sl2": [(11, 0)]}) + + self.rc.prepare(self.graph.centroids) + + self.rc.execute(perform_assignment=True) + + u, c = self.rc.get_load_results() + u_sl, c_sl = self.rc.get_select_link_results() + + pd.testing.assert_frame_equal(u, c) + pd.testing.assert_frame_equal(u_sl, c_sl) + + self.assertListEqual( + list(u.columns), + ["link_id"] + [mat_name + "_" + dir for dir in ["ab", "ba", "tot"] for mat_name in self.mat.names], + ) + + self.assertListEqual( + list(u_sl.columns), + ["link_id"] + + [ + mat_name + "_" + sl_name + "_" + dir + for sl_name in ["sl1", "sl2"] + for dir in ["ab", "ba"] + for mat_name in self.mat.names + ] + + [mat_name + "_" + sl_name + "_tot" for sl_name in ["sl1", "sl2"] for mat_name in self.mat.names], + ) def generate_line_strings(project, graph, results): From a7815549f7dfc169b1d9c8df995b1b9e2a379f8f Mon 
Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 8 May 2024 16:11:54 +1000 Subject: [PATCH 45/52] Add sparse matrix tests and from disk method --- aequilibrae/matrix/__init__.py | 1 + aequilibrae/matrix/sparse_matrix.pyx | 40 +++++++++++++++++++ .../aequilibrae/matrix/test_sparse_matrix.py | 38 ++++++++++++++++++ 3 files changed, 79 insertions(+) create mode 100644 tests/aequilibrae/matrix/test_sparse_matrix.py diff --git a/aequilibrae/matrix/__init__.py b/aequilibrae/matrix/__init__.py index 39ae5bde4..18521fe8a 100644 --- a/aequilibrae/matrix/__init__.py +++ b/aequilibrae/matrix/__init__.py @@ -1,2 +1,3 @@ from .aequilibrae_matrix import AequilibraeMatrix, matrix_export_types from .aequilibrae_data import AequilibraeData, data_export_types +from .sparse_matrix import Sparse, COO diff --git a/aequilibrae/matrix/sparse_matrix.pyx b/aequilibrae/matrix/sparse_matrix.pyx index 3ee7cbb5f..270850223 100644 --- a/aequilibrae/matrix/sparse_matrix.pyx +++ b/aequilibrae/matrix/sparse_matrix.pyx @@ -33,6 +33,24 @@ cdef class Sparse: finally: f.close() + @classmethod + def from_disk(cls, path, names=None, aeq=False): + """ + Read an OMX file and return a dictionary of matrix names to a scipy.sparse matrix, or + aequilibrae.matrix.sparse matrix. + """ + f = omx.open_file(path, "r") + res = {} + try: + for matrix in (f.list_matrices() if names is None else names): + if aeq: + res[matrix] = cls.from_matrix(f[matrix]) + else: + res[matrix] = scipy.sparse.csr_matrix(f[matrix]) + return res + finally: + f.close() + cdef class COO(Sparse): """ @@ -67,6 +85,9 @@ cdef class COO(Sparse): self.data = nullptr def to_scipy(self, shape=None, dtype=np.float64): + """ + Create scipy.sparse.coo_matrix from this COO matrix. 
+ """ row = &d(self.row)[0] col = &d(self.col)[0] data = &d(self.data)[0] @@ -76,6 +97,25 @@ cdef class COO(Sparse): return scipy.sparse.coo_matrix((data, (row, col)), dtype=dtype, shape=shape) + @classmethod + def from_matrix(cls, m): + """ + Create COO matrix from an dense or scipy-like matrix. + """ + if not isinstance(m, scipy.sparse.coo_matrix): + m = scipy.sparse.coo_matrix(m) + + self = cls() + + cdef size_t[:] row = m.row.astype(np.uint64), col = m.row.astype(np.uint64) + cdef double[:] data = m.data + + self.row.insert(self.row.end(), &row[0], &row[-1] + 1) + self.col.insert(self.col.end(), &col[0], &col[-1] + 1) + self.data.insert(self.data.end(), &data[0], &data[-1] + 1) + + return self + cdef void append(COO self, size_t i, size_t j, double v) noexcept nogil: self.row.push_back(i) self.col.push_back(j) diff --git a/tests/aequilibrae/matrix/test_sparse_matrix.py b/tests/aequilibrae/matrix/test_sparse_matrix.py new file mode 100644 index 000000000..f75dc70da --- /dev/null +++ b/tests/aequilibrae/matrix/test_sparse_matrix.py @@ -0,0 +1,38 @@ +from tempfile import gettempdir +from aequilibrae.matrix import COO +from unittest import TestCase +from uuid import uuid4 +import scipy.sparse +import numpy as np +import pathlib + + +class TestSparseMatrix(TestCase): + def setUp(self) -> None: + self.data = np.full((100, 100), 5.0) + self.dir = pathlib.Path(gettempdir()) / uuid4().hex + self.dir.mkdir() + + def tearDown(self) -> None: + pass + + def test_round_trip(self): + p = self.dir / "test.omx" + + coo = COO.from_matrix( + self.data, + ) + coo.to_disk(p, "m1") + coo.to_disk(p, "m2") + + sp = coo.to_scipy() + + coo1 = COO.from_disk(p) + coo2 = COO.from_disk(p, aeq=True) + + for m in ["m1", "m2"]: + self.assertIsInstance(coo1[m], scipy.sparse.csr_matrix) + self.assertIsInstance(coo2[m], COO) + + np.testing.assert_allclose(sp.A, coo1[m].A) + np.testing.assert_allclose(sp.A, coo2[m].to_scipy().A) From 9e39428c4fb7b7ae81d67b636db33cc5e585cad1 Mon Sep 17 00:00:00 
2001 From: Jake-Moss Date: Wed, 22 May 2024 15:33:25 +1000 Subject: [PATCH 46/52] Add link loading and select link results saving --- aequilibrae/paths/route_choice.py | 119 +++++++++++++++++-- tests/aequilibrae/paths/test_route_choice.py | 34 +++++- 2 files changed, 145 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 8b836a42d..00753f4ad 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -3,6 +3,8 @@ import logging import pathlib import socket +import sqlite3 +from datetime import datetime from typing import List, Optional, Tuple, Union, Dict from uuid import uuid4 from functools import cached_property @@ -27,7 +29,8 @@ class RouteChoice: def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): self.parameters = self.default_paramaters.copy() - self.procedure_id = uuid4().hex + self.procedure_id = None + self.procedure_date = None proj = project or get_active_project(must_exist=False) self.project = proj @@ -192,6 +195,8 @@ def execute_single(self, origin: int, destination: int, perform_assignment: bool :Returns: ***route set** (:obj:`List[Tuple[int]]`): A list of routes as tuples of link IDs. """ + self.procedure_id = uuid4().hex + self.procedure_date = str(datetime.today()) self.results = None return self.__rc.run( @@ -221,6 +226,8 @@ def execute(self, perform_assignment: bool = False) -> None: "to perform batch route choice generation you must first prepare with the selected nodes. 
See `RouteChoice.prepare()`" ) + self.procedure_date = str(datetime.today()) + self.results = None self.__rc.batched( self.nodes, @@ -244,11 +251,14 @@ def info(self) -> dict: **info** (:obj:`dict`): Dictionary with summary information """ - matrix_totals = ( - {nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names)} - if self.matrix is not None - else None - ) + if self.matrix is None: + matrix_totals = {} + elif len(self.matrix.view_names) == 1: + matrix_totals = {self.matrix.view_names[0]: np.sum(self.matrix.matrix_view[:, :])} + else: + matrix_totals = { + nm: np.sum(self.matrix.matrix_view[:, :, i]) for i, nm in enumerate(self.matrix.view_names) + } info = { "Algorithm": self.algorithm, @@ -389,7 +399,7 @@ def get_select_link_results(self) -> pd.DataFrame: Get the select link loading results. :Returns: - **dataset** (:obj:`Union[Tuple[pd.DataFrame, pd.DataFrame], pd.DataFrame]`): + **dataset** (:obj:`Tuple[pd.DataFrame, pd.DataFrame]`): A tuple of uncompressed and compressed select link loading results as DataFrames. Columns are the matrix name concatenated with the select link set and direction. 
""" @@ -423,3 +433,98 @@ def get_select_link_results(self) -> pd.DataFrame: compressed_df = self.__link_loads_to_df(m_compact, compact_lids, self.sl_compact_link_loads) return uncompressed_df, compressed_df + + def __save_dataframe(self, df, method_name: str, description: str, table_name: str, report: dict, project) -> None: + self.procedure_id = uuid4().hex + data = [ + table_name, + "select link", + self.procedure_id, + str(report), + self.procedure_date, + description, + ] + + # sqlite3 context managers only commit, they don't close, oh well + conn = sqlite3.connect(pathlib.Path(project.project_base_path) / "results_database.sqlite") + with conn: + df.to_sql(table_name, conn, index=False) + conn.close() + + conn = project.connect() + with conn: + conn.execute( + """Insert into results(table_name, procedure, procedure_id, procedure_report, timestamp, + description) Values(?,?,?,?,?,?)""", + data, + ) + conn.close() + + def save_link_flows(self, table_name: str, project=None) -> None: + """ + Saves the link link flows for all classes into the results database. + + :Arguments: + **table_name** (:obj:`str`): Name of the table being inserted to. + **project** (:obj:`Project`, `Optional`): Project we want to save the results to. + Defaults to the active project + """ + if not project: + project = self.project or get_active_project() + + u, c = self.get_load_results() + info = self.info() + self.__save_dataframe( + u, + "Link loading", + "Uncompressed link loading results", + table_name + "_uncompressed", + info, + project=project, + ) + + self.__save_dataframe( + c, + "Link loading", + "Compressed link loading results", + table_name + "_compressed", + info, + project=project, + ) + + def save_select_link_flows(self, table_name: str, project=None) -> None: + """ + Saves the select link link flows for all classes into the results database. Additionally, it exports + the OD matrices into OMX format. 
+ + :Arguments: + **table_name** (:obj:`str`): Name of the table being inserted to and the name of the + OpenMatrix file used for OD matrices. + **project** (:obj:`Project`, `Optional`): Project we want to save the results to. + Defaults to the active project + """ + if not project: + project = self.project or get_active_project() + + u, c = self.get_select_link_results() + info = self.info() + self.__save_dataframe( + u, + "Select link analysis", + "Uncompressed select link analysis results", + table_name + "_uncompressed", + info, + project=project, + ) + + self.__save_dataframe( + c, + "Select link analysis", + "Compressed select link analysis results", + table_name + "_compressed", + info, + project=project, + ) + + for k, v in self.sl_od_matrix.items(): + v.to_disk((pathlib.Path(project.project_base_path) / "matrices" / table_name).with_suffix(".omx"), k) diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index 59df125d8..de3f257de 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -2,6 +2,8 @@ import uuid import zipfile from os.path import join, dirname +import pathlib +import sqlite3 from tempfile import gettempdir from unittest import TestCase import pandas as pd @@ -11,7 +13,7 @@ from aequilibrae import Project from aequilibrae.paths.route_choice_set import RouteChoiceSet from aequilibrae.paths.route_choice import RouteChoice -from aequilibrae.matrix import AequilibraeMatrix +from aequilibrae.matrix import AequilibraeMatrix, Sparse from ...data import siouxfalls_project @@ -428,6 +430,36 @@ def test_link_results(self): + [mat_name + "_" + sl_name + "_tot" for sl_name in ["sl1", "sl2"] for mat_name in self.mat.names], ) + def test_saving(self): + self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) + self.rc.set_select_links({"sl1": [(23, 1), (26, 1)], "sl2": [(11, 0)]}) + self.rc.prepare(self.graph.centroids) + 
self.rc.execute(perform_assignment=True) + u, c = self.rc.get_load_results() + u_sl, c_sl = self.rc.get_select_link_results() + + self.rc.save_link_flows("ll") + self.rc.save_select_link_flows("sl") + + conn = sqlite3.connect(pathlib.Path(self.project.project_base_path) / "results_database.sqlite") + with conn: + for table, df in [ + ("ll_uncompressed", u), + ("ll_compressed", c), + ("sl_uncompressed", u_sl), + ("sl_compressed", c_sl), + ]: + with self.subTest(table=table): + pd.testing.assert_frame_equal(pd.read_sql(f"select * from {table}", conn), df) + conn.close() + + matrices = Sparse.from_disk( + (pathlib.Path(self.project.project_base_path) / "matrices" / "sl").with_suffix(".omx") + ) + + for name, matrix in self.rc.sl_od_matrix.items(): + np.testing.assert_allclose(matrix.to_scipy().A, matrices[name].A) + def generate_line_strings(project, graph, results): """Debug method""" From 92496c01ec797c162bb5799dd480d83f1da09133 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Wed, 22 May 2024 17:35:30 +1000 Subject: [PATCH 47/52] WIP: add LP to BFSLE, each depth penalises the next depths base graph --- aequilibrae/paths/route_choice.py | 4 ++-- aequilibrae/paths/route_choice_set.pxd | 1 + aequilibrae/paths/route_choice_set.pyx | 24 ++++++++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 00753f4ad..63f10a51f 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,8 +22,8 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100}, - "link-penalisation": {"penalty": 1.1}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.1}, + "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } diff --git a/aequilibrae/paths/route_choice_set.pxd 
b/aequilibrae/paths/route_choice_set.pxd index c4a016194..7c86ded04 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -180,6 +180,7 @@ cdef class RouteChoiceSet: long long [:] thread_conn, long long [:] thread_b_nodes, long long [:] _thread_reached_first, + double penatly, unsigned int seed ) noexcept nogil diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index a55e8e1e5..88c92c8ff 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -12,7 +12,7 @@ from libc.math cimport INFINITY, exp, pow from libc.stdlib cimport abort from libc.string cimport memcpy from libcpp cimport nullptr -from libcpp.algorithm cimport lower_bound, reverse, sort +from libcpp.algorithm cimport lower_bound, reverse, sort, copy from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport unordered_set from libcpp.utility cimport pair @@ -277,11 +277,11 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - if penalty != 0.0 and bfsle: - raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") + # if penalty != 0.0 and bfsle: + # raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") - if not bfsle and penalty <= 1.0: - raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") + # if penalty <= 1.0: + # raise ValueError("`penalty` must be > 1.0. 
`penalty=1.1` is recommended") if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") @@ -410,6 +410,7 @@ cdef class RouteChoiceSet: conn_matrix[threadid()], b_nodes_matrix[threadid()], _reached_first_matrix[threadid()], + penalty, c_seed, ) else: @@ -562,6 +563,7 @@ cdef class RouteChoiceSet: long long [:] thread_conn, long long [:] thread_b_nodes, long long [:] _thread_reached_first, + double penatly, unsigned int seed ) noexcept nogil: """Main method for route set generation. See top of file for commentary.""" @@ -579,6 +581,11 @@ cdef class RouteChoiceSet: pair[RouteSet_t.iterator, bool] status unsigned int miss_count = 0 long long p, connector + vector[double] penalised_cost = vector[double](self.cost_view.shape[0]) + vector[double] next_penalised_cost = vector[double](self.cost_view.shape[0]) + + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -600,7 +607,7 @@ cdef class RouteChoiceSet: for banned in queue: # Copying the costs back into the scratch costs buffer. 
We could keep track of the modifications and # reverse them as well - memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) + memcpy(&thread_cost[0], &penalised_cost[0], penalised_cost.size() * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY @@ -628,6 +635,7 @@ cdef class RouteChoiceSet: while p != origin_index: connector = thread_conn[p] p = thread_predecessors[p] + next_penalised_cost[connector] *= penatly vec.push_back(connector) reverse(vec.begin(), vec.end()) @@ -657,6 +665,8 @@ cdef class RouteChoiceSet: queue.swap(next_queue) next_queue.clear() + copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) + # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those for banned in queue: del banned @@ -900,6 +910,8 @@ cdef class RouteChoiceSet: return prob_vec +TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) + @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ From daa3aeebed2870d6ceccd9a7e73a0b47323c4478 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 13:18:10 +1000 Subject: [PATCH 48/52] Add optional link penalisation to BFSLE --- aequilibrae/paths/route_choice.py | 12 +++-- aequilibrae/paths/route_choice_set.pyx | 66 ++++++++++++++++++-------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 63f10a51f..dcf6f05df 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,7 +22,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.1}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01}, "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -64,8 +64,7 @@ def __rc(self) -> RouteChoiceSet: return RouteChoiceSet(self.graph) def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: - """ - Chooses the assignment algorithm and set parameters. + """Chooses the assignment algorithm and set parameters. Options for algorithm are, 'bfsle' for breadth first search with link removal, or 'link-penalisation'/'link-penalization'. BFSLE implementation based on "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, @@ -77,7 +76,6 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: Setting the parameters for the route choice: `beta`, `theta`, and `seed` are BFSLE specific parameters. - `penalty` is a link penalisation specific parameter. 
Setting `max_depth` or `max_misses`, while not required, is strongly recommended to prevent runaway algorithms. `max_misses` is the maximum amount of duplicate routes found per OD pair. If it is exceeded then the route set @@ -94,6 +92,12 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: specifically it's related to the log base `penalty` of the ratio of costs between two alternative routes. If it is exceeded then the route set if returned with fewer than `max_routes`. + Additionally BFSLE has the option to incorporate link penalisation. Every link in all routes found at a depth + are penalised with the `penalty` factor for the next depth. So at a depth of 0 no links are penalised nor + removed. At depth 1, all links found at depth 0 are penalised, then the links marked for removal are removed. + All links in the routes found at depth 1 are then penalised for the next depth. The penalisation compounds. + Pass set `penalty=1.0` to disable. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 88c92c8ff..501d791bb 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -19,6 +19,8 @@ from libcpp.utility cimport pair from libcpp.vector cimport vector from openmp cimport omp_get_max_threads +from libc.stdio cimport fprintf, stderr + import itertools import logging import pathlib @@ -239,7 +241,7 @@ cdef class RouteChoiceSet: cores: int = 0, a_star: bool = True, bfsle: bool = True, - penalty: float = 0.0, + penalty: float = 1.0, where: Optional[str] = None, path_size_logit: bool = False, beta: float = 1.0, @@ -264,8 +266,7 @@ cdef class RouteChoiceSet: Default of ``0`` for all available. **bfsle** (:obj:`bool`): Whether to use Breadth First Search with Link Removal (BFSLE) over link penalisation. Default ``True``. 
- **penalty** (:obj:`float`): Penalty to use for Link Penalisation. Must be ``> 1.0``. Not compatible - with ``bfsle=True``. + **penalty** (:obj:`float`): Penalty to use for Link Penalisation and BFSLE with LP. **where** (:obj:`str`): Optional file path to save results to immediately. Will return None. """ cdef: @@ -277,12 +278,6 @@ cdef class RouteChoiceSet: if max_routes < 0 or max_depth < 0: raise ValueError("`max_routes`, `max_depth`, and `cores` must be non-negative") - # if penalty != 0.0 and bfsle: - # raise ValueError("Link penalisation (`penalty` > 1.0) and `bfsle` cannot be enabled at once") - - # if penalty <= 1.0: - # raise ValueError("`penalty` must be > 1.0. `penalty=1.1` is recommended") - if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") @@ -568,24 +563,29 @@ cdef class RouteChoiceSet: ) noexcept nogil: """Main method for route set generation. See top of file for commentary.""" cdef: + # Output RouteSet_t *route_set + + # Scratch objects LinkSet_t removed_links minstd_rand rng - # Scratch objects + # These objects are juggled to prevent more allocations than necessary vector[unordered_set[long long] *] queue vector[unordered_set[long long] *] next_queue unordered_set[long long] *banned unordered_set[long long] *new_banned + + # Local variables, Cython doesn't allow conditional declarations vector[long long] *vec pair[RouteSet_t.iterator, bool] status unsigned int miss_count = 0 long long p, connector - vector[double] penalised_cost = vector[double](self.cost_view.shape[0]) - vector[double] next_penalised_cost = vector[double](self.cost_view.shape[0]) - copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) - copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) + # Link penalisation, only used when penalty != 1.0 + bint lp = penatly != 1.0 + vector[double] *penalised_cost = nullptr + 
vector[double] *next_penalised_cost = nullptr max_routes = max_routes if max_routes != 0 else UINT_MAX max_depth = max_depth if max_depth != 0 else UINT_MAX @@ -594,6 +594,13 @@ cdef class RouteChoiceSet: route_set = new RouteSet_t() rng.seed(seed) + if lp: + # Although we don't need the dynamic ability of vectors here, Cython doesn't have the std::array module. + penalised_cost = new vector[double](self.cost_view.shape[0]) + next_penalised_cost = new vector[double](self.cost_view.shape[0]) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], penalised_cost.begin()) + copy(&self.cost_view[0], &self.cost_view[0] + self.cost_view.shape[0], next_penalised_cost.begin()) + # We'll go at most `max_depth` iterations down, at each depth we maintain a queue of the next set of banned # edges to consider for depth in range(max_depth): @@ -605,9 +612,12 @@ cdef class RouteChoiceSet: shuffle(queue.begin(), queue.end(), rng) for banned in queue: - # Copying the costs back into the scratch costs buffer. We could keep track of the modifications and - # reverse them as well - memcpy(&thread_cost[0], &penalised_cost[0], penalised_cost.size() * sizeof(double)) + if lp: + # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation + copy(penalised_cost.cbegin(), penalised_cost.cend(), &thread_cost[0]) + else: + # Otherwise we just copy directly from the cost view + memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): thread_cost[connector] = INFINITY @@ -635,9 +645,15 @@ cdef class RouteChoiceSet: while p != origin_index: connector = thread_conn[p] p = thread_predecessors[p] - next_penalised_cost[connector] *= penatly vec.push_back(connector) + if lp: + # Here we penalise all seen links for the *next* depth. 
If we penalised on the current depth + # then we would introduce a bias for earlier seen paths + for connector in deref(vec): + # *= does not work + deref(next_penalised_cost)[connector] = penatly * deref(next_penalised_cost)[connector] + reverse(vec.begin(), vec.end()) for connector in deref(vec): @@ -665,7 +681,10 @@ cdef class RouteChoiceSet: queue.swap(next_queue) next_queue.clear() - copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) + if lp: + # Update the penalised_cost vector, since next_penalised_cost is always the one updated we just need to + # bring penalised_cost up to date. + copy(next_penalised_cost.cbegin(), next_penalised_cost.cend(), penalised_cost.begin()) # We may have added more banned link sets to the queue then found out we hit the max depth, we should free those for banned in queue: @@ -675,6 +694,11 @@ cdef class RouteChoiceSet: for banned in removed_links: del banned + if lp: + # If we had enabled link penalisation, we'll need to free those vectors as well + del penalised_cost + del next_penalised_cost + return route_set @cython.wraparound(False) @@ -737,7 +761,7 @@ cdef class RouteChoiceSet: vec.push_back(connector) for connector in deref(vec): - thread_cost[connector] *= penatly + thread_cost[connector] = penatly * thread_cost[connector] reverse(vec.begin(), vec.end()) @@ -910,7 +934,7 @@ cdef class RouteChoiceSet: return prob_vec -TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) +# TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ From 2e829c661eeb95f6ad2f9f215546fd00de55a33b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:28:49 +1000 Subject: [PATCH 49/52] Add binary logit cut offs for assignment --- aequilibrae/paths/route_choice.py | 13 +++++- aequilibrae/paths/route_choice_set.pxd | 3 +- aequilibrae/paths/route_choice_set.pyx | 57 ++++++++++++++++++++------ 3 files changed, 57 insertions(+), 16 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index dcf6f05df..4a21e4695 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -22,7 +22,7 @@ class RouteChoice: all_algorithms = ["bfsle", "lp", "link-penalisation", "link-penalization"] default_paramaters = { - "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01}, + "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 1.0}, "link-penalisation": {}, "bfsle": {"beta": 1.0, "theta": 1.0}, } @@ -98,6 +98,15 @@ def set_choice_set_generation(self, /, algorithm: str, **kwargs) -> None: All links in the routes found at depth 1 are then penalised for the next depth. The penalisation compounds. Pass set `penalty=1.0` to disable. + When performing an assignment, `cutoff_prob` can be provided to exclude routes from the path-sized logit model. + The `cutoff_prob` is used to compute an inverse binary logit and obtain a max difference in utilities. If a + path's total cost is greater than the minimum cost path in the route set plus the max difference, the route is + excluded from the PSL calculations. The route is still returned, but with a probability of 0.0. 
+ + The `cutoff_prob` should be in the range [0, 1]. It is then rescaled internally to [0.5, 1] as probabilities + below 0.5 produce negative differences in utilities. A higher `cutoff_prob` includes more routes. A value of + `0.0` will only include the minimum cost route. A value of `1.0` includes all routes. + :Arguments: **algorithm** (:obj:`str`): Algorithm to be used **kwargs** (:obj:`dict`): Dictionary with all parameters for the algorithm @@ -131,7 +140,7 @@ def set_cores(self, cores: int) -> None: self.cores = cores def set_save_path_files(self, save_it: bool) -> None: - """Turn path saving on or off. + """turn path saving on or off. :Arguments: **save_it** (:obj:`bool`): Boolean to indicate whether paths should be saved diff --git a/aequilibrae/paths/route_choice_set.pxd b/aequilibrae/paths/route_choice_set.pxd index 7c86ded04..85f675972 100644 --- a/aequilibrae/paths/route_choice_set.pxd +++ b/aequilibrae/paths/route_choice_set.pxd @@ -219,7 +219,8 @@ cdef class RouteChoiceSet: vector[double] &total_cost, vector[double] &path_overlap_vec, double beta, - double theta + double theta, + double cutoff_prob ) noexcept nogil @staticmethod diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx index 501d791bb..476816ae1 100644 --- a/aequilibrae/paths/route_choice_set.pyx +++ b/aequilibrae/paths/route_choice_set.pyx @@ -8,11 +8,11 @@ from cython.operator cimport dereference as deref from cython.operator cimport preincrement as inc from cython.parallel cimport parallel, prange, threadid from libc.limits cimport UINT_MAX -from libc.math cimport INFINITY, exp, pow +from libc.math cimport INFINITY, exp, pow, log from libc.stdlib cimport abort from libc.string cimport memcpy from libcpp cimport nullptr -from libcpp.algorithm cimport lower_bound, reverse, sort, copy +from libcpp.algorithm cimport lower_bound, reverse, sort, copy, min_element from libcpp.unordered_map cimport unordered_map from libcpp.unordered_set cimport 
unordered_set from libcpp.utility cimport pair @@ -246,6 +246,7 @@ cdef class RouteChoiceSet: path_size_logit: bool = False, beta: float = 1.0, theta: float = 1.0, + cutoff_prob: float = 1.0, ): """Compute the a route set for a list of OD pairs. @@ -281,6 +282,9 @@ cdef class RouteChoiceSet: if path_size_logit and (beta < 0 or theta <= 0): raise ValueError("`beta` must be >= 0 and `theta` > 0 for path sized logit model") + if path_size_logit and not 0.0 <= cutoff_prob <= 1.0: + raise ValueError("`cutoff_prob` must be 0 <= `cutoff_prob` <= 1 for path sized logit model") + for o, d in ods: if self.nodes_to_indices_view[o] == -1: raise ValueError(f"Origin {o} is not present within the compact graph") @@ -295,6 +299,9 @@ cdef class RouteChoiceSet: unsigned int c_seed = seed unsigned int c_cores = cores if cores > 0 else omp_get_max_threads() + # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values + double scaled_cutoff_prob = cutoff_prob * 0.5 + 0.5 + vector[pair[long long, long long]] c_ods # A* (and Dijkstra's) require memory views, so we must allocate here and take slices. Python can handle this @@ -439,7 +446,8 @@ cdef class RouteChoiceSet: deref(deref(cost_set)[i]), deref(deref(path_overlap_set)[i]), beta, - theta + theta, + scaled_cutoff_prob ) # While we need the unique sorted links (.first), we don't need the frequencies (.second) del freq_pair.second @@ -909,9 +917,15 @@ cdef class RouteChoiceSet: vector[double] &total_cost, vector[double] &path_overlap_vec, double beta, - double theta + double theta, + double cutoff_prob ) noexcept nogil: - """Compute a probability for each route in the route set based on the path overlap.""" + """Compute a probability for each route in the route set based on the path overlap. 
+ + Computes a binary logit between the minimum cost path and each path, if the total cost is greater than the + minimum + the difference in utilities required to produce the cut-off probability then the route is excluded from + the route set. + """ cdef: # Scratch objects vector[double] *prob_vec @@ -919,23 +933,31 @@ cdef class RouteChoiceSet: long long route_set_idx size_t i, j + vector[bool] route_mask = vector[bool](total_cost.size()) + double cutoff_cost = deref(min_element(total_cost.cbegin(), total_cost.cend())) \ + + inverse_binary_logit(cutoff_prob, 0.0, 1.0) + prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) + for i in range(total_cost.size()): + route_mask[i] = total_cost[i] <= cutoff_cost + # Beware when refactoring the below, the scale of the costs may cause floating point errors. Large costs will # lead to NaN results for i in range(total_cost.size()): - inv_prob = 0.0 - for j in range(total_cost.size()): - inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ - * exp(-theta * (total_cost[j] - total_cost[i])) - - prob_vec.push_back(1.0 / inv_prob) + if route_mask[i]: + inv_prob = 0.0 + for j in range(total_cost.size()): + if route_mask[j]: + inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ + * exp(-theta * (total_cost[j] - total_cost[i])) + prob_vec.push_back(1.0 / inv_prob) + else: + prob_vec.push_back(0.0) return prob_vec -# TODO: Reverse binary logit to solve for an absolute max cost based on a probability and min cost. 
Use this to filter out particular routes when assiging (Will need to adjust path overlap/compute a mask to determine which routes to skip later) - @cython.embedsignature(True) def link_loading(RouteChoiceSet self, matrix, generate_path_files: bool = False, cores: int = 0): """ @@ -1426,3 +1448,12 @@ cdef class Checkpoint: @staticmethod def batches(ods: List[Tuple[int, int]]): return (list(g) for k, g in itertools.groupby(sorted(ods), key=lambda x: x[0])) + + +cdef double inverse_binary_logit(double prob, double beta0, double beta1) noexcept nogil: + if prob == 1.0: + return INFINITY + elif prob == 0.0: + return -INFINITY + else: + return (log(prob / (1.0 - prob)) - beta0) / beta1 From 4ea4e0db3250a4371d962b281040479c780aff55 Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:43:23 +1000 Subject: [PATCH 50/52] Update tests --- aequilibrae/paths/route_choice.py | 2 +- tests/aequilibrae/paths/test_route_choice.py | 36 ++++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/aequilibrae/paths/route_choice.py b/aequilibrae/paths/route_choice.py index 4a21e4695..c5b1e553f 100644 --- a/aequilibrae/paths/route_choice.py +++ b/aequilibrae/paths/route_choice.py @@ -24,7 +24,7 @@ class RouteChoice: default_paramaters = { "generic": {"seed": 0, "max_routes": 0, "max_depth": 0, "max_misses": 100, "penalty": 1.01, "cutoff_prob": 1.0}, "link-penalisation": {}, - "bfsle": {"beta": 1.0, "theta": 1.0}, + "bfsle": {"beta": 1.0, "theta": 1.0, "penalty": 1.0}, } def __init__(self, graph: Graph, matrix: Optional[AequilibraeMatrix] = None, project=None): diff --git a/tests/aequilibrae/paths/test_route_choice.py b/tests/aequilibrae/paths/test_route_choice.py index de3f257de..5b3fbc88f 100644 --- a/tests/aequilibrae/paths/test_route_choice.py +++ b/tests/aequilibrae/paths/test_route_choice.py @@ -45,7 +45,7 @@ def test_route_choice(self): rc = RouteChoiceSet(self.graph) a, b = 1, 20 - for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 
1.1}]: + for kwargs in [{"bfsle": True}, {"bfsle": False, "penalty": 1.1}, {"bfsle": True, "penalty": 1.1}]: with self.subTest(**kwargs): results = rc.run(a, b, max_routes=10, **kwargs) self.assertEqual(len(results), 10, "Returned more routes than expected") @@ -150,10 +150,6 @@ def test_route_choice_exceptions(self): with self.assertRaises(ValueError): rc.run(a, b, max_routes=max_routes, max_depth=max_depth) - with self.assertRaises(ValueError): - rc.run(1, 1, max_routes=1, max_depth=1, bfsle=True, penalty=1.5) - rc.run(1, 1, max_routes=1, max_depth=1, bfsle=False, penalty=0.1) - def test_round_trip(self): np.random.seed(1000) rc = RouteChoiceSet(self.graph) @@ -210,12 +206,15 @@ def test_prob_results(self): np.random.seed(0) rc = RouteChoiceSet(self.graph) nodes = [tuple(x) for x in np.random.choice(self.graph.centroids, size=(10, 2), replace=False)] - rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True) - table = rc.get_results().to_pandas() - gb = table.groupby(by=["origin id", "destination id"]) - for od, df in gb: - self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") + for kwargs in [{"cutoff_prob": 0.0}, {"cutoff_prob": 0.5}, {"cutoff_prob": 1.0}]: + with self.subTest(**kwargs): + rc.batched(nodes, max_routes=20, max_depth=10, path_size_logit=True, **kwargs) + table = rc.get_results().to_pandas() + + gb = table.groupby(by=["origin id", "destination id"]) + for od, df in gb: + self.assertAlmostEqual(1.0, sum(df["probability"].values), msg=", probability not close to 1.0") def test_link_loading(self): np.random.seed(0) @@ -380,21 +379,28 @@ def test_set_save_routes(self): def test_set_choice_set_generation(self): self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1) self.assertDictEqual( - self.rc.parameters, {"max_routes": 20, "penalty": 1.1, "max_depth": 0, "max_misses": 100, "seed": 0} + self.rc.parameters, + {"seed": 0, "max_routes": 20, "max_depth": 0, "max_misses": 
100, "penalty": 1.1, "cutoff_prob": 1.0}, ) self.rc.set_choice_set_generation("bfsle", max_routes=20, beta=1.1) self.assertDictEqual( self.rc.parameters, - {"max_routes": 20, "beta": 1.1, "theta": 1.0, "max_depth": 0, "max_misses": 100, "seed": 0}, + { + "seed": 0, + "max_routes": 20, + "max_depth": 0, + "max_misses": 100, + "beta": 1.1, + "theta": 1.0, + "penalty": 1.0, + "cutoff_prob": 1.0, + }, ) with self.assertRaises(ValueError): self.rc.set_choice_set_generation("link-penalization", max_routes=20, penalty=1.1, beta=1.0) - with self.assertRaises(ValueError): - self.rc.set_choice_set_generation("bfsle", max_routes=20, penalty=1.1) - with self.assertRaises(AttributeError): self.rc.set_choice_set_generation("not an algorithm", max_routes=20, penalty=1.1) From d552901bdb058e87597737b95d1efaf0b6faf94b Mon Sep 17 00:00:00 2001 From: Jake-Moss Date: Tue, 28 May 2024 16:48:46 +1000 Subject: [PATCH 51/52] Update example --- .../trip_distribution/plot_route_choice.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/source/examples/trip_distribution/plot_route_choice.py b/docs/source/examples/trip_distribution/plot_route_choice.py index a8d1990f7..4c00f0949 100644 --- a/docs/source/examples/trip_distribution/plot_route_choice.py +++ b/docs/source/examples/trip_distribution/plot_route_choice.py @@ -90,6 +90,23 @@ # based on the paper # "Route choice sets for very high-resolution data" by Nadine Rieser-Schüssler, Michael Balmer & Kay W. Axhausen (2013). # https://doi.org/10.1080/18128602.2012.671383 +# +# Our BFSLE implementation is slightly different and has extended to allow applying link penalisation as well. Every +# link in all routes found at a depth are penalised with the `penalty` factor for the next depth. So at a depth of 0 no +# links are penalised nor removed. At depth 1, all links found at depth 0 are penalised, then the links marked for +# removal are removed. 
All links in the routes found at depth 1 are then penalised for the next depth. The penalisation
+# compounds. Set `penalty=1.0` to disable.
+#
+# To assist in filtering out bad results during the assignment, a `cutoff_prob` parameter can be provided to exclude
+# routes from the path-sized logit model. The `cutoff_prob` is used to compute an inverse binary logit and obtain a max
+# difference in utilities. If a path's total cost is greater than the minimum cost path in the route set plus the max
+# difference, the route is excluded from the PSL calculations. The route is still returned, but with a probability of
+# 0.0.
+#
+# The `cutoff_prob` should be in the range [0, 1]. It is then rescaled internally to [0.5, 1] as probabilities below 0.5
+# produce negative differences in utilities. A higher `cutoff_prob` includes more routes. A value of `0.0` will only
+# include the minimum cost route. A value of `1.0` includes all routes.
+#
 # It is highly recommended to set either `max_routes` or `max_depth` to prevent runaway results.
 #
 rc.set_choice_set_generation("link-penalisation", max_routes=5, penalty=1.1)

From 086a2dbe868ade1c17bfa22038474fe11dd9f666 Mon Sep 17 00:00:00 2001
From: Jake-Moss
Date: Tue, 28 May 2024 17:00:59 +1000
Subject: [PATCH 52/52] Some nicer comments

---
 aequilibrae/paths/route_choice_set.pyx | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/aequilibrae/paths/route_choice_set.pyx b/aequilibrae/paths/route_choice_set.pyx
index 476816ae1..a726724f8 100644
--- a/aequilibrae/paths/route_choice_set.pyx
+++ b/aequilibrae/paths/route_choice_set.pyx
@@ -299,7 +299,7 @@ cdef class RouteChoiceSet:
             unsigned int c_seed = seed
             unsigned int c_cores = cores if cores > 0 else omp_get_max_threads()
 
-            # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values
+            # Scale cutoff prob from [0, 1] -> [0.5, 1]. Values below 0.5 produce negative inverse binary logit values.
double scaled_cutoff_prob = cutoff_prob * 0.5 + 0.5 vector[pair[long long, long long]] c_ods @@ -371,7 +371,7 @@ cdef class RouteChoiceSet: results.resize(batch_len) if path_size_logit: - # we may clear these objects because it's either: + # We may clear these objects because it's either: # - the first iteration and they contain no elements, thus no memory to leak # - the internal objects were freed by the previous iteration link_union_set.clear() @@ -621,10 +621,10 @@ cdef class RouteChoiceSet: for banned in queue: if lp: - # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation + # We copy the penalised cost buffer into the thread cost buffer to allow us to apply link penalisation, copy(penalised_cost.cbegin(), penalised_cost.cend(), &thread_cost[0]) else: - # Otherwise we just copy directly from the cost view + # ...otherwise we just copy directly from the cost view. memcpy(&thread_cost[0], &self.cost_view[0], self.cost_view.shape[0] * sizeof(double)) for connector in deref(banned): @@ -940,6 +940,7 @@ cdef class RouteChoiceSet: prob_vec = new vector[double]() prob_vec.reserve(total_cost.size()) + # The route mask should be True for the routes we wish to include. for i in range(total_cost.size()): route_mask[i] = total_cost[i] <= cutoff_cost @@ -949,11 +950,14 @@ cdef class RouteChoiceSet: if route_mask[i]: inv_prob = 0.0 for j in range(total_cost.size()): + # We must skip any other routes that are not included in the mask otherwise our probabilities + # won't add up. if route_mask[j]: inv_prob = inv_prob + pow(path_overlap_vec[j] / path_overlap_vec[i], beta) \ * exp(-theta * (total_cost[j] - total_cost[i])) prob_vec.push_back(1.0 / inv_prob) else: + # Anything that has been excluded gets a probability of 0 rather than be removed entirely. 
prob_vec.push_back(0.0) return prob_vec @@ -1336,7 +1340,7 @@ cdef class RouteChoiceSet: for i in range(ods.size()): route_set = route_sets[i] - # Instead of construction a "list of lists" style object for storing the route sets we instead will + # Instead of constructing a "list of lists" style object for storing the route sets we instead will # construct one big array of link IDs with a corresponding offsets array that indicates where each new row # (path) starts. for route in deref(route_set):