Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add distance matrix functions #166

Merged
merged 13 commits into from
Nov 6, 2020
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ crate-type = ["cdylib"]
petgraph = "0.5.1"
fixedbitset = "0.2.0"
numpy = "0.12.1"
ndarray = "0.13.0"
rand = "0.7"
rand_pcg = "0.2"
rayon = "1.5"
Expand All @@ -32,3 +31,7 @@ features = ["extension-module", "hashbrown"]
[dependencies.hashbrown]
version = "0.9"
features = ["rayon"]

[dependencies.ndarray]
version = "0.13.0"
features = ["rayon"]
2 changes: 2 additions & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ Algorithm Functions
retworkx.descendants
retworkx.ancestors
retworkx.lexicographical_topological_sort
retworkx.graph_distance_matrix
retworkx.digraph_distance_matrix
retworkx.floyd_warshall
retworkx.graph_floyd_warshall_numpy
retworkx.digraph_floyd_warshall_numpy
Expand Down
159 changes: 159 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,163 @@ fn layers(
Ok(PyList::new(py, output).into())
}

/// Get the distance matrix for a directed graph
///
/// This differs from functions like digraph_floyd_warshall_numpy in that the
/// edge weight/data payload is not used and each edge is treated as a
/// distance of 1.
///
/// This function is also multithreaded and will run in parallel if the number
/// of nodes in the graph is greater than or equal to the value of
/// ``parallel_threshold`` (it defaults to 300). If the function will be
/// running in parallel the env var ``RAYON_NUM_THREADS`` can be used to
/// adjust how many threads will be used.
///
/// Entries for node pairs that have no connecting path are left at ``0.0``.
///
/// :param PyDiGraph graph: The graph to get the distance matrix for
/// :param int parallel_threshold: The number of nodes to calculate the
///     distance matrix in parallel at. It defaults to 300, but this can
///     be tuned
/// :param bool as_undirected: If set to ``True`` the input directed graph
///     will be treated as if each edge was bidirectional/undirected in the
///     output distance matrix.
///
/// :returns: The distance matrix
/// :rtype: numpy.ndarray
#[pyfunction(parallel_threshold = "300", as_undirected = "false")]
#[text_signature = "(graph, /, parallel_threshold=300, as_undirected=False)"]
pub fn digraph_distance_matrix(
    py: Python,
    graph: &digraph::PyDiGraph,
    parallel_threshold: usize,
    as_undirected: bool,
) -> PyResult<PyObject> {
    let n = graph.node_count();
    // Every cell starts at 0.0; only cells for nodes reached by the BFS are
    // overwritten, so unreachable pairs keep that initial value.
    let mut matrix = Array2::<f64>::zeros((n, n));
    // Level-by-level BFS from node `index`, writing hop counts into `row`.
    // NOTE(review): the row is addressed by raw node index, which assumes the
    // node indices are contiguous in 0..n. If the underlying graph can have
    // holes left by removed nodes, `v.index()` could exceed the row length --
    // confirm against the graph type.
    let bfs_traversal = |index: usize, mut row: ArrayViewMut1<f64>| {
        // Only membership matters here, so a set is enough.
        let mut seen: HashSet<NodeIndex> = HashSet::new();
        let mut level: usize = 0;
        let mut next_level: HashSet<NodeIndex> = HashSet::new();
        next_level.insert(NodeIndex::new(index));
        while !next_level.is_empty() {
            let this_level = next_level;
            next_level = HashSet::new();
            let mut found: Vec<NodeIndex> = Vec::new();
            for v in this_level {
                // `insert` returns true only the first time a node is seen,
                // which is when its shortest distance is known.
                if seen.insert(v) {
                    found.push(v);
                    row[[v.index()]] = level as f64;
                }
            }
            // Stop early once every node has been assigned a distance.
            if seen.len() == n {
                return;
            }
            for node in found {
                next_level.extend(
                    graph
                        .graph
                        .neighbors_directed(node, petgraph::Direction::Outgoing),
                );
                if as_undirected {
                    // Also walk edges backwards so direction is ignored.
                    next_level.extend(
                        graph
                            .graph
                            .neighbors_directed(node, petgraph::Direction::Incoming),
                    );
                }
            }
            level += 1;
        }
    };
    if n < parallel_threshold {
        matrix
            .axis_iter_mut(Axis(0))
            .enumerate()
            .for_each(|(index, row)| bfs_traversal(index, row));
    } else {
        // Parallelize by row and iterate from each row index in BFS order
        matrix
            .axis_iter_mut(Axis(0))
            .into_par_iter()
            .enumerate()
            .for_each(|(index, row)| bfs_traversal(index, row));
    }
    Ok(matrix.into_pyarray(py).into())
}

/// Get the distance matrix for an undirected graph
///
/// This differs from functions like graph_floyd_warshall_numpy in that the
/// edge weight/data payload is not used and each edge is treated as a
/// distance of 1.
///
/// This function is also multithreaded and will run in parallel if the number
/// of nodes in the graph is greater than or equal to the value of
/// ``parallel_threshold`` (it defaults to 300). If the function will be
/// running in parallel the env var ``RAYON_NUM_THREADS`` can be used to
/// adjust how many threads will be used.
///
/// Entries for node pairs that have no connecting path are left at ``0.0``.
///
/// :param PyGraph graph: The graph to get the distance matrix for
/// :param int parallel_threshold: The number of nodes to calculate the
///     distance matrix in parallel at. It defaults to 300, but this can
///     be tuned
///
/// :returns: The distance matrix
/// :rtype: numpy.ndarray
#[pyfunction(parallel_threshold = "300")]
#[text_signature = "(graph, /, parallel_threshold=300)"]
pub fn graph_distance_matrix(
    py: Python,
    graph: &graph::PyGraph,
    parallel_threshold: usize,
) -> PyResult<PyObject> {
    let n = graph.node_count();
    // Every cell starts at 0.0; only cells for nodes reached by the BFS are
    // overwritten, so unreachable pairs keep that initial value.
    let mut matrix = Array2::<f64>::zeros((n, n));
    // Level-by-level BFS from node `index`, writing hop counts into `row`.
    // NOTE(review): the row is addressed by raw node index, which assumes the
    // node indices are contiguous in 0..n. If the underlying graph can have
    // holes left by removed nodes, `v.index()` could exceed the row length --
    // confirm against the graph type.
    let bfs_traversal = |index: usize, mut row: ArrayViewMut1<f64>| {
        // Only membership matters here, so a set is enough.
        let mut seen: HashSet<NodeIndex> = HashSet::new();
        let mut level: usize = 0;
        let mut next_level: HashSet<NodeIndex> = HashSet::new();
        next_level.insert(NodeIndex::new(index));
        while !next_level.is_empty() {
            let this_level = next_level;
            next_level = HashSet::new();
            let mut found: Vec<NodeIndex> = Vec::new();
            for v in this_level {
                // `insert` returns true only the first time a node is seen,
                // which is when its shortest distance is known.
                if seen.insert(v) {
                    found.push(v);
                    row[[v.index()]] = level as f64;
                }
            }
            // Stop early once every node has been assigned a distance.
            if seen.len() == n {
                return;
            }
            for node in found {
                next_level.extend(graph.graph.neighbors(node));
            }
            level += 1;
        }
    };
    if n < parallel_threshold {
        matrix
            .axis_iter_mut(Axis(0))
            .enumerate()
            .for_each(|(index, row)| bfs_traversal(index, row));
    } else {
        // Parallelize by row and iterate from each row index in BFS order
        matrix
            .axis_iter_mut(Axis(0))
            .into_par_iter()
            .enumerate()
            .for_each(|(index, row)| bfs_traversal(index, row));
    }
    Ok(matrix.into_pyarray(py).into())
}

fn weight_callable(
py: Python,
weight_fn: &Option<PyObject>,
Expand Down Expand Up @@ -2071,6 +2228,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(graph_floyd_warshall_numpy))?;
m.add_wrapped(wrap_pyfunction!(digraph_floyd_warshall_numpy))?;
m.add_wrapped(wrap_pyfunction!(layers))?;
m.add_wrapped(wrap_pyfunction!(graph_distance_matrix))?;
m.add_wrapped(wrap_pyfunction!(digraph_distance_matrix))?;
m.add_wrapped(wrap_pyfunction!(digraph_adjacency_matrix))?;
m.add_wrapped(wrap_pyfunction!(graph_adjacency_matrix))?;
m.add_wrapped(wrap_pyfunction!(graph_all_simple_paths))?;
Expand Down
111 changes: 111 additions & 0 deletions tests/test_dist_matrix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import unittest

import numpy

import retworkx


class TestDistanceMatrix(unittest.TestCase):
    """Distance-matrix checks on a fixed 7-node graph.

    The same edge list is loaded into both graph types. As an undirected
    graph it forms a single 7-cycle; as a directed graph the edges run
    0->1->2->3->4->5->6 plus 0->6.
    """

    NUM_NODES = 7
    EDGES = ((0, 1), (0, 6), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6))

    # Expected hop counts when edge direction is ignored.
    SYMMETRIC_EXPECTED = numpy.array([[0., 1., 2., 3., 3., 2., 1.],
                                      [1., 0., 1., 2., 3., 3., 2.],
                                      [2., 1., 0., 1., 2., 3., 3.],
                                      [3., 2., 1., 0., 1., 2., 3.],
                                      [3., 3., 2., 1., 0., 1., 2.],
                                      [2., 3., 3., 2., 1., 0., 1.],
                                      [1., 2., 3., 3., 2., 1., 0.]])

    # Expected hop counts following edge direction; pairs with no path
    # are expected to be 0.
    DIRECTED_EXPECTED = numpy.array([[0., 1., 2., 3., 4., 5., 1.],
                                     [0., 0., 1., 2., 3., 4., 5.],
                                     [0., 0., 0., 1., 2., 3., 4.],
                                     [0., 0., 0., 0., 1., 2., 3.],
                                     [0., 0., 0., 0., 0., 1., 2.],
                                     [0., 0., 0., 0., 0., 0., 1.],
                                     [0., 0., 0., 0., 0., 0., 0.]])

    def _make_graph(self, graph_cls):
        # Build a fresh graph of the requested type with the shared edges.
        built = graph_cls()
        built.add_nodes_from(list(range(self.NUM_NODES)))
        built.add_edges_from_no_data(list(self.EDGES))
        return built

    def _check(self, result, reference):
        # Exact element-wise comparison of the returned matrix.
        self.assertTrue(numpy.array_equal(result, reference))

    def test_graph_distance_matrix(self):
        ring = self._make_graph(retworkx.PyGraph)
        result = retworkx.graph_distance_matrix(ring)
        self._check(result, self.SYMMETRIC_EXPECTED)

    def test_graph_distance_matrix_parallel(self):
        ring = self._make_graph(retworkx.PyGraph)
        # A threshold below the node count selects the parallel path.
        result = retworkx.graph_distance_matrix(ring, parallel_threshold=5)
        self._check(result, self.SYMMETRIC_EXPECTED)

    def test_digraph_distance_matrix(self):
        chain = self._make_graph(retworkx.PyDiGraph)
        result = retworkx.digraph_distance_matrix(chain)
        self._check(result, self.DIRECTED_EXPECTED)

    def test_digraph_distance_matrix_parallel(self):
        chain = self._make_graph(retworkx.PyDiGraph)
        # A threshold below the node count selects the parallel path.
        result = retworkx.digraph_distance_matrix(chain, parallel_threshold=5)
        self._check(result, self.DIRECTED_EXPECTED)

    def test_digraph_distance_matrix_as_undirected(self):
        chain = self._make_graph(retworkx.PyDiGraph)
        result = retworkx.digraph_distance_matrix(chain, as_undirected=True)
        self._check(result, self.SYMMETRIC_EXPECTED)

    def test_digraph_distance_matrix_parallel_as_undirected(self):
        chain = self._make_graph(retworkx.PyDiGraph)
        result = retworkx.digraph_distance_matrix(chain, parallel_threshold=5,
                                                  as_undirected=True)
        self._check(result, self.SYMMETRIC_EXPECTED)