Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function to find densest subgraph #635

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,7 @@ Connectivity and Cycles
retworkx.chain_decomposition
retworkx.all_simple_paths
retworkx.all_pairs_all_simple_paths
retworkx.densest_subgraph_of_size

.. _graph-ops:

Expand Down
31 changes: 31 additions & 0 deletions releasenotes/notes/densest_subgraph-1b068f69f80facd4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
features:
- |
Added a new function, :func:`~.densest_subgraph_of_size`, which is used to return a
subgraph of given size that has the highest degree of connectivity between the nodes.
For example, if you wanted to find the subgraph of 5 nodes in a 4 by 5 hexagonal
lattice graph:

.. jupyter-execute::

import retworkx
from retworkx.visualization import mpl_draw

graph = retworkx.generators.hexagonal_lattice_graph(4, 5)

subgraph, node_map = retworkx.densest_subgraph_of_size(graph, 5)
subgraph_edge_set = set(subgraph.edge_list())
node_colors = []
for node in graph.node_indices():
if node in node_map:
node_colors.append('red')
else:
node_colors.append('blue')
graph[node] = node
edge_colors = []
for edge in graph.edge_list():
if edge[0] in node_map and edge[1] in node_map:
edge_colors.append('red')
else:
edge_colors.append('blue')
mpl_draw(graph, with_labels=True, node_color=node_colors, edge_color=edge_colors, labels=str)
34 changes: 34 additions & 0 deletions retworkx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2334,3 +2334,37 @@ def _digraph_all_pairs_bellman_ford_shortest_path(graph, edge_cost_fn):
@all_pairs_bellman_ford_shortest_paths.register(PyGraph)
def _graph_all_pairs_bellman_ford_shortest_path(graph, edge_cost_fn):
return graph_all_pairs_bellman_ford_shortest_paths(graph, edge_cost_fn)


@functools.singledispatch
def densest_subgraph_of_size(graph, num_nodes, weight_callback=None):
    """Find a densely connected subgraph with a given number of nodes.

    The search is a naive one based on BFS traversal, so no guarantee is
    made about how close the result is to the true densest subgraph.

    :param graph: The graph to search. This can be a
        :class:`~retworkx.PyGraph` or a :class:`~retworkx.PyDiGraph`.
    :param int num_nodes: The number of nodes the returned subgraph should
        contain.
    :param func weight_callback: An optional callable that, if specified,
        will be passed the node indices of each edge in the graph and is
        expected to return a float value. When given, the lowest average
        weight of the edges in a candidate subgraph is used as a selection
        criterion in addition to the connectivity of the subgraph.
    :returns: A tuple of the subgraph found and a :class:`~.NodeMap` mapping
        node indices in the input ``graph`` to the corresponding index in the
        output subgraph.

    :rtype: (subgraph, node_map)
    """
    # Only reached when no implementation is registered for type(graph).
    message = "Invalid Input Type %s for graph" % type(graph)
    raise TypeError(message)


@densest_subgraph_of_size.register(PyDiGraph)
def _digraph_densest_subgraph_of_size(graph, num_nodes, weight_callback=None):
    """Implementation of ``densest_subgraph_of_size`` registered for ``PyDiGraph``."""
    result = digraph_densest_subgraph_of_size(
        graph,
        num_nodes,
        weight_callback=weight_callback,
    )
    return result


@densest_subgraph_of_size.register(PyGraph)
def _graph_densest_subgraph_of_size(graph, num_nodes, weight_callback=None):
    """Implementation of ``densest_subgraph_of_size`` registered for ``PyGraph``."""
    result = graph_densest_subgraph_of_size(
        graph,
        num_nodes,
        weight_callback=weight_callback,
    )
    return result
215 changes: 215 additions & 0 deletions src/dense_subgraph.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
// Licensed under the Apache License, Version 2.0 (the "License"); you may
// not use this file except in compliance with the License. You may obtain
// a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.

use hashbrown::{HashMap, HashSet};

use petgraph::algo;
use petgraph::graph::NodeIndex;
use petgraph::prelude::*;
use petgraph::visit::{IntoEdgeReferences, NodeFiltered};
use petgraph::EdgeType;

use rayon::prelude::*;

use pyo3::prelude::*;
use pyo3::Python;

use retworkx_core::dictmap::*;

use crate::digraph;
use crate::graph;
use crate::iterators::NodeMap;
use crate::StablePyGraph;

/// One candidate subgraph, produced by a BFS started from a single node.
///
/// Candidates are compared pairwise to pick the one returned by
/// `densest_subgraph`.
struct SubsetResult {
    /// Number of `[node, j]` pairs among the candidate's nodes for which
    /// the graph reports an edge.
    pub count: usize,
    /// Sum of the callback weights of the pairs in `subgraph` divided by the
    /// requested number of nodes; `0.` when no weight callback was given and
    /// `f64::INFINITY` in the reduce identity value.
    pub error: f64,
    /// Nodes of the candidate, in the order the BFS visited them.
    pub map: Vec<NodeIndex>,
    /// The `[node, j]` pairs that were counted in `count`.
    pub subgraph: Vec<[NodeIndex; 2]>,
}

pub fn densest_subgraph<Ty>(
py: Python,
graph: &StablePyGraph<Ty>,
num_nodes: usize,
weight_callback: Option<PyObject>,
) -> PyResult<(StablePyGraph<Ty>, NodeMap)>
where
Ty: EdgeType + Sync,
{
let node_indices: Vec<NodeIndex> = graph.node_indices().collect();
let float_callback =
|callback: PyObject, source_node: usize, target_node: usize| -> PyResult<f64> {
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
let res = callback.as_ref(py).call1((source_node, target_node))?;
res.extract()
};
let mut weight_map: Option<HashMap<[NodeIndex; 2], f64>> = None;

if weight_callback.is_some() {
let mut inner_weight_map: HashMap<[NodeIndex; 2], f64> =
HashMap::with_capacity(graph.edge_count());
let callback = weight_callback.as_ref().unwrap();
for edge in graph.edge_references() {
let source: NodeIndex = edge.source();
let target: NodeIndex = edge.target();
let weight = float_callback(callback.clone_ref(py), source.index(), target.index())?;
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
inner_weight_map.insert([source, target], weight);
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
}
weight_map = Some(inner_weight_map);
}
let reduce_identity_fn = || -> SubsetResult {
SubsetResult {
count: 0,
map: Vec::new(),
error: std::f64::INFINITY,
subgraph: Vec::new(),
}
};

let reduce_fn = |best: SubsetResult, curr: SubsetResult| -> SubsetResult {
if weight_callback.is_some() {
if curr.count >= best.count && curr.error <= best.error {
curr
} else {
best
}
} else if curr.count > best.count {
curr
} else {
best
}
};

let best_result = node_indices
.into_par_iter()
.map(|index| {
let mut subgraph: Vec<[NodeIndex; 2]> = Vec::with_capacity(num_nodes);
let mut bfs = Bfs::new(&graph, index);
let mut bfs_vec: Vec<NodeIndex> = Vec::with_capacity(num_nodes);
let mut bfs_set: HashSet<NodeIndex> = HashSet::with_capacity(num_nodes);

let mut count = 0;
while let Some(node) = bfs.next(&graph) {
bfs_vec.push(node);
bfs_set.insert(node);
count += 1;
if count >= num_nodes {
break;
}
}
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
let mut connection_count = 0;
for node in &bfs_vec {
for j in graph.node_indices().filter(|j| bfs_set.contains(j)) {
if graph.contains_edge(*node, j) {
connection_count += 1;
subgraph.push([*node, j]);
}
}
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
}
let error = match &weight_map {
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
Some(map) => subgraph.iter().map(|edge| map[edge]).sum::<f64>() / num_nodes as f64,
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
None => 0.,
};
SubsetResult {
count: connection_count,
error,
map: bfs_vec,
subgraph,
}
})
.reduce(reduce_identity_fn, reduce_fn);

let mut subgraph = StablePyGraph::<Ty>::with_capacity(num_nodes, best_result.subgraph.len());
let mut node_map: DictMap<usize, usize> = DictMap::with_capacity(num_nodes);
for node in best_result.map {
let new_index = subgraph.add_node(graph[node].clone_ref(py));
node_map.insert(node.index(), new_index.index());
}
let node_filter = |node: NodeIndex| -> bool { node_map.contains_key(&node.index()) };
let filtered = NodeFiltered(graph, node_filter);
for edge in filtered.edge_references() {
let new_source = NodeIndex::new(*node_map.get(&edge.source().index()).unwrap());
let new_target = NodeIndex::new(*node_map.get(&edge.target().index()).unwrap());
subgraph.add_edge(new_source, new_target, edge.weight().clone_ref(py));
}
Ok((subgraph, NodeMap { node_map }))
}

/// Find densest subgraph in a :class:`~.PyGraph`
///
/// This method does not provide any guarantees on the approximation as it
/// does a naive search using BFS traversal.
///
/// :param PyGraph graph: The graph to find densest subgraph in.
/// :param int num_nodes: The number of nodes in the subgraph to find
/// :param func weight_callback: An optional callable that if specified will be
/// passed the node indices of each edge in the graph and it is expected to
/// return a float value. If specified the lowest avg weight for edges in
/// a found subgraph will be a criteria for selection in addition to the
/// connectivity of the subgraph.
/// :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the
/// mapping of node indices in the input ``graph`` to the index in the
/// output subgraph.
/// :rtype: (PyGraph, NodeMap)
#[pyfunction]
#[pyo3(text_signature = "(graph. num_nodes, /, weight_callback=None)")]
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
pub fn graph_densest_subgraph_of_size(
py: Python,
graph: &graph::PyGraph,
num_nodes: usize,
weight_callback: Option<PyObject>,
) -> PyResult<(graph::PyGraph, NodeMap)> {
let (inner_graph, node_map) = densest_subgraph(py, &graph.graph, num_nodes, weight_callback)?;
let out_graph = graph::PyGraph {
graph: inner_graph,
node_removed: false,
multigraph: graph.multigraph,
attrs: py.None(),
};
Ok((out_graph, node_map))
}

/// Find densest subgraph in a :class:`~.PyDiGraph`
///
/// This method does not provide any guarantees on the approximation as it
/// does a naive search using BFS traversal.
///
/// :param PyDiGraph graph: The graph to find the densest subgraph in.
/// :param int num_nodes: The number of nodes in the subgraph to find
/// :param func weight_callback: An optional callable that if specified will be
/// passed the node indices of each edge in the graph and it is expected to
/// return a float value. If specified the lowest avg weight for edges in
/// a found subgraph will be a criteria for selection in addition to the
/// connectivity of the subgraph.
/// :returns: A tuple of the subgraph found and a :class:`~.NodeMap` of the
/// mapping of node indices in the input ``graph`` to the index in the
/// output subgraph.
/// :rtype: (PyDiGraph, NodeMap)
#[pyfunction]
#[pyo3(text_signature = "(graph. num_nodes, /, weight_callback=None)")]
mtreinish marked this conversation as resolved.
Show resolved Hide resolved
pub fn digraph_densest_subgraph_of_size(
py: Python,
graph: &digraph::PyDiGraph,
num_nodes: usize,
weight_callback: Option<PyObject>,
) -> PyResult<(digraph::PyDiGraph, NodeMap)> {
let (inner_graph, node_map) = densest_subgraph(py, &graph.graph, num_nodes, weight_callback)?;
let out_graph = digraph::PyDiGraph {
graph: inner_graph,
node_removed: false,
cycle_state: algo::DfsSpace::default(),
check_cycle: graph.check_cycle,
multigraph: graph.multigraph,
attrs: py.None(),
};
Ok((out_graph, node_map))
}
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ mod centrality;
mod coloring;
mod connectivity;
mod dag_algo;
mod dense_subgraph;
mod digraph;
mod dot_utils;
mod generators;
Expand All @@ -39,6 +40,7 @@ use centrality::*;
use coloring::*;
use connectivity::*;
use dag_algo::*;
use dense_subgraph::*;
use graphml::*;
use isomorphism::*;
use layout::*;
Expand Down Expand Up @@ -461,6 +463,8 @@ fn retworkx(py: Python<'_>, m: &PyModule) -> PyResult<()> {
m.add_wrapped(wrap_pyfunction!(biconnected_components))?;
m.add_wrapped(wrap_pyfunction!(chain_decomposition))?;
m.add_wrapped(wrap_pyfunction!(read_graphml))?;
m.add_wrapped(wrap_pyfunction!(digraph_densest_subgraph_of_size))?;
m.add_wrapped(wrap_pyfunction!(graph_densest_subgraph_of_size))?;
m.add_class::<digraph::PyDiGraph>()?;
m.add_class::<graph::PyGraph>()?;
m.add_class::<toposort::TopologicalSorter>()?;
Expand Down
31 changes: 31 additions & 0 deletions tests/graph/test_densest_subgraph.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

import unittest

import retworkx


class TestDensestSubgraph(unittest.TestCase):
    """Checks ``densest_subgraph_of_size`` on a 3x3 grid graph."""

    def test_simple_grid_three_nodes(self):
        """A 3 node request returns the expected edges and node map."""
        grid = retworkx.generators.grid_graph(3, 3)
        found, mapping = retworkx.densest_subgraph_of_size(grid, 3)
        self.assertEqual([(0, 2), (0, 1)], found.edge_list())
        self.assertEqual(mapping, {0: 0, 1: 1, 3: 2})

    def test_simple_grid_six_nodes(self):
        """A 6 node request returns the expected edges and node map."""
        grid = retworkx.generators.grid_graph(3, 3)
        found, mapping = retworkx.densest_subgraph_of_size(grid, 6)
        expected_edges = [
            (5, 2),
            (5, 3),
            (3, 0),
            (3, 4),
            (4, 1),
            (2, 0),
            (0, 1),
        ]
        self.assertEqual(expected_edges, found.edge_list())
        self.assertEqual(mapping, {7: 0, 8: 1, 6: 2, 4: 3, 5: 4, 3: 5})