-
Notifications
You must be signed in to change notification settings - Fork 91
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Speed and scalability improvements for graph multiresolution #3
base: master
Are you sure you want to change the base?
Changes from 5 commits
4502ee1
df029a2
5709213
3234d66
df13afe
548c357
9ecacf0
c1d75ba
bf2183e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
# -*- coding: utf-8 -*- | ||
r"""This module contains functionalities for the reduction of graphs' vertex set while keeping the graph structure.""" | ||
|
||
from ..utils import resistance_distance, build_logger | ||
from ..utils import resistance_distance, build_logger, extract_submatrix, splu_inv_dot, approx_resistance_distance | ||
from ..graphs import Graph | ||
from ..filters import Filter | ||
|
||
|
@@ -12,17 +12,26 @@ | |
logger = build_logger(__name__) | ||
|
||
|
||
def graph_sparsify(M, epsilon, maxiter=10): | ||
def graph_sparsify(M, epsilon, maxiter=10, fast=True): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would set fast=False by default, so that the previous default behavior of this function remains unchanged. What do you think? |
||
r""" | ||
Sparsify a graph using Spielman-Srivastava algorithm. | ||
|
||
Parameters | ||
---------- | ||
M : Graph or sparse matrix | ||
Graph structure or a Laplacian matrix | ||
epsilon : int | ||
|
||
epsilon : float | ||
Sparsification parameter | ||
|
||
maxiter : int (optional) | ||
Number of iterations in successive attempts at reducing the sparsification | ||
parameter to preserve connectivity. (default: 10) | ||
|
||
fast : bool | ||
Whether to use the fast resistance distance from :cite:`spielman2011graph` | ||
or exact value. (default: True) | ||
|
||
Returns | ||
------- | ||
Mnew : Graph or sparse matrix | ||
|
@@ -32,6 +41,11 @@ def graph_sparsify(M, epsilon, maxiter=10): | |
----- | ||
Epsilon should be between 1/sqrt(N) and 1 | ||
|
||
The resistance distances computed by the `fast` option are approximate but | ||
that approximation is included in the graph sparsification bounds of the | ||
Spielman-Srivastava algorithm. Without this option, distances are computed | ||
by blunt matrix inversion which does not scale for large graphs. | ||
|
||
Examples | ||
-------- | ||
>>> from pygsp import graphs, operators | ||
|
@@ -49,35 +63,26 @@ def graph_sparsify(M, epsilon, maxiter=10): | |
if isinstance(M, Graph): | ||
if not M.lap_type == 'combinatorial': | ||
raise NotImplementedError | ||
L = M.L | ||
g = M | ||
g.create_incidence_matrix() | ||
else: | ||
L = M | ||
g = Graph(W=sparse.diags(M.diagonal()) - M, lap_type='combinatorial') | ||
g.create_incidence_matrix() | ||
|
||
N = np.shape(L)[0] | ||
N = g.N | ||
|
||
if not 1./np.sqrt(N) <= epsilon < 1: | ||
raise ValueError('GRAPH_SPARSIFY: Epsilon out of required range') | ||
|
||
# Not sparse | ||
resistance_distances = resistance_distance(L).toarray() | ||
# Get the Weight matrix | ||
if isinstance(M, Graph): | ||
W = M.W | ||
if fast: | ||
Re = approx_resistance_distance(g, epsilon) | ||
else: | ||
W = np.diag(L.diagonal()) - L.toarray() | ||
W[W < 1e-10] = 0 | ||
|
||
W = sparse.coo_matrix(W) | ||
W.data[W.data < 1e-10] = 0 | ||
W = W.tocsc() | ||
W.eliminate_zeros() | ||
|
||
|
||
start_nodes, end_nodes, weights = sparse.find(sparse.tril(W)) | ||
Re = resistance_distance(g.L).toarray() | ||
Re = Re[g.start_nodes, g.end_nodes] | ||
|
||
# Calculate the new weights. | ||
weights = np.maximum(0, weights) | ||
Re = np.maximum(0, resistance_distances[start_nodes, end_nodes]) | ||
weights = np.maximum(0, g.Wb.diagonal()) | ||
Re = np.maximum(0, Re) | ||
Pe = weights * Re | ||
Pe = Pe / np.sum(Pe) | ||
|
||
|
@@ -97,7 +102,7 @@ def graph_sparsify(M, epsilon, maxiter=10): | |
counts[spin_counts[:, 0]] = spin_counts[:, 1] | ||
new_weights = counts * per_spin_weights | ||
|
||
sparserW = sparse.csc_matrix((new_weights, (start_nodes, end_nodes)), | ||
sparserW = sparse.csc_matrix((new_weights, (g.start_nodes, g.end_nodes)), | ||
shape=(N, N)) | ||
sparserW = sparserW + sparserW.T | ||
sparserL = sparse.diags(sparserW.diagonal(), 0) - sparserW | ||
|
@@ -279,7 +284,7 @@ def graph_multiresolution(G, levels, **kwargs): | |
return Gs | ||
|
||
|
||
def kron_reduction(G, ind): | ||
def kron_reduction(G, ind, threshold=np.spacing(1)): | ||
r""" | ||
Compute the kron reduction. | ||
|
||
|
@@ -295,12 +300,22 @@ def kron_reduction(G, ind): | |
Graph structure or weight matrix | ||
ind : list | ||
indices of the nodes to keep | ||
threshold: float | ||
Threshold applied to the reduced Laplacian matrix to remove numerical | ||
noise. (default: machine precision) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. typo: machine |
||
|
||
Returns | ||
------- | ||
Gnew : Graph or sparse matrix | ||
New graph structure or weight matrix | ||
|
||
Notes | ||
----- | ||
For large graphs, with default thresholding value, the kron reduction can | ||
lead to an extremely large number of edges, most of which have very small | ||
weight. In this situation, a larger thresholding can remove most of these | ||
unnecessary edges, an approximation that also makes subsequent sparsification | ||
much faster. | ||
|
||
References | ||
---------- | ||
|
@@ -327,27 +342,24 @@ def kron_reduction(G, ind): | |
N = np.shape(L)[0] | ||
ind_comp = np.setdiff1d(np.arange(N, dtype=int), ind) | ||
|
||
L_red = L[np.ix_(ind, ind)] | ||
L_in_out = L[np.ix_(ind, ind_comp)] | ||
L_out_in = L[np.ix_(ind_comp, ind)].tocsc() | ||
L_comp = L[np.ix_(ind_comp, ind_comp)].tocsc() | ||
L_red = extract_submatrix(L,ind, ind) | ||
L_in_out = extract_submatrix(L, ind, ind_comp) | ||
L_out_in = L_in_out.transpose().tocsc() | ||
L_comp = extract_submatrix(L,ind_comp, ind_comp).tocsc() | ||
|
||
Lnew = L_red - L_in_out.dot(spsolve(L_comp, L_out_in)) | ||
Lnew = L_red - L_in_out.dot(splu_inv_dot(L_comp, L_out_in)) | ||
|
||
# Make the laplacian symmetric if it is almost symmetric! | ||
if np.abs(Lnew - Lnew.T).sum() < np.spacing(1) * np.abs(Lnew).sum(): | ||
Lnew = (Lnew + Lnew.T) / 2. | ||
# Threshold exceedingly small values for stability | ||
Lnew = Lnew.tocoo() | ||
Lnew.data[abs(Lnew.data) < threshold] = 0 | ||
Lnew = Lnew.tocsc() | ||
Lnew.eliminate_zeros() | ||
|
||
# Enforces symmetric Laplacian | ||
Lnew = (Lnew + Lnew.T) / 2. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we always want the Laplacian to be symmetric here? In the previous implementation, this line was called only under the conditional statement. |
||
|
||
if isinstance(G, Graph): | ||
# Suppress the diagonal ? This is a good question? | ||
Wnew = sparse.diags(Lnew.diagonal(), 0) - Lnew | ||
Snew = Lnew.diagonal() - np.ravel(Wnew.sum(0)) | ||
if np.linalg.norm(Snew, 2) >= np.spacing(1000): | ||
Wnew = Wnew + sparse.diags(Snew, 0) | ||
|
||
# Removing diagonal for stability | ||
Wnew = Wnew - Wnew.diagonal() | ||
|
||
coords = G.coords[ind, :] if len(G.coords.shape) else np.ndarray(None) | ||
Gnew = Graph(W=Wnew, coords=coords, lap_type=G.lap_type, | ||
plotting=G.plotting, gtype='Kron reduction') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know if this is faster/slower/same as what's done in adj2vec(), lines 23-24 in pygsp/data_handling.py? Indeed, I rather prefer your design, than the use of adj2vec we're making now for computing graph gradients. I might make the necessary adaptions to replace it by this call of your create_incidence_matrix soon.