Skip to content

Commit

Permalink
implement shortest path distance for all graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
scottgigante committed Mar 14, 2019
1 parent 6f65394 commit 04bec15
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 44 deletions.
68 changes: 68 additions & 0 deletions graphtools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from inspect import signature
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.preprocessing import normalize
from sklearn.utils.graph import graph_shortest_path
from scipy import sparse
import warnings
import numbers
Expand Down Expand Up @@ -654,6 +655,73 @@ def to_pickle(self, path):
if int(sys.version.split(".")[1]) < 7 and isinstance(self, pygsp.graphs.Graph):
self.logger = logger

def shortest_path(self, method='auto', distance=None):
    """
    Find the length of the shortest path between every pair of vertices
    on the graph

    Parameters
    ----------
    method : string ['auto'|'FW'|'D']
        method to use. Options are
        'auto' : attempt to choose the best method for the current problem
        'FW' : Floyd-Warshall algorithm. O[N^3]
        'D' : Dijkstra's algorithm with Fibonacci heaps. O[(k+log(N))N^2]
    distance : {'constant', 'data', 'affinity'}, optional (default: None)
        Distances along graph edges.
        'constant' gives constant edge lengths (the kernel matrix
        `self.K` is used as-is).
        'data' gives Euclidean distances between the rows of
        `self.data_nu` at the two endpoints of each edge.
        'affinity' gives distances as negative log affinities.
        If None, 'data' is used when `self.decay` is None and
        'affinity' is used otherwise.

    Returns
    -------
    D : np.ndarray, float, shape = [N,N]
        D[i,j] gives the shortest distance from point i to point j
        along the graph. If no path exists, the distance is np.inf

    Raises
    ------
    ValueError
        If `distance` is not None, 'constant', 'data' or 'affinity'.
    NotImplementedError
        If `distance` is 'constant' or 'data' and `self.decay` is not
        None, or if `distance` is 'affinity' and `self.decay` is None.

    Notes
    -----
    'constant' and 'data' distances are only implemented for graphs with
    `decay=None`; 'affinity' distances are only implemented for graphs
    with `decay!=None`.

    The underlying solver reports a missing path as 0, so every
    off-diagonal 0 in its output is converted to np.inf. A genuine
    zero-length path between two distinct points is therefore also
    reported as np.inf.
    """
    if distance is None:
        # pick the distance that is implemented for this kind of kernel
        if self.decay is None:
            distance = 'data'
            tasklogger.log_info("Using ambient data distances.")
        else:
            distance = 'affinity'
            tasklogger.log_info("Using negative log affinity distances.")
    elif distance not in ('constant', 'data', 'affinity'):
        # validate before the compatibility checks so that a misspelled
        # option is reported as such rather than as "not implemented"
        raise ValueError(
            "Expected `distance` in ['constant', 'data', 'affinity']. "
            "Got {}".format(distance))

    if distance != 'affinity' and self.decay is not None:
        raise NotImplementedError(
            "Graph shortest path with constant or data distance only "
            "implemented for kNNGraph with `decay=None`. "
            "For decaying kernel, use `distance='affinity'`.")
    elif distance == 'affinity' and self.decay is None:
        raise NotImplementedError(
            "Graph shortest path with affinity distance only "
            "implemented for kNNGraph with `decay!=None`. "
            "For kNN kernel, use `distance='constant'` "
            "or `distance='data'`.")

    if distance == 'constant':
        D = self.K
    elif distance == 'data':
        # COO format exposes the (row, col) endpoints of every stored edge
        D = sparse.coo_matrix(self.K)
        D.data = np.sqrt(np.sum((
            self.data_nu[D.row] - self.data_nu[D.col])**2, axis=1))
    else:
        # distance == 'affinity': converting makes a new matrix object, so
        # the `data` attribute is rebound on it rather than on self.K
        D = sparse.csr_matrix(self.K)
        D.data = -1 * np.log(D.data)

    P = graph_shortest_path(D, method=method)
    # sklearn returns 0 if no path exists
    P[P == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(P, 0)
    return P


class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)):
"""Interface between BaseGraph and PyGSP.
Expand Down
35 changes: 0 additions & 35 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from sklearn.utils.extmath import randomized_svd
from sklearn.preprocessing import normalize
from sklearn.cluster import MiniBatchKMeans
from sklearn.utils.graph import graph_shortest_path
from scipy.spatial.distance import pdist, cdist
from scipy.spatial.distance import squareform
from scipy import sparse
Expand Down Expand Up @@ -375,40 +374,6 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None,
tasklogger.log_complete("affinities")
return K

def shortest_path(self, method='auto'):
    """
    Compute all-pairs shortest path lengths along the graph

    Parameters
    ----------
    method : string ['auto'|'FW'|'D']
        algorithm passed through to the solver
        'auto' : let the solver pick the best method for the problem
        'FW' : Floyd-Warshall algorithm. O[N^3]
        'D' : Dijkstra's algorithm. O[(k+log(N))N^2]

    Returns
    -------
    D : np.ndarray, float, shape = [N,N]
        D[i,j] is the length of the shortest path from point i to
        point j along the graph, or np.inf if no path exists

    Raises
    ------
    NotImplementedError
        If `self.decay` is not None.

    Notes
    -----
    Only graphs built with `decay=None` are supported; the kernel
    matrix `self.K` is used directly as the edge-length matrix.
    """
    if self.decay is not None:
        raise NotImplementedError(
            "Graph shortest path currently only "
            "implemented for kNNGraph with `decay=None`.")

    path_lengths = graph_shortest_path(self.K, method=method)
    # the solver marks unreachable pairs with 0; report them as infinite
    path_lengths[path_lengths == 0] = np.inf
    # every vertex is at distance zero from itself
    np.fill_diagonal(path_lengths, 0)
    return path_lengths


class LandmarkGraph(DataGraph):
"""Landmark graph
Expand Down
2 changes: 1 addition & 1 deletion graphtools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
# NOTE(review): rendered diff of the package version string -- the first
# assignment appears to be the removed value and the second the added one;
# confirm against the repository.
__version__ = "1.1.1-alpha"
__version__ = "1.2.0-alpha"
36 changes: 36 additions & 0 deletions test/test_exact.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import print_function
from sklearn.utils.graph import graph_shortest_path
from load_tests import (
graphtools,
np,
Expand Down Expand Up @@ -452,6 +453,41 @@ def test_exact_graph_anisotropy():
decay=a, knn=k - 1, random_state=42,
use_pygsp=True, anisotropy='invalid')

#####################################################
# Check extra functionality
#####################################################


def test_shortest_path_affinity():
    # With a decaying kernel, shortest paths over negative log affinities
    # must match a direct call to sklearn, both when requested explicitly
    # and when `distance` is left at its default.
    subset = np.random.choice(len(data), len(data) // 4, replace=False)
    G = build_graph(data[subset], knn=5, decay=15)
    has_edge = G.K != 0
    # mask zeros with NaN before the log so that non-edges never hit log(0)
    log_affinity = np.log(np.where(has_edge, G.K, np.nan))
    D = -1 * np.where(has_edge, log_affinity, 0)
    expected = graph_shortest_path(D)
    # sklearn returns 0 if no path exists
    expected[expected == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(expected, 0)
    np.testing.assert_equal(expected, G.shortest_path(distance='affinity'))
    np.testing.assert_equal(expected, G.shortest_path())


@raises(NotImplementedError)
def test_shortest_path_decay_constant():
    # constant edge lengths are not implemented for a decaying kernel
    subset = np.random.choice(len(data), len(data) // 4, replace=False)
    G = build_graph(data[subset], knn=5, decay=15)
    G.shortest_path(distance='constant')


@raises(NotImplementedError)
def test_shortest_path_decay_data():
    # ambient data distances are not implemented for a decaying kernel
    subset = np.random.choice(len(data), len(data) // 4, replace=False)
    G = build_graph(data[subset], knn=5, decay=15)
    G.shortest_path(distance='data')


#####################################################
# Check interpolation
#####################################################
Expand Down
28 changes: 20 additions & 8 deletions test/test_knn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import print_function, division
from sklearn.utils.graph import graph_shortest_path
from scipy.spatial.distance import pdist, squareform
from load_tests import (
graphtools,
np,
Expand All @@ -12,8 +13,6 @@
assert_raises,
warns,
raises,
squareform,
pdist,
PCA,
TruncatedSVD,
)
Expand Down Expand Up @@ -316,25 +315,38 @@ def test_knn_interpolate():
#################################################


# NOTE(review): this span is a rendered diff with indentation stripped. The
# two consecutive `def` lines and the two trailing assertions look like the
# removed and added versions of the same test interleaved -- confirm against
# the repository before treating any single line as current.
# The test compares G.shortest_path(...) on a `decay=None` graph against a
# direct sklearn graph_shortest_path call on the kernel matrix G.K.
def test_shortest_path():
def test_shortest_path_constant():
data_small = data[np.random.choice(
len(data), len(data) // 4, replace=False)]
G = build_graph(data_small, knn=5, decay=None)
K = G.K
P = graph_shortest_path(G.K)
# sklearn returns 0 if no path exists
P[np.where(P == 0)] = np.inf
# diagonal should actually be zero
np.fill_diagonal(P, 0)
np.testing.assert_equal(P, G.shortest_path())
np.testing.assert_equal(P, G.shortest_path(distance='constant'))


def test_shortest_path_data():
    # With decay=None, shortest paths over ambient data distances must match
    # a direct call to sklearn, both when requested explicitly and when
    # `distance` is left at its default.
    subset = np.random.choice(len(data), len(data) // 4, replace=False)
    G = build_graph(data[subset], knn=5, decay=None)
    pairwise = squareform(pdist(G.data_nu))
    # keep a pairwise distance only where the kernel has an edge
    edge_mask = np.where(G.K.toarray() > 0, 1, 0)
    D = pairwise * edge_mask
    expected = graph_shortest_path(D)
    # sklearn returns 0 if no path exists
    expected[expected == 0] = np.inf
    # diagonal should actually be zero
    np.fill_diagonal(expected, 0)
    np.testing.assert_allclose(expected, G.shortest_path(distance='data'))
    np.testing.assert_allclose(expected, G.shortest_path())


# NOTE(review): this span is a rendered diff with indentation stripped. The
# two `def` lines and the two build_graph/shortest_path pairs look like the
# removed and added versions of the same test interleaved -- confirm against
# the repository before treating any single line as current.
# Either way the decorated test expects G.shortest_path(...) to raise
# NotImplementedError.
@raises(NotImplementedError)
def test_shortest_path_decay():
def test_shortest_path_no_decay_affinity():
data_small = data[np.random.choice(
len(data), len(data) // 4, replace=False)]
G = build_graph(data_small, knn=5, decay=15, thresh=1e-4)
G.shortest_path()
G = build_graph(data_small, knn=5, decay=None)
G.shortest_path(distance='affinity')


####################
Expand Down

0 comments on commit 04bec15

Please sign in to comment.