Skip to content

Commit

Permalink
Merge pull request #13 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
Upgrade to 0.1.5
  • Loading branch information
scottgigante committed Jun 19, 2018
2 parents ae09095 + a571ac2 commit 31ba673
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 26 deletions.
4 changes: 3 additions & 1 deletion graphtools/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,5 +226,7 @@ class Graph(parent_classes[0], parent_classes[1], parent_classes[2]):
# build graph and return
log_debug("Initializing {} with arguments {}".format(
parent_classes,
params))
", ".join(["{}='{}'".format(key, value)
for key, value in params.items()
if key != "data"])))
return Graph(**params)
11 changes: 7 additions & 4 deletions graphtools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,10 @@ def _reduce_data(self):
log_complete("PCA")
return data_nu
else:
return self.data
data = self.data
if sparse.issparse(data):
data = data.toarray()
return data

def get_params(self):
"""Get parameters from this object
Expand Down Expand Up @@ -305,7 +308,7 @@ def __init__(self, kernel_symm='+',
super().__init__(**kwargs)

def _check_symmetrization(self, kernel_symm, gamma):
if kernel_symm not in ['+', '*', 'gamma', 'none']:
if kernel_symm not in ['+', '*', 'gamma', None]:
raise ValueError(
"kernel_symm '{}' not recognized. Choose from "
"'+', '*', 'gamma', or 'none'.".format(kernel_symm))
Expand Down Expand Up @@ -358,13 +361,13 @@ def symmetrize_kernel(self, K):
"Using gamma symmetrization (gamma = {}).".format(self.gamma))
K = self.gamma * elementwise_minimum(K, K.T) + \
(1 - self.gamma) * elementwise_maximum(K, K.T)
elif self.kernel_symm == 'none':
elif self.kernel_symm is None:
log_debug("Using no symmetrization.")
pass
else:
# this should never happen
raise ValueError(
"Expected kernel_symm in ['+', '*', 'gamma' or 'none']. "
"Expected kernel_symm in ['+', '*', 'gamma' or None]. "
"Got {}".format(self.gamma))
return K

Expand Down
63 changes: 43 additions & 20 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ def __init__(self, data, knn=5, decay=None,
if decay is not None and thresh <= 0:
raise ValueError("Cannot instantiate a kNNGraph with `decay=None` "
"and `thresh=0`. Use a TraditionalGraph instead.")
if knn > data.shape[0]:
warnings.warn("Cannot set knn ({k}) to be greater than "
"data.shape[0] ({n}). Setting knn={n}".format(
k=knn, n=data.shape[0]))

super().__init__(data, **kwargs)

Expand Down Expand Up @@ -203,6 +207,11 @@ def build_kernel_to_data(self, Y, knn=None):
"""
if knn is None:
knn = self.knn
if knn > self.data.shape[0]:
warnings.warn("Cannot set knn ({k}) to be greater than "
"data.shape[0] ({n}). Setting knn={n}".format(
k=knn, n=self.data.shape[0]))

Y = self._check_extension_shape(Y)
log_start("KNN search")
if self.decay is None or self.thresh == 1:
Expand All @@ -214,7 +223,7 @@ def build_kernel_to_data(self, Y, knn=None):
else:
# sparse fast alpha decay
knn_tree = self.knn_tree
search_knn = min(knn * 20, len(self.data_nu))
search_knn = min(knn * 20, self.data_nu.shape[0])
distances, indices = knn_tree.kneighbors(
Y, n_neighbors=search_knn)
log_complete("KNN search")
Expand All @@ -230,9 +239,9 @@ def build_kernel_to_data(self, Y, knn=None):
distances = [d for d in distances]
indices = [i for i in indices]
while len(update_idx) > len(Y) // 10 and \
search_knn < len(self.data_nu) / 2:
search_knn < self.data_nu.shape[0] / 2:
# increase the knn search
search_knn = min(search_knn * 20, len(self.data_nu))
search_knn = min(search_knn * 20, self.data_nu.shape[0])
dist_new, ind_new = knn_tree.kneighbors(
Y[update_idx], n_neighbors=search_knn)
for i, idx in enumerate(update_idx):
Expand All @@ -243,12 +252,11 @@ def build_kernel_to_data(self, Y, knn=None):
log_debug("search_knn = {}; {} remaining".format(
search_knn,
len(update_idx)))
if search_knn > len(self.data_nu) / 2:
if search_knn > self.data_nu.shape[0] / 2:
knn_tree = NearestNeighbors(knn, algorithm='brute',
n_jobs=-1).fit(self.data_nu)
n_jobs=self.n_jobs).fit(self.data_nu)
if len(update_idx) > 0:
log_debug("radius search on {}".format(search_knn,
len(update_idx)))
log_debug("radius search on {}".format(len(update_idx)))
# give up - radius search
dist_new, ind_new = knn_tree.radius_neighbors(
Y[update_idx, :],
Expand Down Expand Up @@ -680,6 +688,10 @@ def build_kernel(self):
elif self.precomputed is "adjacency":
# need to set diagonal to one to make it an affinity matrix
K = self.data_nu
if sparse.issparse(K) and \
not (isinstance(K, sparse.dok_matrix) or
isinstance(K, sparse.lil_matrix)):
K = K.tolil()
K = set_diagonal(K, 1)
else:
log_start("affinities")
Expand All @@ -696,6 +708,10 @@ def build_kernel(self):
log_complete("affinities")
# truncate
if sparse.issparse(K):
if not (isinstance(K, sparse.csr_matrix) or
isinstance(K, sparse.csc_matrix) or
isinstance(K, sparse.bsr_matrix)):
K = K.tocsr()
K.data[K.data < self.thresh] = 0
K = K.tocoo()
K.eliminate_zeros()
Expand Down Expand Up @@ -949,7 +965,8 @@ def build_kernel(self):
from .api import Graph
# iterate through sample ids
for i, idx in enumerate(self.samples):
log_debug("subgraph {}: sample {}".format(i, idx))
log_debug("subgraph {}: sample {}, n = {}, knn = {}".format(
i, idx, np.sum(self.sample_idx == idx), self.weighted_knn[i]))
# select data for sample
data = self.data_nu[self.sample_idx == idx]
# build a kNN graph for cells within sample
Expand Down Expand Up @@ -980,35 +997,40 @@ def build_kernel(self):
if i == j:
# downweight within-batch affinities by beta
Kij = Kij * self.beta
K = set_submatrix(K, self.sample_idx == i,
self.sample_idx == j, Kij)
K = set_submatrix(K, self.sample_idx == self.samples[i],
self.sample_idx == self.samples[j], Kij)
log_complete(
"kernel from sample {} to {}".format(self.samples[i],
self.samples[j]))
return K

def symmetrize_kernel(self, K):
if self.kernel_symm == 'gamma' and not isinstance(self.gamma,
numbers.Number):
if self.kernel_symm == 'gamma' and self.gamma is not None and \
not isinstance(self.gamma, numbers.Number):
# matrix gamma
# Gamma can be a matrix with specific values transitions for
# each batch. This allows for technical replicates and
# experimental samples to be corrected simultaneously
log_debug("Using gamma symmetrization. "
"Gamma:\n{}".format(self.gamma))
for i in range(len(self.samples)):
for j in range(i, len(self.samples)):
Kij = K[self.sample_idx == i, :][:, self.sample_idx == j]
Kji = K[self.sample_idx == j, :][:, self.sample_idx == i]
for i, sample_i in enumerate(self.samples):
for j, sample_j in enumerate(self.samples):
if j < i:
continue
Kij = K[np.ix_(self.sample_idx == sample_i,
self.sample_idx == sample_j)]
Kji = K[np.ix_(self.sample_idx == sample_j,
self.sample_idx == sample_i)]
Kij_symm = self.gamma[i, j] * \
elementwise_minimum(Kij, Kji.T) + \
(1 - self.gamma[i, j]) * \
elementwise_maximum(Kij, Kji.T)
K = set_submatrix(K, self.sample_idx == i,
self.sample_idx == j, Kij_symm)
K = set_submatrix(K, self.sample_idx == sample_i,
self.sample_idx == sample_j, Kij_symm)
if not i == j:
K = set_submatrix(K, self.sample_idx == j,
self.sample_idx == i, Kij_symm.T)
K = set_submatrix(K, self.sample_idx == sample_j,
self.sample_idx == sample_i,
Kij_symm.T)
else:
K = super().symmetrize_kernel(K)
return K
Expand Down Expand Up @@ -1043,6 +1065,7 @@ def build_kernel_to_data(self, Y, gamma=None):
transitions : array-like, [n_samples_y, self.data.shape[0]]
Transition matrix from `Y` to `self.data`
"""
raise NotImplementedError
log_warning("building MNN kernel to gamma is experimental")
if not isinstance(self.gamma, str) and \
not isinstance(self.gamma, numbers.Number):
Expand Down
2 changes: 1 addition & 1 deletion graphtools/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.1.4"
__version__ = "0.1.5"
32 changes: 32 additions & 0 deletions test/test_mnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,38 @@ def test_mnn_with_vector_gamma():
gamma=np.linspace(0, 1, n_sample - 1))


def test_mnn_with_non_zero_indexed_sample_idx():
    """Check that an MNN graph is unaffected by offsetting the sample labels.

    A graph is built with the ``sample_idx`` returned by
    ``generate_swiss_roll`` and then rebuilt, with the same settings, after
    every label has been incremented by one. Both graphs must agree on
    ``N``, ``d`` and ``W``, and the second must be an ``MNNGraph``.
    """
    X, sample_idx = generate_swiss_roll()
    # identical settings for both graphs; only the labels differ
    graph_kwargs = dict(kernel_symm='gamma', gamma=0.5,
                        n_pca=None, use_pygsp=True)
    reference = build_graph(X, sample_idx=sample_idx, **graph_kwargs)
    # shift every label up by one, in place
    sample_idx += 1
    shifted = build_graph(X, sample_idx=sample_idx, **graph_kwargs)
    assert reference.N == shifted.N
    assert np.all(reference.d == shifted.d)
    # the weight matrices are compared in both directions
    assert (reference.W != shifted.W).nnz == 0
    assert (shifted.W != reference.W).sum() == 0
    assert isinstance(shifted, graphtools.graphs.MNNGraph)


def test_mnn_with_string_sample_idx():
    """Check that an MNN graph is unaffected by using string sample labels.

    A graph is built with the ``sample_idx`` returned by
    ``generate_swiss_roll`` and then rebuilt, with the same settings, after
    the labels have been mapped to strings (``'a'`` where the label equals
    0, ``'b'`` everywhere else). Both graphs must agree on ``N``, ``d`` and
    ``W``, and the second must be an ``MNNGraph``.
    """
    X, sample_idx = generate_swiss_roll()
    # identical settings for both graphs; only the labels differ
    graph_kwargs = dict(kernel_symm='gamma', gamma=0.5,
                        n_pca=None, use_pygsp=True)
    reference = build_graph(X, sample_idx=sample_idx, **graph_kwargs)
    # replace the numeric labels with string labels
    sample_idx = np.where(sample_idx == 0, 'a', 'b')
    relabelled = build_graph(X, sample_idx=sample_idx, **graph_kwargs)
    assert reference.N == relabelled.N
    assert np.all(reference.d == relabelled.d)
    # the weight matrices are compared in both directions
    assert (reference.W != relabelled.W).nnz == 0
    assert (relabelled.W != reference.W).sum() == 0
    assert isinstance(relabelled, graphtools.graphs.MNNGraph)


#####################################################
# Check kernel
#####################################################
Expand Down

0 comments on commit 31ba673

Please sign in to comment.