
Commit

normalize between-batch affinities by rowwise magnitude of within-batch affinities
scottgigante committed Nov 17, 2018
1 parent d63007b commit 718d191
Showing 2 changed files with 58 additions and 17 deletions.
27 changes: 18 additions & 9 deletions graphtools/graphs.py
@@ -911,9 +911,9 @@ class MNNGraph(DataGraph):
         Batch index
     beta : `float`, optional (default: 1)
-        Downweight within-batch affinities by beta
+        Downweight between-batch affinities by beta
-    adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: 'sqrt')
+    adaptive_k : {'min', 'mean', 'sqrt', `None`} (default: None)
         Weights MNN kernel adaptively using the number of cells in
         each sample according to the selected method.
@@ -925,7 +925,7 @@ class MNNGraph(DataGraph):

     def __init__(self, data, sample_idx,
                  knn=5, beta=1, n_pca=None,
-                 adaptive_k='sqrt',
+                 adaptive_k=None,
                  decay=None,
                  bandwidth=None,
                  distance='euclidean',
@@ -1116,7 +1116,7 @@ def build_kernel(self):
                 verbose=self.verbose,
                 random_state=self.random_state,
                 n_jobs=self.n_jobs,
-                initialize=False)
+                initialize=True)
             self.subgraphs.append(graph)  # append to list of subgraphs
         tasklogger.log_complete("subgraphs")

@@ -1126,16 +1126,25 @@ def build_kernel(self):
         else:
             K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]])
             for i, X in enumerate(self.subgraphs):
+                K = set_submatrix(K, self.sample_idx == self.samples[i],
+                                  self.sample_idx == self.samples[i], X.K)
+                within_batch_norm = np.array(np.sum(X.K, 1)).flatten()
                 for j, Y in enumerate(self.subgraphs):
+                    if i == j:
+                        continue
                     tasklogger.log_start(
                         "kernel from sample {} to {}".format(self.samples[i],
                                                              self.samples[j]))
                     Kij = Y.build_kernel_to_data(
                         X.data_nu,
                         knn=self.weighted_knn[i])
-                    if i == j:
-                        # downweight within-batch affinities by beta
-                        Kij = Kij * self.beta
+                    between_batch_norm = np.array(np.sum(Kij, 1)).flatten()
+                    scale = np.minimum(1, within_batch_norm /
+                                       between_batch_norm) * self.beta
+                    if sparse.issparse(Kij):
+                        Kij = Kij.multiply(scale[:, None])
+                    else:
+                        Kij = Kij * scale[:, None]
                     K = set_submatrix(K, self.sample_idx == self.samples[i],
                                       self.sample_idx == self.samples[j], Kij)
                     tasklogger.log_complete(
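
For intuition, here is a minimal standalone sketch of the rowwise rescaling this hunk introduces, using dense NumPy arrays; the random Kii/Kij stand-ins, their shapes, and the beta value are illustrative, not taken from the library:

    import numpy as np

    # Kii: within-batch kernel for sample i; Kij: kernel from sample i
    # to sample j; beta: between-batch downweighting factor (illustrative).
    rng = np.random.default_rng(42)
    Kii = rng.uniform(size=(5, 5))  # stand-in within-batch affinities
    Kij = rng.uniform(size=(5, 3))  # stand-in between-batch affinities
    beta = 0.5

    within_batch_norm = Kii.sum(axis=1)    # rowwise magnitude within batch
    between_batch_norm = Kij.sum(axis=1)   # rowwise magnitude between batches
    # cap each between-batch row at its within-batch magnitude,
    # then downweight uniformly by beta
    scale = np.minimum(1, within_batch_norm / between_batch_norm) * beta
    Kij_scaled = Kij * scale[:, None]

    # each rescaled row sums to at most beta times the within-batch row sum
    assert np.all(Kij_scaled.sum(axis=1) <= beta * within_batch_norm + 1e-12)

Capping the ratio at 1 keeps between-batch affinities from dominating within-batch ones, while beta further downweights them uniformly.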
@@ -1147,11 +1156,11 @@ def symmetrize_kernel(self, K):
         if self.kernel_symm == 'theta' and self.theta is not None and \
                 not isinstance(self.theta, numbers.Number):
             # matrix theta
-            # Gamma can be a matrix with specific values transitions for
+            # Theta can be a matrix with specific values transitions for
             # each batch. This allows for technical replicates and
             # experimental samples to be corrected simultaneously
             tasklogger.log_debug("Using theta symmetrization. "
-                                 "Gamma:\n{}".format(self.theta))
+                                 "Theta:\n{}".format(self.theta))
             for i, sample_i in enumerate(self.samples):
                 for j, sample_j in enumerate(self.samples):
                     if j < i:
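
For reference, a sketch of the theta symmetrization applied here, assuming a dense kernel K and a scalar theta; the formula matches the one exercised in the tests below, and with matrix theta the same interpolation is applied per batch pair:

    import numpy as np

    def theta_symmetrize(K, theta):
        # theta = 1 recovers min(K, K.T) (strict mutual nearest neighbors);
        # theta = 0 recovers max(K, K.T); values in between interpolate
        return theta * np.minimum(K, K.T) + (1 - theta) * np.maximum(K, K.T)

    K = np.array([[0.0, 0.8],
                  [0.2, 0.0]])
    print(theta_symmetrize(K, 0.9))  # off-diagonal entries become 0.26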
48 changes: 40 additions & 8 deletions test/test_mnn.py
@@ -12,6 +12,7 @@
     raises,
     cdist,
 )
+from scipy.linalg import norm


#####################################################
@@ -116,7 +117,7 @@ def test_mnn_graph_float_theta():
     k = 10
     a = 20
     metric = 'euclidean'
-    beta = 0
+    beta = 0.5
     samples = np.unique(sample_idx)

     K = np.zeros((len(X), len(X)))
@@ -133,17 +134,32 @@
             pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
             k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
             if si == sj:
-                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
-                    (1 - beta)  # fill out values in K for NN on diagonal
+                K.iloc[sample_idx == si, sample_idx == sj] = (
+                    k_ij + k_ij.T) / 2
             else:
                 # fill out values in K for NN on diagonal
                 K.iloc[sample_idx == si, sample_idx == sj] = k_ij

+    Kn = K.copy()
+    for i in samples:
+        curr_K = K.iloc[sample_idx == i, sample_idx == i]
+        i_norm = norm(curr_K, 1, axis=1)
+        for j in samples:
+            if i == j:
+                continue
+            else:
+                curr_K = K.iloc[sample_idx == i, sample_idx == j]
+                curr_norm = norm(curr_K, 1, axis=1)
+                scale = np.minimum(
+                    np.ones(len(curr_norm)), i_norm / curr_norm) * beta
+                Kn.iloc[sample_idx == i, sample_idx == j] = (
+                    curr_K.T * scale).T
+
+    K = Kn
     W = np.array((theta * np.minimum(K, K.T)) +
                  ((1 - theta) * np.maximum(K, K.T)))
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta,
+    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
                           kernel_symm='theta', theta=theta,
                           distance=metric, sample_idx=sample_idx, thresh=0,
                           use_pygsp=True)
@@ -179,11 +195,27 @@ def test_mnn_graph_matrix_theta():
             pdxe_ij = pdx_ij / e_ij[:, np.newaxis]  # normalize
             k_ij = np.exp(-1 * (pdxe_ij ** a))  # apply alpha-decaying kernel
             if si == sj:
-                K.iloc[sample_idx == si, sample_idx == sj] = k_ij * \
-                    (1 - beta)  # fill out values in K for NN on diagonal
+                K.iloc[sample_idx == si, sample_idx == sj] = (
+                    k_ij + k_ij.T) / 2
             else:
                 # fill out values in K for NN on diagonal
                 K.iloc[sample_idx == si, sample_idx == sj] = k_ij
+    Kn = K.copy()
+    for i in samples:
+        curr_K = K.iloc[sample_idx == i, sample_idx == i]
+        i_norm = norm(curr_K, 1, axis=1)
+        for j in samples:
+            if i == j:
+                continue
+            else:
+                curr_K = K.iloc[sample_idx == i, sample_idx == j]
+                curr_norm = norm(curr_K, 1, axis=1)
+                scale = np.minimum(
+                    np.ones(len(curr_norm)), i_norm / curr_norm) * beta
+                Kn.iloc[sample_idx == i, sample_idx == j] = (
+                    curr_K.T * scale).T
+
+    K = Kn

     K = np.array(K)

@@ -197,7 +229,7 @@ def test_mnn_graph_matrix_theta():
                  ((1 - matrix_theta) * np.maximum(K, K.T)))
     np.fill_diagonal(W, 0)
     G = pygsp.graphs.Graph(W)
-    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=1 - beta,
+    G2 = graphtools.Graph(X, knn=k + 1, decay=a, beta=beta,
                           kernel_symm='theta', theta=theta,
                           distance=metric, sample_idx=sample_idx, thresh=0,
                           use_pygsp=True)
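
A usage sketch based on the call signature exercised in these tests; the synthetic X and sample_idx here are placeholders, and under the new semantics beta directly downweights between-batch affinities (beta=1 leaves the normalized affinities unchanged, beta=0 removes them):

    import numpy as np
    import graphtools

    # placeholder synthetic data: two batches of 100 points in 50 dimensions
    X = np.random.normal(size=(200, 50))
    sample_idx = np.repeat([0, 1], 100)

    G = graphtools.Graph(X, knn=11, decay=20, beta=0.5,
                         kernel_symm='theta', theta=0.9,
                         distance='euclidean', sample_idx=sample_idx,
                         thresh=0, use_pygsp=True)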
