Skip to content

Commit

Permalink
Merge pull request #19 from KrishnaswamyLab/windows7_logging
Browse files Browse the repository at this point in the history
Logging with TaskLogger
  • Loading branch information
scottgigante committed Jul 27, 2018
2 parents 05e809a + 77b0c68 commit d0d9191
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 229 deletions.
9 changes: 0 additions & 9 deletions doc/source/reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,3 @@ Utilities
:undoc-members:
:inherited-members:
:show-inheritance:

Logging
-------

.. automodule:: graphtools.logging
:members:
:undoc-members:
:inherited-members:
:show-inheritance:
8 changes: 4 additions & 4 deletions graphtools/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import warnings
import tasklogger

from . import logging
from . import base
from . import graphs

Expand Down Expand Up @@ -137,7 +137,7 @@ def Graph(data,
------
ValueError : if selected parameters are incompatible.
"""
logging.set_logging(verbose)
tasklogger.set_level(verbose)
if sample_idx is not None and len(np.unique(sample_idx)) == 1:
warnings.warn("Only one unique sample. "
"Not using MNNGraph")
Expand Down Expand Up @@ -197,7 +197,7 @@ def Graph(data,
else:
msg = msg + " and PyGSP inheritance"

logging.log_debug(msg)
tasklogger.log_debug(msg)

class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
try:
Expand All @@ -215,7 +215,7 @@ def Graph(data,
pass

# build graph and return
logging.log_debug("Initializing {} with arguments {}".format(
tasklogger.log_debug("Initializing {} with arguments {}".format(
parent_classes,
", ".join(["{}='{}'".format(key, value)
for key, value in params.items()
Expand Down
30 changes: 15 additions & 15 deletions graphtools/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from scipy import sparse
import warnings
import numbers
import tasklogger

try:
import pandas as pd
except ImportError:
Expand All @@ -24,10 +26,6 @@
from .utils import (elementwise_minimum,
elementwise_maximum,
set_diagonal)
from .logging import (set_logging,
log_start,
log_complete,
log_debug)


class Base(object):
Expand Down Expand Up @@ -152,7 +150,7 @@ def _reduce_data(self):
Reduced data matrix
"""
if self.n_pca is not None and self.n_pca < self.data.shape[1]:
log_start("PCA")
tasklogger.log_start("PCA")
if sparse.issparse(self.data):
if isinstance(self.data, sparse.coo_matrix) or \
isinstance(self.data, sparse.lil_matrix) or \
Expand All @@ -166,7 +164,7 @@ def _reduce_data(self):
random_state=self.random_state)
self.data_pca.fit(self.data)
data_nu = self.data_pca.transform(self.data)
log_complete("PCA")
tasklogger.log_complete("PCA")
return data_nu
else:
data_nu = self.data
Expand Down Expand Up @@ -342,10 +340,10 @@ def __init__(self, kernel_symm='+',
self._check_symmetrization(kernel_symm, gamma)

if initialize:
log_debug("Initializing kernel...")
tasklogger.log_debug("Initializing kernel...")
self.K
else:
log_debug("Not initializing kernel.")
tasklogger.log_debug("Not initializing kernel.")
super().__init__(**kwargs)

def _check_symmetrization(self, kernel_symm, gamma):
Expand All @@ -363,7 +361,8 @@ def _check_symmetrization(self, kernel_symm, gamma):
warnings.warn("kernel_symm='gamma' but gamma not given. "
"Defaulting to gamma=0.5.")
self.gamma = gamma = 0.5
elif not isinstance(gamma, numbers.Number) or gamma < 0 or gamma > 1:
elif not isinstance(gamma, numbers.Number) or \
gamma < 0 or gamma > 1:
raise ValueError("gamma {} not recognized. Expected "
"a float between 0 and 1".format(gamma))

Expand Down Expand Up @@ -392,18 +391,18 @@ def _build_kernel(self):
def symmetrize_kernel(self, K):
# symmetrize
if self.kernel_symm == "+":
log_debug("Using addition symmetrization.")
tasklogger.log_debug("Using addition symmetrization.")
K = (K + K.T) / 2
elif self.kernel_symm == "*":
log_debug("Using multiplication symmetrization.")
tasklogger.log_debug("Using multiplication symmetrization.")
K = K.multiply(K.T)
elif self.kernel_symm == 'gamma':
log_debug(
tasklogger.log_debug(
"Using gamma symmetrization (gamma = {}).".format(self.gamma))
K = self.gamma * elementwise_minimum(K, K.T) + \
(1 - self.gamma) * elementwise_maximum(K, K.T)
elif self.kernel_symm is None:
log_debug("Using no symmetrization.")
tasklogger.log_debug("Using no symmetrization.")
pass
else:
# this should never happen
Expand Down Expand Up @@ -438,7 +437,8 @@ def set_params(self, **params):
"""
if 'gamma' in params and params['gamma'] != self.gamma:
raise ValueError("Cannot update gamma. Please create a new graph")
if 'kernel_symm' in params and params['kernel_symm'] != self.kernel_symm:
if 'kernel_symm' in params and \
params['kernel_symm'] != self.kernel_symm:
raise ValueError(
"Cannot update kernel_symm. Please create a new graph")
return self
Expand Down Expand Up @@ -622,7 +622,7 @@ def __init__(self, data,
# kwargs are ignored
self.n_jobs = n_jobs
self.verbose = verbose
set_logging(verbose)
tasklogger.set_level(verbose)
super().__init__(data, **kwargs)

def get_params(self):
Expand Down
72 changes: 37 additions & 35 deletions graphtools/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,12 @@
from scipy import sparse
import numbers
import warnings
import tasklogger

from .utils import (set_diagonal,
elementwise_minimum,
elementwise_maximum,
set_submatrix)
from .logging import (log_start,
log_complete,
log_warning,
log_debug)
from .base import DataGraph, PyGSPGraph


Expand Down Expand Up @@ -219,13 +216,13 @@ def build_kernel_to_data(self, Y, knn=None):
k=knn, n=self.data.shape[0]))

Y = self._check_extension_shape(Y)
log_start("KNN search")
tasklogger.log_start("KNN search")
if self.decay is None or self.thresh == 1:
# binary connectivity matrix
K = self.knn_tree.kneighbors_graph(
Y, n_neighbors=knn,
mode='connectivity')
log_complete("KNN search")
tasklogger.log_complete("KNN search")
else:
# sparse fast alpha decay
knn_tree = self.knn_tree
Expand All @@ -246,15 +243,15 @@ def build_kernel_to_data(self, Y, knn=None):
"Consider removing duplicates to avoid errors in "
"downstream processing.".format(duplicate_names),
RuntimeWarning)
log_complete("KNN search")
log_start("affinities")
tasklogger.log_complete("KNN search")
tasklogger.log_start("affinities")
bandwidth = distances[:, knn - 1]
radius = bandwidth * np.power(-1 * np.log(self.thresh),
1 / self.decay)
update_idx = np.argwhere(
np.max(distances, axis=1) < radius).reshape(-1)
log_debug("search_knn = {}; {} remaining".format(search_knn,
len(update_idx)))
tasklogger.log_debug("search_knn = {}; {} remaining".format(
search_knn, len(update_idx)))
if len(update_idx) > 0:
distances = [d for d in distances]
indices = [i for i in indices]
Expand All @@ -269,14 +266,16 @@ def build_kernel_to_data(self, Y, knn=None):
indices[idx] = ind_new[i]
update_idx = [i for i, d in enumerate(distances)
if np.max(d) < radius[i]]
log_debug("search_knn = {}; {} remaining".format(
tasklogger.log_debug("search_knn = {}; {} remaining".format(
search_knn,
len(update_idx)))
if search_knn > self.data_nu.shape[0] / 2:
knn_tree = NearestNeighbors(knn, algorithm='brute',
n_jobs=self.n_jobs).fit(self.data_nu)
knn_tree = NearestNeighbors(
knn, algorithm='brute',
n_jobs=self.n_jobs).fit(self.data_nu)
if len(update_idx) > 0:
log_debug("radius search on {}".format(len(update_idx)))
tasklogger.log_debug(
"radius search on {}".format(len(update_idx)))
# give up - radius search
dist_new, ind_new = knn_tree.radius_neighbors(
Y[update_idx, :],
Expand All @@ -297,7 +296,7 @@ def build_kernel_to_data(self, Y, knn=None):
K = K.tocoo()
K.eliminate_zeros()
K = K.tocsr()
log_complete("affinities")
tasklogger.log_complete("affinities")
return K


Expand Down Expand Up @@ -453,15 +452,15 @@ def build_landmark_op(self):
probabilities between cluster centers by using transition probabilities
between samples assigned to each cluster.
"""
log_start("landmark operator")
tasklogger.log_start("landmark operator")
is_sparse = sparse.issparse(self.kernel)
# spectral clustering
log_start("SVD")
tasklogger.log_start("SVD")
_, _, VT = randomized_svd(self.diff_aff,
n_components=self.n_svd,
random_state=self.random_state)
log_complete("SVD")
log_start("KMeans")
tasklogger.log_complete("SVD")
tasklogger.log_start("KMeans")
kmeans = MiniBatchKMeans(
self.n_landmark,
init_size=3 * self.n_landmark,
Expand All @@ -471,7 +470,7 @@ def build_landmark_op(self):
self.diff_op.dot(VT.T))
# some clusters are not assigned
landmarks = np.unique(self._clusters)
log_complete("KMeans")
tasklogger.log_complete("KMeans")

# transition matrices
if is_sparse:
Expand All @@ -492,7 +491,7 @@ def build_landmark_op(self):
# store output
self._landmark_op = landmark_op
self._transitions = pnm
log_complete("landmark operator")
tasklogger.log_complete("landmark operator")

def extend_to_data(self, data, **kwargs):
"""Build transition matrix from new data to the graph
Expand Down Expand Up @@ -714,7 +713,7 @@ def build_kernel(self):
K = K.tolil()
K = set_diagonal(K, 1)
else:
log_start("affinities")
tasklogger.log_start("affinities")
if sparse.issparse(self.data_nu):
self.data_nu = self.data_nu.toarray()
if self.precomputed == "distance":
Expand Down Expand Up @@ -744,7 +743,7 @@ def build_kernel(self):
epsilon = np.max(knn_dist, axis=1)
pdx = (pdx.T / epsilon).T
K = np.exp(-1 * np.power(pdx, self.decay))
log_complete("affinities")
tasklogger.log_complete("affinities")
# truncate
if sparse.issparse(K):
if not (isinstance(K, sparse.csr_matrix) or
Expand Down Expand Up @@ -794,15 +793,15 @@ def build_kernel_to_data(self, Y, knn=None):
if self.precomputed is not None:
raise ValueError("Cannot extend kernel on precomputed graph")
else:
log_start("affinities")
tasklogger.log_start("affinities")
Y = self._check_extension_shape(Y)
pdx = cdist(Y, self.data_nu, metric=self.distance)
knn_dist = np.partition(pdx, knn, axis=1)[:, :knn]
epsilon = np.max(knn_dist, axis=1)
pdx = (pdx.T / epsilon).T
K = np.exp(-1 * pdx**self.decay)
K[K < self.thresh] = 0
log_complete("affinities")
tasklogger.log_complete("affinities")
return K


Expand Down Expand Up @@ -860,7 +859,8 @@ def __init__(self, data, sample_idx,
" batch correction, use kNNGraph.")
elif len(sample_idx) != data.shape[0]:
raise ValueError("sample_idx ({}) must be the same length as "
"data ({})".format(len(sample_idx), data.shape[0]))
"data ({})".format(len(sample_idx),
data.shape[0]))
elif len(self.samples) == 1:
raise ValueError(
"sample_idx must contain more than one unique value")
Expand Down Expand Up @@ -1000,13 +1000,15 @@ def build_kernel(self):
symmetric matrix with ones down the diagonal
with no negative entries.
"""
log_start("subgraphs")
tasklogger.log_start("subgraphs")
self.subgraphs = []
from .api import Graph
# iterate through sample ids
for i, idx in enumerate(self.samples):
log_debug("subgraph {}: sample {}, n = {}, knn = {}".format(
i, idx, np.sum(self.sample_idx == idx), self.weighted_knn[i]))
tasklogger.log_debug("subgraph {}: sample {}, "
"n = {}, knn = {}".format(
i, idx, np.sum(self.sample_idx == idx),
self.weighted_knn[i]))
# select data for sample
data = self.data_nu[self.sample_idx == idx]
# build a kNN graph for cells within sample
Expand All @@ -1019,7 +1021,7 @@ def build_kernel(self):
random_state=self.random_state,
initialize=False)
self.subgraphs.append(graph) # append to list of subgraphs
log_complete("subgraphs")
tasklogger.log_complete("subgraphs")

if self.thresh > 0 or self.decay is None:
K = sparse.lil_matrix(
Expand All @@ -1028,7 +1030,7 @@ def build_kernel(self):
K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]])
for i, X in enumerate(self.subgraphs):
for j, Y in enumerate(self.subgraphs):
log_start(
tasklogger.log_start(
"kernel from sample {} to {}".format(self.samples[i],
self.samples[j]))
Kij = Y.build_kernel_to_data(
Expand All @@ -1039,7 +1041,7 @@ def build_kernel(self):
Kij = Kij * self.beta
K = set_submatrix(K, self.sample_idx == self.samples[i],
self.sample_idx == self.samples[j], Kij)
log_complete(
tasklogger.log_complete(
"kernel from sample {} to {}".format(self.samples[i],
self.samples[j]))
return K
Expand All @@ -1051,8 +1053,8 @@ def symmetrize_kernel(self, K):
# Gamma can be a matrix with specific transition values for
# each batch. This allows for technical replicates and
# experimental samples to be corrected simultaneously
log_debug("Using gamma symmetrization. "
"Gamma:\n{}".format(self.gamma))
tasklogger.log_debug("Using gamma symmetrization. "
"Gamma:\n{}".format(self.gamma))
for i, sample_i in enumerate(self.samples):
for j, sample_j in enumerate(self.samples):
if j < i:
Expand Down Expand Up @@ -1106,7 +1108,7 @@ def build_kernel_to_data(self, Y, gamma=None):
Transition matrix from `Y` to `self.data`
"""
raise NotImplementedError
log_warning("building MNN kernel to gamma is experimental")
tasklogger.log_warning("building MNN kernel to gamma is experimental")
if not isinstance(self.gamma, str) and \
not isinstance(self.gamma, numbers.Number):
if gamma is None:
Expand Down

0 comments on commit d0d9191

Please sign in to comment.