Merge pull request #19 from KrishnaswamyLab/windows7_logging

Logging with TaskLogger
KrishnaswamyLab · Jul 27, 2018 · d0d9191
2 parents 05e809a + 77b0c68
commit d0d9191
Show file tree

Hide file tree

Showing 7 changed files with 59 additions and 229 deletions.
diff --git a/doc/source/reference.rst b/doc/source/reference.rst
@@ -36,12 +36,3 @@ Utilities
     :undoc-members:
     :inherited-members:
     :show-inheritance:
-
-Logging
--------
-
-.. automodule:: graphtools.logging
-    :members:
-    :undoc-members:
-    :inherited-members:
-    :show-inheritance:
diff --git a/graphtools/api.py b/graphtools/api.py
@@ -1,7 +1,7 @@
 import numpy as np
 import warnings
+import tasklogger
 
-from . import logging
 from . import base
 from . import graphs
 
@@ -137,7 +137,7 @@ def Graph(data,
     ------
     ValueError : if selected parameters are incompatible.
     """
-    logging.set_logging(verbose)
+    tasklogger.set_level(verbose)
     if sample_idx is not None and len(np.unique(sample_idx)) == 1:
         warnings.warn("Only one unique sample. "
                       "Not using MNNGraph")
@@ -197,7 +197,7 @@ def Graph(data,
         else:
             msg = msg + " and PyGSP inheritance"
 
-    logging.log_debug(msg)
+    tasklogger.log_debug(msg)
 
     class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
     try:
@@ -215,7 +215,7 @@ def Graph(data,
                 pass
 
     # build graph and return
-    logging.log_debug("Initializing {} with arguments {}".format(
+    tasklogger.log_debug("Initializing {} with arguments {}".format(
         parent_classes,
         ", ".join(["{}='{}'".format(key, value)
                    for key, value in params.items()

diff --git a/graphtools/base.py b/graphtools/base.py
@@ -9,6 +9,8 @@
 from scipy import sparse
 import warnings
 import numbers
+import tasklogger
+
 try:
     import pandas as pd
 except ImportError:
@@ -24,10 +26,6 @@
 from .utils import (elementwise_minimum,
                     elementwise_maximum,
                     set_diagonal)
-from .logging import (set_logging,
-                      log_start,
-                      log_complete,
-                      log_debug)
 
 
 class Base(object):
@@ -152,7 +150,7 @@ def _reduce_data(self):
         Reduced data matrix
         """
         if self.n_pca is not None and self.n_pca < self.data.shape[1]:
-            log_start("PCA")
+            tasklogger.log_start("PCA")
             if sparse.issparse(self.data):
                 if isinstance(self.data, sparse.coo_matrix) or \
                         isinstance(self.data, sparse.lil_matrix) or \
@@ -166,7 +164,7 @@ def _reduce_data(self):
                                     random_state=self.random_state)
             self.data_pca.fit(self.data)
             data_nu = self.data_pca.transform(self.data)
-            log_complete("PCA")
+            tasklogger.log_complete("PCA")
             return data_nu
         else:
             data_nu = self.data
@@ -342,10 +340,10 @@ def __init__(self, kernel_symm='+',
         self._check_symmetrization(kernel_symm, gamma)
 
         if initialize:
-            log_debug("Initializing kernel...")
+            tasklogger.log_debug("Initializing kernel...")
             self.K
         else:
-            log_debug("Not initializing kernel.")
+            tasklogger.log_debug("Not initializing kernel.")
         super().__init__(**kwargs)
 
     def _check_symmetrization(self, kernel_symm, gamma):
@@ -363,7 +361,8 @@ def _check_symmetrization(self, kernel_symm, gamma):
                 warnings.warn("kernel_symm='gamma' but gamma not given. "
                               "Defaulting to gamma=0.5.")
                 self.gamma = gamma = 0.5
-            elif not isinstance(gamma, numbers.Number) or gamma < 0 or gamma > 1:
+            elif not isinstance(gamma, numbers.Number) or \
+                    gamma < 0 or gamma > 1:
                 raise ValueError("gamma {} not recognized. Expected "
                                  "a float between 0 and 1".format(gamma))
 
@@ -392,18 +391,18 @@ def _build_kernel(self):
     def symmetrize_kernel(self, K):
         # symmetrize
         if self.kernel_symm == "+":
-            log_debug("Using addition symmetrization.")
+            tasklogger.log_debug("Using addition symmetrization.")
             K = (K + K.T) / 2
         elif self.kernel_symm == "*":
-            log_debug("Using multiplication symmetrization.")
+            tasklogger.log_debug("Using multiplication symmetrization.")
             K = K.multiply(K.T)
         elif self.kernel_symm == 'gamma':
-            log_debug(
+            tasklogger.log_debug(
                 "Using gamma symmetrization (gamma = {}).".format(self.gamma))
             K = self.gamma * elementwise_minimum(K, K.T) + \
                 (1 - self.gamma) * elementwise_maximum(K, K.T)
         elif self.kernel_symm is None:
-            log_debug("Using no symmetrization.")
+            tasklogger.log_debug("Using no symmetrization.")
             pass
         else:
             # this should never happen
@@ -438,7 +437,8 @@ def set_params(self, **params):
         """
         if 'gamma' in params and params['gamma'] != self.gamma:
             raise ValueError("Cannot update gamma. Please create a new graph")
-        if 'kernel_symm' in params and params['kernel_symm'] != self.kernel_symm:
+        if 'kernel_symm' in params and \
+                params['kernel_symm'] != self.kernel_symm:
             raise ValueError(
                 "Cannot update kernel_symm. Please create a new graph")
         return self
@@ -622,7 +622,7 @@ def __init__(self, data,
         # kwargs are ignored
         self.n_jobs = n_jobs
         self.verbose = verbose
-        set_logging(verbose)
+        tasklogger.set_level(verbose)
         super().__init__(data, **kwargs)
 
     def get_params(self):

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
@@ -9,15 +9,12 @@
 from scipy import sparse
 import numbers
 import warnings
+import tasklogger
 
 from .utils import (set_diagonal,
                     elementwise_minimum,
                     elementwise_maximum,
                     set_submatrix)
-from .logging import (log_start,
-                      log_complete,
-                      log_warning,
-                      log_debug)
 from .base import DataGraph, PyGSPGraph
 
 
@@ -219,13 +216,13 @@ def build_kernel_to_data(self, Y, knn=None):
                               k=knn, n=self.data.shape[0]))
 
         Y = self._check_extension_shape(Y)
-        log_start("KNN search")
+        tasklogger.log_start("KNN search")
         if self.decay is None or self.thresh == 1:
             # binary connectivity matrix
             K = self.knn_tree.kneighbors_graph(
                 Y, n_neighbors=knn,
                 mode='connectivity')
-            log_complete("KNN search")
+            tasklogger.log_complete("KNN search")
         else:
             # sparse fast alpha decay
             knn_tree = self.knn_tree
@@ -246,15 +243,15 @@ def build_kernel_to_data(self, Y, knn=None):
                     "Consider removing duplicates to avoid errors in "
                     "downstream processing.".format(duplicate_names),
                     RuntimeWarning)
-            log_complete("KNN search")
-            log_start("affinities")
+            tasklogger.log_complete("KNN search")
+            tasklogger.log_start("affinities")
             bandwidth = distances[:, knn - 1]
             radius = bandwidth * np.power(-1 * np.log(self.thresh),
                                           1 / self.decay)
             update_idx = np.argwhere(
                 np.max(distances, axis=1) < radius).reshape(-1)
-            log_debug("search_knn = {}; {} remaining".format(search_knn,
-                                                             len(update_idx)))
+            tasklogger.log_debug("search_knn = {}; {} remaining".format(
+                search_knn, len(update_idx)))
             if len(update_idx) > 0:
                 distances = [d for d in distances]
                 indices = [i for i in indices]
@@ -269,14 +266,16 @@ def build_kernel_to_data(self, Y, knn=None):
                     indices[idx] = ind_new[i]
                 update_idx = [i for i, d in enumerate(distances)
                               if np.max(d) < radius[i]]
-                log_debug("search_knn = {}; {} remaining".format(
+                tasklogger.log_debug("search_knn = {}; {} remaining".format(
                     search_knn,
                     len(update_idx)))
             if search_knn > self.data_nu.shape[0] / 2:
-                knn_tree = NearestNeighbors(knn, algorithm='brute',
-                                            n_jobs=self.n_jobs).fit(self.data_nu)
+                knn_tree = NearestNeighbors(
+                    knn, algorithm='brute',
+                    n_jobs=self.n_jobs).fit(self.data_nu)
             if len(update_idx) > 0:
-                log_debug("radius search on {}".format(len(update_idx)))
+                tasklogger.log_debug(
+                    "radius search on {}".format(len(update_idx)))
                 # give up - radius search
                 dist_new, ind_new = knn_tree.radius_neighbors(
                     Y[update_idx, :],
@@ -297,7 +296,7 @@ def build_kernel_to_data(self, Y, knn=None):
             K = K.tocoo()
             K.eliminate_zeros()
             K = K.tocsr()
-            log_complete("affinities")
+            tasklogger.log_complete("affinities")
         return K
 
 
@@ -453,15 +452,15 @@ def build_landmark_op(self):
         probabilities between cluster centers by using transition probabilities
         between samples assigned to each cluster.
         """
-        log_start("landmark operator")
+        tasklogger.log_start("landmark operator")
         is_sparse = sparse.issparse(self.kernel)
         # spectral clustering
-        log_start("SVD")
+        tasklogger.log_start("SVD")
         _, _, VT = randomized_svd(self.diff_aff,
                                   n_components=self.n_svd,
                                   random_state=self.random_state)
-        log_complete("SVD")
-        log_start("KMeans")
+        tasklogger.log_complete("SVD")
+        tasklogger.log_start("KMeans")
         kmeans = MiniBatchKMeans(
             self.n_landmark,
             init_size=3 * self.n_landmark,
@@ -471,7 +470,7 @@ def build_landmark_op(self):
             self.diff_op.dot(VT.T))
         # some clusters are not assigned
         landmarks = np.unique(self._clusters)
-        log_complete("KMeans")
+        tasklogger.log_complete("KMeans")
 
         # transition matrices
         if is_sparse:
@@ -492,7 +491,7 @@ def build_landmark_op(self):
         # store output
         self._landmark_op = landmark_op
         self._transitions = pnm
-        log_complete("landmark operator")
+        tasklogger.log_complete("landmark operator")
 
     def extend_to_data(self, data, **kwargs):
         """Build transition matrix from new data to the graph
@@ -714,7 +713,7 @@ def build_kernel(self):
                 K = K.tolil()
             K = set_diagonal(K, 1)
         else:
-            log_start("affinities")
+            tasklogger.log_start("affinities")
             if sparse.issparse(self.data_nu):
                 self.data_nu = self.data_nu.toarray()
             if self.precomputed == "distance":
@@ -744,7 +743,7 @@ def build_kernel(self):
             epsilon = np.max(knn_dist, axis=1)
             pdx = (pdx.T / epsilon).T
             K = np.exp(-1 * np.power(pdx, self.decay))
-            log_complete("affinities")
+            tasklogger.log_complete("affinities")
         # truncate
         if sparse.issparse(K):
             if not (isinstance(K, sparse.csr_matrix) or
@@ -794,15 +793,15 @@ def build_kernel_to_data(self, Y, knn=None):
         if self.precomputed is not None:
             raise ValueError("Cannot extend kernel on precomputed graph")
         else:
-            log_start("affinities")
+            tasklogger.log_start("affinities")
             Y = self._check_extension_shape(Y)
             pdx = cdist(Y, self.data_nu, metric=self.distance)
             knn_dist = np.partition(pdx, knn, axis=1)[:, :knn]
             epsilon = np.max(knn_dist, axis=1)
             pdx = (pdx.T / epsilon).T
             K = np.exp(-1 * pdx**self.decay)
             K[K < self.thresh] = 0
-            log_complete("affinities")
+            tasklogger.log_complete("affinities")
         return K
 
 
@@ -860,7 +859,8 @@ def __init__(self, data, sample_idx,
                              " batch correction, use kNNGraph.")
         elif len(sample_idx) != data.shape[0]:
             raise ValueError("sample_idx ({}) must be the same length as "
-                             "data ({})".format(len(sample_idx), data.shape[0]))
+                             "data ({})".format(len(sample_idx),
+                                                data.shape[0]))
         elif len(self.samples) == 1:
             raise ValueError(
                 "sample_idx must contain more than one unique value")
@@ -1000,13 +1000,15 @@ def build_kernel(self):
             symmetric matrix with ones down the diagonal
             with no non-negative entries.
         """
-        log_start("subgraphs")
+        tasklogger.log_start("subgraphs")
         self.subgraphs = []
         from .api import Graph
         # iterate through sample ids
         for i, idx in enumerate(self.samples):
-            log_debug("subgraph {}: sample {}, n = {}, knn = {}".format(
-                i, idx, np.sum(self.sample_idx == idx), self.weighted_knn[i]))
+            tasklogger.log_debug("subgraph {}: sample {}, "
+                                 "n = {}, knn = {}".format(
+                                     i, idx, np.sum(self.sample_idx == idx),
+                                     self.weighted_knn[i]))
             # select data for sample
             data = self.data_nu[self.sample_idx == idx]
             # build a kNN graph for cells within sample
@@ -1019,7 +1021,7 @@ def build_kernel(self):
                           random_state=self.random_state,
                           initialize=False)
             self.subgraphs.append(graph)  # append to list of subgraphs
-        log_complete("subgraphs")
+        tasklogger.log_complete("subgraphs")
 
         if self.thresh > 0 or self.decay is None:
             K = sparse.lil_matrix(
@@ -1028,7 +1030,7 @@ def build_kernel(self):
             K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]])
         for i, X in enumerate(self.subgraphs):
             for j, Y in enumerate(self.subgraphs):
-                log_start(
+                tasklogger.log_start(
                     "kernel from sample {} to {}".format(self.samples[i],
                                                          self.samples[j]))
                 Kij = Y.build_kernel_to_data(
@@ -1039,7 +1041,7 @@ def build_kernel(self):
                     Kij = Kij * self.beta
                 K = set_submatrix(K, self.sample_idx == self.samples[i],
                                   self.sample_idx == self.samples[j], Kij)
-                log_complete(
+                tasklogger.log_complete(
                     "kernel from sample {} to {}".format(self.samples[i],
                                                          self.samples[j]))
         return K
@@ -1051,8 +1053,8 @@ def symmetrize_kernel(self, K):
             # Gamma can be a matrix with specific values transitions for
             # each batch. This allows for technical replicates and
             # experimental samples to be corrected simultaneously
-            log_debug("Using gamma symmetrization. "
-                      "Gamma:\n{}".format(self.gamma))
+            tasklogger.log_debug("Using gamma symmetrization. "
+                                 "Gamma:\n{}".format(self.gamma))
             for i, sample_i in enumerate(self.samples):
                 for j, sample_j in enumerate(self.samples):
                     if j < i:
@@ -1106,7 +1108,7 @@ def build_kernel_to_data(self, Y, gamma=None):
             Transition matrix from `Y` to `self.data`
         """
         raise NotImplementedError
-        log_warning("building MNN kernel to gamma is experimental")
+        tasklogger.log_warning("building MNN kernel to gamma is experimental")
         if not isinstance(self.gamma, str) and \
                 not isinstance(self.gamma, numbers.Number):
             if gamma is None: