refactor tasklogger interface

scottgigante committed Oct 13, 2019
1 parent 5865f1a commit 5209f13
Showing 5 changed files with 278 additions and 288 deletions.
13 changes: 7 additions & 6 deletions graphtools/api.py
@@ -1,12 +1,13 @@
 import numpy as np
 import warnings
-import tasklogger
 from scipy import sparse
 import pickle
 import pygsp
+import tasklogger
 
-from . import base
-from . import graphs
+from . import base, graphs
+
+_logger = tasklogger.get_tasklogger('graphtools')
 
 
 def Graph(data,
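The heart of the refactor is visible in this first hunk: instead of calling tasklogger's module-level functions, which act on shared global state, each graphtools module fetches a logger registered under the package name once at import time and calls methods on that instance. A minimal sketch of the pattern, using only calls that appear in this commit (`get_tasklogger`, `set_level`, `info`, `debug`); the level numbers are an assumption for illustration:

```python
import tasklogger

# Fetch (or lazily create) the logger registered as 'graphtools'.
# Every module that asks for this name shares the same instance.
_logger = tasklogger.get_tasklogger('graphtools')

_logger.set_level(1)                    # verbosity scoped to this logger
_logger.info("shown at verbose >= 1")
_logger.debug("shown at verbose >= 2")  # assumed to need a higher level
```

Scoping the level to a named logger means graphtools can change its own verbosity without silencing any other package that also uses tasklogger.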
@@ -173,7 +174,7 @@ def Graph(data,
     “Numerical Recipes (3rd edition)”,
     Cambridge University Press, 2007, page 795.
     """
-    tasklogger.set_level(verbose)
+    _logger.set_level(verbose)
     if sample_idx is not None and len(np.unique(sample_idx)) == 1:
         warnings.warn("Only one unique sample. "
                       "Not using MNNGraph")
@@ -239,7 +240,7 @@ def Graph(data,
     else:
         msg = msg + " and PyGSP inheritance"
 
-    tasklogger.log_debug(msg)
+    _logger.debug(msg)
 
     class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
     try:
@@ -257,7 +258,7 @@ def Graph(data,
         pass
 
     # build graph and return
-    tasklogger.log_debug("Initializing {} with arguments {}".format(
+    _logger.debug("Initializing {} with arguments {}".format(
         parent_classes,
         ", ".join(["{}='{}'".format(key, value)
                    for key, value in params.items()
103 changes: 52 additions & 51 deletions graphtools/base.py
@@ -11,9 +11,9 @@
 from scipy import sparse
 import warnings
 import numbers
-import tasklogger
 import pickle
 import sys
+import tasklogger
 
 try:
     import pandas as pd
@@ -29,6 +29,8 @@
 
 from . import utils
 
+_logger = tasklogger.get_tasklogger('graphtools')
+
 
 class Base(object):
     """Class that deals with key-word arguments but is otherwise
@@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
             n_pca = None
         elif n_pca is True:  # notify that we're going to estimate rank.
             n_pca = 'auto'
-            tasklogger.log_info("Estimating n_pca from matrix rank. "
+            _logger.info("Estimating n_pca from matrix rank. "
                                 "Supply an integer n_pca "
                                 "for fixed amount.")
         if not any([isinstance(n_pca, numbers.Number),
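When `n_pca` is True, graphtools estimates the number of components from the numerical rank of the matrix; the rule it applies (in `_reduce_data`, next hunk) keeps singular values at or above `smax * eps * max(n, m)`, the same default tolerance numpy uses in `np.linalg.matrix_rank`. A toy illustration of that rule on a matrix of known rank (shapes and seed chosen arbitrarily):

```python
import numpy as np

rng = np.random.RandomState(42)
# A 50x30 matrix of (generic) rank 10.
X = rng.normal(size=(50, 10)) @ rng.normal(size=(10, 30))

s = np.linalg.svd(X, compute_uv=False)
# The 'auto' threshold from the hunk below:
threshold = s.max() * np.finfo(X.dtype).eps * max(X.shape)
print((s >= threshold).sum())  # rank estimate; expected to print 10
```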
@@ -233,45 +235,44 @@ def _reduce_data(self):
             Reduced data matrix
         """
         if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]):
-            tasklogger.log_start("PCA")
-            n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
-            if sparse.issparse(self.data):
-                if isinstance(self.data, sparse.coo_matrix) or \
-                        isinstance(self.data, sparse.lil_matrix) or \
-                        isinstance(self.data, sparse.dok_matrix):
-                    self.data = self.data.tocsr()
-                self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
-            else:
-                self.data_pca = PCA(n_pca,
-                                    svd_solver='randomized',
-                                    random_state=self.random_state)
-            self.data_pca.fit(self.data)
-            if self.n_pca == 'auto':
-                s = self.data_pca.singular_values_
-                smax = s.max()
-                if self.rank_threshold == 'auto':
-                    threshold = smax * \
-                        np.finfo(self.data.dtype).eps * max(self.data.shape)
-                    self.rank_threshold = threshold
-                threshold = self.rank_threshold
-                gate = np.where(s >= threshold)[0]
-                self.n_pca = gate.shape[0]
-                if self.n_pca == 0:
-                    raise ValueError("Supplied threshold {} was greater than "
-                                     "maximum singular value {} "
-                                     "for the data matrix".format(threshold, smax))
-                tasklogger.log_info(
-                    "Using rank estimate of {} as n_pca".format(self.n_pca))
-                # reset the sklearn operator
-                op = self.data_pca  # for line-width brevity..
-                op.components_ = op.components_[gate, :]
-                op.explained_variance_ = op.explained_variance_[gate]
-                op.explained_variance_ratio_ = op.explained_variance_ratio_[
-                    gate]
-                op.singular_values_ = op.singular_values_[gate]
-                self.data_pca = op  # im not clear if this is needed due to assignment rules
-            data_nu = self.data_pca.transform(self.data)
-            tasklogger.log_complete("PCA")
+            with _logger.task("PCA"):
+                n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
+                if sparse.issparse(self.data):
+                    if isinstance(self.data, sparse.coo_matrix) or \
+                            isinstance(self.data, sparse.lil_matrix) or \
+                            isinstance(self.data, sparse.dok_matrix):
+                        self.data = self.data.tocsr()
+                    self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
+                else:
+                    self.data_pca = PCA(n_pca,
+                                        svd_solver='randomized',
+                                        random_state=self.random_state)
+                self.data_pca.fit(self.data)
+                if self.n_pca == 'auto':
+                    s = self.data_pca.singular_values_
+                    smax = s.max()
+                    if self.rank_threshold == 'auto':
+                        threshold = smax * \
+                            np.finfo(self.data.dtype).eps * max(self.data.shape)
+                        self.rank_threshold = threshold
+                    threshold = self.rank_threshold
+                    gate = np.where(s >= threshold)[0]
+                    self.n_pca = gate.shape[0]
+                    if self.n_pca == 0:
+                        raise ValueError("Supplied threshold {} was greater than "
+                                         "maximum singular value {} "
+                                         "for the data matrix".format(threshold, smax))
+                    _logger.info(
+                        "Using rank estimate of {} as n_pca".format(self.n_pca))
+                    # reset the sklearn operator
+                    op = self.data_pca  # for line-width brevity..
+                    op.components_ = op.components_[gate, :]
+                    op.explained_variance_ = op.explained_variance_[gate]
+                    op.explained_variance_ratio_ = op.explained_variance_ratio_[
+                        gate]
+                    op.singular_values_ = op.singular_values_[gate]
+                    self.data_pca = op  # im not clear if this is needed due to assignment rules
+                data_nu = self.data_pca.transform(self.data)
             return data_nu
         else:
             data_nu = self.data
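This is the one genuinely structural change in the commit: the paired `tasklogger.log_start("PCA")` / `tasklogger.log_complete("PCA")` calls become a single `with _logger.task("PCA"):` block, so the start and completion messages are tied to a scope instead of relying on both calls being reached. A side-by-side sketch, assuming `task()` is a context manager that logs entry and timed completion on exit (`compute_pca` is a hypothetical stand-in for the PCA body above):

```python
import tasklogger

_logger = tasklogger.get_tasklogger('graphtools')

def compute_pca():
    pass  # hypothetical stand-in for the PCA body

# Old interface: paired module-level calls. An exception raised between
# them leaves the "PCA" task open with no completion message.
tasklogger.log_start("PCA")
data_nu = compute_pca()
tasklogger.log_complete("PCA")

# New interface: the task is scoped to the block.
with _logger.task("PCA"):
    data_nu = compute_pca()
```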
@@ -472,10 +473,10 @@ def __init__(self,
         self.anisotropy = anisotropy
 
         if initialize:
-            tasklogger.log_debug("Initializing kernel...")
+            _logger.debug("Initializing kernel...")
             self.K
         else:
-            tasklogger.log_debug("Not initializing kernel.")
+            _logger.debug("Not initializing kernel.")
         super().__init__(**kwargs)
 
     def _check_symmetrization(self, kernel_symm, theta):
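The `initialize` branch works because `K` is evaluated lazily: merely touching `self.K` forces the kernel to be built and cached. A minimal stand-in for that pattern, assuming a cached property (the real class builds the kernel in `_build_kernel`, which appears in the next hunk's context):

```python
class LazyKernel:
    def __init__(self, initialize=True):
        self._kernel = None
        if initialize:
            self.K  # touch the property to build the kernel eagerly

    @property
    def K(self):
        # Build once on first access, then reuse the cached result.
        if self._kernel is None:
            self._kernel = self._build_kernel()
        return self._kernel

    def _build_kernel(self):
        return [[1.0]]  # placeholder kernel
```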
@@ -524,18 +525,18 @@ def _build_kernel(self):
     def symmetrize_kernel(self, K):
         # symmetrize
         if self.kernel_symm == "+":
-            tasklogger.log_debug("Using addition symmetrization.")
+            _logger.debug("Using addition symmetrization.")
             K = (K + K.T) / 2
         elif self.kernel_symm == "*":
-            tasklogger.log_debug("Using multiplication symmetrization.")
+            _logger.debug("Using multiplication symmetrization.")
             K = K.multiply(K.T)
         elif self.kernel_symm == 'mnn':
-            tasklogger.log_debug(
+            _logger.debug(
                 "Using mnn symmetrization (theta = {}).".format(self.theta))
             K = self.theta * utils.elementwise_minimum(K, K.T) + \
                 (1 - self.theta) * utils.elementwise_maximum(K, K.T)
         elif self.kernel_symm is None:
-            tasklogger.log_debug("Using no symmetrization.")
+            _logger.debug("Using no symmetrization.")
             pass
         else:
             # this should never happen
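Only the debug calls change here, but the hunk documents what each `kernel_symm` mode computes. A small sketch on a sparse kernel; scipy's `.minimum`/`.maximum` stand in for graphtools' `utils.elementwise_minimum`/`utils.elementwise_maximum`, which this sketch assumes behave the same way:

```python
import numpy as np
from scipy import sparse

K = sparse.csr_matrix(np.array([[0.0, 0.9, 0.1],
                                [0.4, 0.0, 0.0],
                                [0.0, 0.7, 0.0]]))
theta = 0.5

K_add = (K + K.T) / 2     # "+": average with the transpose
K_mult = K.multiply(K.T)  # "*": elementwise product
# 'mnn': theta-weighted blend of elementwise min and max
K_mnn = theta * K.minimum(K.T) + (1 - theta) * K.maximum(K.T)

# All three results are symmetric:
for M in (K_add, K_mult, K_mnn):
    assert np.allclose(M.toarray(), M.T.toarray())
```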
@@ -787,10 +788,10 @@ def _check_shortest_path_distance(self, distance):
     def _default_shortest_path_distance(self):
         if not self.weighted:
             distance = 'data'
-            tasklogger.log_info("Using ambient data distances.")
+            _logger.info("Using ambient data distances.")
         else:
             distance = 'affinity'
-            tasklogger.log_info("Using negative log affinity distances.")
+            _logger.info("Using negative log affinity distances.")
         return distance
 
     def shortest_path(self, method='auto', distance=None):
@@ -954,7 +955,7 @@ def __init__(self, data,
         # kwargs are ignored
         self.n_jobs = n_jobs
         self.verbose = verbose
-        tasklogger.set_level(verbose)
+        _logger.set_level(verbose)
         super().__init__(data, **kwargs)
 
     def get_params(self):
@@ -1117,6 +1118,6 @@ def set_params(self, **params):
             self.n_jobs = params['n_jobs']
         if 'verbose' in params:
             self.verbose = params['verbose']
-            tasklogger.set_level(self.verbose)
+            _logger.set_level(self.verbose)
         super().set_params(**params)
         return self
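With `__init__` and `set_params` both routing `verbose` through the package logger, graphtools' verbosity is now controlled per instance without touching tasklogger's global state. A hypothetical smoke test (the data and parameter values here are made up; `Graph`, `n_pca`, and `verbose` all appear in this diff):

```python
import numpy as np
import graphtools

X = np.random.normal(size=(100, 50))

# verbose=1 reaches _logger.set_level(1) on the 'graphtools' logger,
# so the timed "PCA" task from _reduce_data should be reported.
G = graphtools.Graph(X, n_pca=20, verbose=1)

# Later changes go through set_params -> _logger.set_level.
G.set_params(verbose=0)
```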
