13 changes: 7 additions & 6 deletions graphtools/api.py
@@ -1,12 +1,13 @@
import numpy as np
import warnings
-import tasklogger
from scipy import sparse
import pickle
import pygsp
+import tasklogger

-from . import base
-from . import graphs
+from . import base, graphs
+
+_logger = tasklogger.get_tasklogger('graphtools')


def Graph(data,
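The substantive change in this file: graphtools stops calling tasklogger's module-level helpers (`tasklogger.set_level`, `tasklogger.log_debug`, ...) and instead talks to a named `TaskLogger` instance. A minimal sketch of that pattern, assuming tasklogger >= 1.0; the logger name comes from the diff, while the level value, messages, and task body are illustrative:

```python
import tasklogger

# Fetch (or lazily create) the logger registered under "graphtools".
# Repeated calls with the same name return the same TaskLogger, so every
# module in the package shares one configuration.
_logger = tasklogger.get_tasklogger('graphtools')
_logger.set_level(2)  # assumption: 0 = silent, 1 = info, 2 = debug

# Instance methods replace the old module-level helpers:
_logger.info("replaces tasklogger.log_info")
_logger.debug("replaces tasklogger.log_debug")

# task() replaces paired log_start()/log_complete() calls: it logs the
# start of the block and the elapsed time when the block exits.
with _logger.task("Example task"):
    total = sum(range(10 ** 6))  # placeholder workload
```

Because the instance is looked up by name, a package-level `set_level(verbose)` no longer mutates global tasklogger state shared with other libraries.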
@@ -173,7 +174,7 @@ def Graph(data,
“Numerical Recipes (3rd edition)”,
Cambridge University Press, 2007, page 795.
"""
-    tasklogger.set_level(verbose)
+    _logger.set_level(verbose)
if sample_idx is not None and len(np.unique(sample_idx)) == 1:
warnings.warn("Only one unique sample. "
"Not using MNNGraph")
@@ -239,7 +240,7 @@ def Graph(data,
else:
msg = msg + " and PyGSP inheritance"

-    tasklogger.log_debug(msg)
+    _logger.debug(msg)

class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
try:
@@ -257,7 +258,7 @@ def Graph(data,
pass

# build graph and return
tasklogger.log_debug("Initializing {} with arguments {}".format(
_logger.debug("Initializing {} with arguments {}".format(
parent_classes,
", ".join(["{}='{}'".format(key, value)
for key, value in params.items()
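For context on the hunk above: `Graph()` is a factory that selects `parent_classes` from the supplied arguments and assembles a concrete graph class at runtime, which is why the debug message reports the class list. A hedged sketch of that composition pattern using `type()`; the stand-in parent classes below are hypothetical and do not reproduce graphtools' real class hierarchy:

```python
# Hypothetical stand-ins for graphtools' parent classes.
class TraditionalGraph:
    pass


class PyGSPGraph:
    pass


parent_classes = (TraditionalGraph, PyGSPGraph)

# Mirrors the diff: strip the "Graph" suffix from each parent name...
class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
# ...then join the pieces back into a single class name.
name = "".join(class_names) + "Graph"  # -> "TraditionalPyGSPGraph"

# type(name, bases, namespace) builds a new class inheriting from all bases.
GraphClass = type(name, parent_classes, {})
print([c.__name__ for c in GraphClass.__mro__])
```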
116 changes: 61 additions & 55 deletions graphtools/base.py
@@ -11,9 +11,9 @@
from scipy import sparse
import warnings
import numbers
-import tasklogger
import pickle
import sys
+import tasklogger

try:
import pandas as pd
@@ -29,6 +29,8 @@

from . import utils

+_logger = tasklogger.get_tasklogger('graphtools')


class Base(object):
"""Class that deals with key-word arguments but is otherwise
@@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
n_pca = None
elif n_pca is True: # notify that we're going to estimate rank.
n_pca = 'auto'
tasklogger.log_info("Estimating n_pca from matrix rank. "
_logger.info("Estimating n_pca from matrix rank. "
"Supply an integer n_pca "
"for fixed amount.")
if not any([isinstance(n_pca, numbers.Number),
@@ -233,45 +235,44 @@ def _reduce_data(self):
Reduced data matrix
"""
        if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]):
-            tasklogger.log_start("PCA")
-            n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
-            if sparse.issparse(self.data):
-                if isinstance(self.data, sparse.coo_matrix) or \
-                        isinstance(self.data, sparse.lil_matrix) or \
-                        isinstance(self.data, sparse.dok_matrix):
-                    self.data = self.data.tocsr()
-                self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
-            else:
-                self.data_pca = PCA(n_pca,
-                                    svd_solver='randomized',
-                                    random_state=self.random_state)
-            self.data_pca.fit(self.data)
-            if self.n_pca == 'auto':
-                s = self.data_pca.singular_values_
-                smax = s.max()
-                if self.rank_threshold == 'auto':
-                    threshold = smax * \
-                        np.finfo(self.data.dtype).eps * max(self.data.shape)
-                    self.rank_threshold = threshold
-                threshold = self.rank_threshold
-                gate = np.where(s >= threshold)[0]
-                self.n_pca = gate.shape[0]
-                if self.n_pca == 0:
-                    raise ValueError("Supplied threshold {} was greater than "
-                                     "maximum singular value {} "
-                                     "for the data matrix".format(threshold, smax))
-                tasklogger.log_info(
-                    "Using rank estimate of {} as n_pca".format(self.n_pca))
-                # reset the sklearn operator
-                op = self.data_pca  # for line-width brevity..
-                op.components_ = op.components_[gate, :]
-                op.explained_variance_ = op.explained_variance_[gate]
-                op.explained_variance_ratio_ = op.explained_variance_ratio_[gate]
-                op.singular_values_ = op.singular_values_[gate]
-                self.data_pca = op  # I'm not clear if this is needed due to assignment rules
-            data_nu = self.data_pca.transform(self.data)
-            tasklogger.log_complete("PCA")
+            with _logger.task("PCA"):
+                n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
+                if sparse.issparse(self.data):
+                    if isinstance(self.data, sparse.coo_matrix) or \
+                            isinstance(self.data, sparse.lil_matrix) or \
+                            isinstance(self.data, sparse.dok_matrix):
+                        self.data = self.data.tocsr()
+                    self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
+                else:
+                    self.data_pca = PCA(n_pca,
+                                        svd_solver='randomized',
+                                        random_state=self.random_state)
+                self.data_pca.fit(self.data)
+                if self.n_pca == 'auto':
+                    s = self.data_pca.singular_values_
+                    smax = s.max()
+                    if self.rank_threshold == 'auto':
+                        threshold = smax * \
+                            np.finfo(self.data.dtype).eps * max(self.data.shape)
+                        self.rank_threshold = threshold
+                    threshold = self.rank_threshold
+                    gate = np.where(s >= threshold)[0]
+                    self.n_pca = gate.shape[0]
+                    if self.n_pca == 0:
+                        raise ValueError("Supplied threshold {} was greater than "
+                                         "maximum singular value {} "
+                                         "for the data matrix".format(threshold, smax))
+                    _logger.info(
+                        "Using rank estimate of {} as n_pca".format(self.n_pca))
+                    # reset the sklearn operator
+                    op = self.data_pca  # for line-width brevity..
+                    op.components_ = op.components_[gate, :]
+                    op.explained_variance_ = op.explained_variance_[gate]
+                    op.explained_variance_ratio_ = op.explained_variance_ratio_[gate]
+                    op.singular_values_ = op.singular_values_[gate]
+                    self.data_pca = op  # I'm not clear if this is needed due to assignment rules
+                data_nu = self.data_pca.transform(self.data)
            return data_nu
        else:
            data_nu = self.data
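A note on the `'auto'` threshold in the rewritten block: `smax * np.finfo(dtype).eps * max(shape)` is the same default tolerance `numpy.linalg.matrix_rank` uses, so the gated `n_pca` is effectively a numerical rank estimate. A standalone check on synthetic data (the matrix is illustrative, not from the PR):

```python
import numpy as np

# Rank-deficient data: 100 samples, 10 features, true rank 3.
rng = np.random.RandomState(42)
X = rng.normal(size=(100, 3)) @ rng.normal(size=(3, 10))

s = np.linalg.svd(X, compute_uv=False)            # singular values
threshold = s.max() * np.finfo(X.dtype).eps * max(X.shape)
n_pca = int((s >= threshold).sum())               # components the gate keeps

print(n_pca)                      # 3
print(np.linalg.matrix_rank(X))   # same default tolerance, also 3
```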
@@ -472,10 +473,10 @@ def __init__(self,
self.anisotropy = anisotropy

if initialize:
tasklogger.log_debug("Initializing kernel...")
_logger.debug("Initializing kernel...")
self.K
else:
tasklogger.log_debug("Not initializing kernel.")
_logger.debug("Not initializing kernel.")
super().__init__(**kwargs)

def _check_symmetrization(self, kernel_symm, theta):
@@ -524,18 +525,18 @@ def _build_kernel(self):
    def symmetrize_kernel(self, K):
        # symmetrize
        if self.kernel_symm == "+":
-            tasklogger.log_debug("Using addition symmetrization.")
+            _logger.debug("Using addition symmetrization.")
            K = (K + K.T) / 2
        elif self.kernel_symm == "*":
-            tasklogger.log_debug("Using multiplication symmetrization.")
+            _logger.debug("Using multiplication symmetrization.")
            K = K.multiply(K.T)
        elif self.kernel_symm == 'mnn':
-            tasklogger.log_debug(
+            _logger.debug(
                "Using mnn symmetrization (theta = {}).".format(self.theta))
            K = self.theta * utils.elementwise_minimum(K, K.T) + \
                (1 - self.theta) * utils.elementwise_maximum(K, K.T)
        elif self.kernel_symm is None:
-            tasklogger.log_debug("Using no symmetrization.")
+            _logger.debug("Using no symmetrization.")
            pass
else:
# this should never happen
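Each branch of `symmetrize_kernel` turns an asymmetric affinity matrix K into a symmetric one. A dense NumPy sketch of the three modes, with `np.minimum`/`np.maximum` standing in for graphtools' sparse-aware `utils.elementwise_minimum`/`utils.elementwise_maximum` (the matrix and theta are illustrative):

```python
import numpy as np

K = np.array([[1.0, 0.8],
              [0.2, 1.0]])   # asymmetric: K[0, 1] != K[1, 0]
theta = 0.5

K_add = (K + K.T) / 2        # "+" : average the two directions
K_mult = K * K.T             # "*" : elementwise product (dense analogue
                             #       of sparse K.multiply(K.T))
K_mnn = theta * np.minimum(K, K.T) + \
    (1 - theta) * np.maximum(K, K.T)  # "mnn": theta-weighted min/max blend

# All three results equal their own transposes.
for M in (K_add, K_mult, K_mnn):
    assert np.allclose(M, M.T)
```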
@@ -729,12 +730,12 @@ def to_pygsp(self, **kwargs):
def to_igraph(self, attribute="weight", **kwargs):
"""Convert to an igraph Graph

-        Uses the igraph.Graph.Weighted_Adjacency constructor
+        Uses the igraph.Graph constructor

Parameters
----------
attribute : str, optional (default: "weight")
-        kwargs : additional arguments for igraph.Graph.Weighted_Adjacency
+        kwargs : additional arguments for igraph.Graph
"""
try:
import igraph as ig
@@ -747,8 +748,13 @@ def to_igraph(self, attribute="weight", **kwargs):
# not a pygsp graph
W = self.K.copy()
W = utils.set_diagonal(W, 0)
-        return ig.Graph.Weighted_Adjacency(utils.to_array(W).tolist(),
-                                           attr=attribute, **kwargs)
+        sources, targets = W.nonzero()
+        edgelist = list(zip(sources, targets))
+        g = ig.Graph(W.shape[0], edgelist, **kwargs)
+        weights = W[W.nonzero()]
+        weights = utils.to_array(weights)
+        g.es[attribute] = weights.flatten().tolist()
+        return g
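The replacement builds the igraph object from an explicit edge list plus an edge attribute instead of `igraph.Graph.Weighted_Adjacency`, which required densifying `W` via `utils.to_array(W).tolist()`. Roughly equivalent standalone code, assuming python-igraph is installed; the adjacency matrix is illustrative:

```python
import igraph as ig
import numpy as np
from scipy import sparse

# Small weighted adjacency matrix (symmetric, zero diagonal).
W = sparse.csr_matrix(np.array([[0.0, 0.5, 0.0],
                                [0.5, 0.0, 0.2],
                                [0.0, 0.2, 0.0]]))

# One edge per stored nonzero entry, as in the new code path.
sources, targets = W.nonzero()
g = ig.Graph(W.shape[0], list(zip(sources, targets)))
g.es["weight"] = np.asarray(W[W.nonzero()]).flatten().tolist()

print(g.summary())
```

One consequence of this construction: `ig.Graph` is undirected by default, so the (i, j) and (j, i) entries of a symmetric W each produce an edge; callers can pass `directed=True` through `**kwargs` or call `g.simplify()` afterwards if they want a simple graph.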

def to_pickle(self, path):
"""Save the current Graph to a pickle.
@@ -787,10 +793,10 @@ def _check_shortest_path_distance(self, distance):
def _default_shortest_path_distance(self):
if not self.weighted:
distance = 'data'
tasklogger.log_info("Using ambient data distances.")
_logger.info("Using ambient data distances.")
else:
distance = 'affinity'
tasklogger.log_info("Using negative log affinity distances.")
_logger.info("Using negative log affinity distances.")
return distance

def shortest_path(self, method='auto', distance=None):
Expand Down Expand Up @@ -954,7 +960,7 @@ def __init__(self, data,
# kwargs are ignored
self.n_jobs = n_jobs
self.verbose = verbose
-        tasklogger.set_level(verbose)
+        _logger.set_level(verbose)
super().__init__(data, **kwargs)

def get_params(self):
@@ -1117,6 +1123,6 @@ def set_params(self, **params):
self.n_jobs = params['n_jobs']
if 'verbose' in params:
self.verbose = params['verbose']
-            tasklogger.set_level(self.verbose)
+            _logger.set_level(self.verbose)
super().set_params(**params)
return self