13 changes: 7 additions & 6 deletions graphtools/api.py
@@ -1,12 +1,13 @@
import numpy as np
import warnings
-import tasklogger
from scipy import sparse
import pickle
import pygsp
+import tasklogger

-from . import base
-from . import graphs
+from . import base, graphs
+
+_logger = tasklogger.get_tasklogger('graphtools')


def Graph(data,
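The substantive change in this file: graphtools stops calling tasklogger's module-level helpers (`tasklogger.set_level`, `tasklogger.log_debug`, ...) and instead talks to a named `TaskLogger` instance. A minimal sketch of that pattern, assuming tasklogger >= 1.0; the logger name comes from the diff, while the level value, messages, and task body are illustrative:

```python
import tasklogger

# Fetch (or lazily create) the logger registered under "graphtools".
# Repeated calls with the same name return the same TaskLogger, so every
# module in the package shares one configuration.
_logger = tasklogger.get_tasklogger('graphtools')
_logger.set_level(2)  # assumption: 0 = silent, 1 = info, 2 = debug

# Instance methods replace the old module-level helpers:
_logger.info("replaces tasklogger.log_info")
_logger.debug("replaces tasklogger.log_debug")

# task() replaces paired log_start()/log_complete() calls: it logs the
# start of the block and the elapsed time when the block exits.
with _logger.task("Example task"):
    total = sum(range(10 ** 6))  # placeholder workload
```

Because the instance is looked up by name, a package-level `set_level(verbose)` no longer mutates global tasklogger state shared with other libraries.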
@@ -173,7 +174,7 @@ def Graph(data,
“Numerical Recipes (3rd edition)”,
Cambridge University Press, 2007, page 795.
"""
-    tasklogger.set_level(verbose)
+    _logger.set_level(verbose)
if sample_idx is not None and len(np.unique(sample_idx)) == 1:
warnings.warn("Only one unique sample. "
"Not using MNNGraph")
@@ -239,7 +240,7 @@ def Graph(data,
else:
msg = msg + " and PyGSP inheritance"

-    tasklogger.log_debug(msg)
+    _logger.debug(msg)

class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
try:
@@ -257,7 +258,7 @@ def Graph(data,
pass

# build graph and return
tasklogger.log_debug("Initializing {} with arguments {}".format(
_logger.debug("Initializing {} with arguments {}".format(
parent_classes,
", ".join(["{}='{}'".format(key, value)
for key, value in params.items()
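For context on the hunk above: `Graph()` is a factory that selects `parent_classes` from the supplied arguments and assembles a concrete graph class at runtime, which is why the debug message reports the class list. A hedged sketch of that composition pattern using `type()`; the stand-in parent classes below are hypothetical and do not reproduce graphtools' real class hierarchy:

```python
# Hypothetical stand-ins for graphtools' parent classes.
class TraditionalGraph:
    pass


class PyGSPGraph:
    pass


parent_classes = (TraditionalGraph, PyGSPGraph)

# Mirrors the diff: strip the "Graph" suffix from each parent name...
class_names = [p.__name__.replace("Graph", "") for p in parent_classes]
# ...then join the pieces back into a single class name.
name = "".join(class_names) + "Graph"  # -> "TraditionalPyGSPGraph"

# type(name, bases, namespace) builds a new class inheriting from all bases.
GraphClass = type(name, parent_classes, {})
print([c.__name__ for c in GraphClass.__mro__])
```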
116 changes: 61 additions & 55 deletions graphtools/base.py
@@ -11,9 +11,9 @@
from scipy import sparse
import warnings
import numbers
-import tasklogger
import pickle
import sys
+import tasklogger

try:
import pandas as pd
@@ -29,6 +29,8 @@

from . import utils

+_logger = tasklogger.get_tasklogger('graphtools')


class Base(object):
"""Class that deals with key-word arguments but is otherwise
@@ -179,7 +181,7 @@ def _parse_n_pca_threshold(self, data, n_pca, rank_threshold):
n_pca = None
elif n_pca is True: # notify that we're going to estimate rank.
n_pca = 'auto'
tasklogger.log_info("Estimating n_pca from matrix rank. "
_logger.info("Estimating n_pca from matrix rank. "
"Supply an integer n_pca "
"for fixed amount.")
if not any([isinstance(n_pca, numbers.Number),
@@ -233,45 +235,44 @@ def _reduce_data(self):
Reduced data matrix
"""
        if self.n_pca is not None and (self.n_pca == 'auto' or self.n_pca < self.data.shape[1]):
-            tasklogger.log_start("PCA")
-            n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
-            if sparse.issparse(self.data):
-                if isinstance(self.data, sparse.coo_matrix) or \
-                        isinstance(self.data, sparse.lil_matrix) or \
-                        isinstance(self.data, sparse.dok_matrix):
-                    self.data = self.data.tocsr()
-                self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
-            else:
-                self.data_pca = PCA(n_pca,
-                                    svd_solver='randomized',
-                                    random_state=self.random_state)
-            self.data_pca.fit(self.data)
-            if self.n_pca == 'auto':
-                s = self.data_pca.singular_values_
-                smax = s.max()
-                if self.rank_threshold == 'auto':
-                    threshold = smax * \
-                        np.finfo(self.data.dtype).eps * max(self.data.shape)
-                    self.rank_threshold = threshold
-                threshold = self.rank_threshold
-                gate = np.where(s >= threshold)[0]
-                self.n_pca = gate.shape[0]
-                if self.n_pca == 0:
-                    raise ValueError("Supplied threshold {} was greater than "
-                                     "maximum singular value {} "
-                                     "for the data matrix".format(threshold, smax))
-                tasklogger.log_info(
-                    "Using rank estimate of {} as n_pca".format(self.n_pca))
-                # reset the sklearn operator
-                op = self.data_pca  # for line-width brevity..
-                op.components_ = op.components_[gate, :]
-                op.explained_variance_ = op.explained_variance_[gate]
-                op.explained_variance_ratio_ = op.explained_variance_ratio_[gate]
-                op.singular_values_ = op.singular_values_[gate]
-                self.data_pca = op  # I'm not clear if this is needed due to assignment rules
-            data_nu = self.data_pca.transform(self.data)
-            tasklogger.log_complete("PCA")
+            with _logger.task("PCA"):
+                n_pca = self.data.shape[1] - 1 if self.n_pca == 'auto' else self.n_pca
+                if sparse.issparse(self.data):
+                    if isinstance(self.data, sparse.coo_matrix) or \
+                            isinstance(self.data, sparse.lil_matrix) or \
+                            isinstance(self.data, sparse.dok_matrix):
+                        self.data = self.data.tocsr()
+                    self.data_pca = TruncatedSVD(n_pca, random_state=self.random_state)
+                else:
+                    self.data_pca = PCA(n_pca,
+                                        svd_solver='randomized',
+                                        random_state=self.random_state)
+                self.data_pca.fit(self.data)
+                if self.n_pca == 'auto':
+                    s = self.data_pca.singular_values_
+                    smax = s.max()
+                    if self.rank_threshold == 'auto':
+                        threshold = smax * \
+                            np.finfo(self.data.dtype).eps * max(self.data.shape)
+                        self.rank_threshold = threshold
+                    threshold = self.rank_threshold
+                    gate = np.where(s >= threshold)[0]
+                    self.n_pca = gate.shape[0]
+                    if self.n_pca == 0:
+                        raise ValueError("Supplied threshold {} was greater than "
+                                         "maximum singular value {} "
+                                         "for the data matrix".format(threshold, smax))
+                    _logger.info(
+                        "Using rank estimate of {} as n_pca".format(self.n_pca))
+                    # reset the sklearn operator
+                    op = self.data_pca  # for line-width brevity..
+                    op.components_ = op.components_[gate, :]
+                    op.explained_variance_ = op.explained_variance_[gate]
+                    op.explained_variance_ratio_ = op.explained_variance_ratio_[gate]
+                    op.singular_values_ = op.singular_values_[gate]
+                    self.data_pca = op  # I'm not clear if this is needed due to assignment rules
+                data_nu = self.data_pca.transform(self.data)
            return data_nu
        else:
            data_nu = self.data
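A note on the `'auto'` threshold in the rewritten block: `smax * np.finfo(dtype).eps * max(shape)` is the same default tolerance `numpy.linalg.matrix_rank` uses, so the gated `n_pca` is effectively a numerical rank estimate. A standalone check on synthetic data (the matrix is illustrative, not from the PR):

```python
import numpy as np

# Rank-deficient data: 100 samples, 10 features, true rank 3.
rng = np.random.RandomState(42)
X = rng.normal(size=(100, 3)) @ rng.normal(size=(3, 10))

s = np.linalg.svd(X, compute_uv=False)            # singular values
threshold = s.max() * np.finfo(X.dtype).eps * max(X.shape)
n_pca = int((s >= threshold).sum())               # components the gate keeps

print(n_pca)                      # 3
print(np.linalg.matrix_rank(X))   # same default tolerance, also 3
```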
@@ -472,10 +473,10 @@ def __init__(self,
self.anisotropy = anisotropy

if initialize:
tasklogger.log_debug("Initializing kernel...")
_logger.debug("Initializing kernel...")
self.K
else:
tasklogger.log_debug("Not initializing kernel.")
_logger.debug("Not initializing kernel.")
super().__init__(**kwargs)

def _check_symmetrization(self, kernel_symm, theta):
@@ -524,18 +525,18 @@ def _build_kernel(self):
    def symmetrize_kernel(self, K):
        # symmetrize
        if self.kernel_symm == "+":
-            tasklogger.log_debug("Using addition symmetrization.")
+            _logger.debug("Using addition symmetrization.")
            K = (K + K.T) / 2
        elif self.kernel_symm == "*":
-            tasklogger.log_debug("Using multiplication symmetrization.")
+            _logger.debug("Using multiplication symmetrization.")
            K = K.multiply(K.T)
        elif self.kernel_symm == 'mnn':
-            tasklogger.log_debug(
+            _logger.debug(
                "Using mnn symmetrization (theta = {}).".format(self.theta))
            K = self.theta * utils.elementwise_minimum(K, K.T) + \
                (1 - self.theta) * utils.elementwise_maximum(K, K.T)
        elif self.kernel_symm is None:
-            tasklogger.log_debug("Using no symmetrization.")
+            _logger.debug("Using no symmetrization.")
            pass
else:
# this should never happen
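Each branch of `symmetrize_kernel` turns an asymmetric affinity matrix K into a symmetric one. A dense NumPy sketch of the three modes, with `np.minimum`/`np.maximum` standing in for graphtools' sparse-aware `utils.elementwise_minimum`/`utils.elementwise_maximum` (the matrix and theta are illustrative):

```python
import numpy as np

K = np.array([[1.0, 0.8],
              [0.2, 1.0]])   # asymmetric: K[0, 1] != K[1, 0]
theta = 0.5

K_add = (K + K.T) / 2        # "+" : average the two directions
K_mult = K * K.T             # "*" : elementwise product (dense analogue
                             #       of sparse K.multiply(K.T))
K_mnn = theta * np.minimum(K, K.T) + \
    (1 - theta) * np.maximum(K, K.T)  # "mnn": theta-weighted min/max blend

# All three results equal their own transposes.
for M in (K_add, K_mult, K_mnn):
    assert np.allclose(M, M.T)
```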
@@ -729,12 +730,12 @@ def to_pygsp(self, **kwargs):
def to_igraph(self, attribute="weight", **kwargs):
"""Convert to an igraph Graph

-        Uses the igraph.Graph.Weighted_Adjacency constructor
+        Uses the igraph.Graph constructor

Parameters
----------
attribute : str, optional (default: "weight")
-        kwargs : additional arguments for igraph.Graph.Weighted_Adjacency
+        kwargs : additional arguments for igraph.Graph
"""
try:
import igraph as ig
@@ -747,8 +748,13 @@ def to_igraph(self, attribute="weight", **kwargs):
# not a pygsp graph
W = self.K.copy()
W = utils.set_diagonal(W, 0)
-        return ig.Graph.Weighted_Adjacency(utils.to_array(W).tolist(),
-                                           attr=attribute, **kwargs)
+        sources, targets = W.nonzero()
+        edgelist = list(zip(sources, targets))
+        g = ig.Graph(W.shape[0], edgelist, **kwargs)
+        weights = W[W.nonzero()]
+        weights = utils.to_array(weights)
+        g.es[attribute] = weights.flatten().tolist()
+        return g
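The replacement builds the igraph object from an explicit edge list plus an edge attribute instead of `igraph.Graph.Weighted_Adjacency`, which required densifying `W` via `utils.to_array(W).tolist()`. Roughly equivalent standalone code, assuming python-igraph is installed; the adjacency matrix is illustrative:

```python
import igraph as ig
import numpy as np
from scipy import sparse

# Small weighted adjacency matrix (symmetric, zero diagonal).
W = sparse.csr_matrix(np.array([[0.0, 0.5, 0.0],
                                [0.5, 0.0, 0.2],
                                [0.0, 0.2, 0.0]]))

# One edge per stored nonzero entry, as in the new code path.
sources, targets = W.nonzero()
g = ig.Graph(W.shape[0], list(zip(sources, targets)))
g.es["weight"] = np.asarray(W[W.nonzero()]).flatten().tolist()

print(g.summary())
```

One consequence of this construction: `ig.Graph` is undirected by default, so the (i, j) and (j, i) entries of a symmetric W each produce an edge; callers can pass `directed=True` through `**kwargs` or call `g.simplify()` afterwards if they want a simple graph.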

def to_pickle(self, path):
"""Save the current Graph to a pickle.
@@ -787,10 +793,10 @@ def _check_shortest_path_distance(self, distance):
def _default_shortest_path_distance(self):
if not self.weighted:
distance = 'data'
tasklogger.log_info("Using ambient data distances.")
_logger.info("Using ambient data distances.")
else:
distance = 'affinity'
tasklogger.log_info("Using negative log affinity distances.")
_logger.info("Using negative log affinity distances.")
return distance

def shortest_path(self, method='auto', distance=None):
Expand Down Expand Up @@ -954,7 +960,7 @@ def __init__(self, data,
# kwargs are ignored
self.n_jobs = n_jobs
self.verbose = verbose
-        tasklogger.set_level(verbose)
+        _logger.set_level(verbose)
super().__init__(data, **kwargs)

def get_params(self):
@@ -1117,6 +1123,6 @@ def set_params(self, **params):
self.n_jobs = params['n_jobs']
if 'verbose' in params:
self.verbose = params['verbose']
-            tasklogger.set_level(self.verbose)
+            _logger.set_level(self.verbose)
super().set_params(**params)
return self