Skip to content

Commit

Permalink
Merge pull request #12 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
scprep v0.7.2
  • Loading branch information
scottgigante committed Oct 15, 2018
2 parents b162c33 + 455a882 commit 9da559e
Show file tree
Hide file tree
Showing 25 changed files with 521 additions and 312 deletions.
7 changes: 3 additions & 4 deletions python/doc/source/conf.py
Expand Up @@ -31,12 +31,11 @@
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ['sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.napoleon',
extensions = ['sphinx.ext.napoleon',
'sphinx.ext.doctest',
'sphinx.ext.coverage',
'sphinx.ext.viewcode']
'sphinx.ext.viewcode',
'autodocsumm']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['ytemplates']
Expand Down
10 changes: 10 additions & 0 deletions python/doc/source/reference.rst
Expand Up @@ -5,6 +5,7 @@ Data Input/Output
-----------------

.. automodule:: scprep.io
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -13,6 +14,7 @@ Filtering
---------

.. automodule:: scprep.filter
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -21,6 +23,7 @@ Normalization
-------------

.. automodule:: scprep.normalize
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -29,6 +32,7 @@ Transformation
--------------

.. automodule:: scprep.transform
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -37,6 +41,7 @@ Measurements
------------

.. automodule:: scprep.measure
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -45,6 +50,7 @@ Statistics
----------

.. automodule:: scprep.stats
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -53,6 +59,7 @@ Plotting
--------

.. automodule:: scprep.plot
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -61,6 +68,7 @@ Dimensionality Reduction
------------------------

.. automodule:: scprep.reduce
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -69,6 +77,7 @@ Utilities
---------

.. automodule:: scprep.utils
:autosummary:
:members:
:inherited-members:
:show-inheritance:
Expand All @@ -77,6 +86,7 @@ HDF5
----

.. automodule:: scprep.hdf5
:autosummary:
:members:
:inherited-members:
:show-inheritance:
1 change: 1 addition & 0 deletions python/doc/source/requirements.txt
Expand Up @@ -9,3 +9,4 @@ matplotlib
tables
sphinx
sphinxcontrib-napoleon
autodocsumm
3 changes: 2 additions & 1 deletion python/requirements.txt
Expand Up @@ -2,4 +2,5 @@ numpy>=1.10.0
scipy>=0.18.0
scikit-learn>=0.19.1
future
pandas>=0.19.0
pandas>=0.19.0
decorator
4 changes: 2 additions & 2 deletions python/scprep/hdf5.py
Expand Up @@ -126,7 +126,7 @@ def get_node(f, node):
Returns
-------
g : tables.Group, h5py.Group, tables.CArray or ???
g : tables.Group, h5py.Group, tables.CArray or hdf5.Dataset
Requested HDF5 node.
"""
if _is_h5py(f, allow_dataset=False):
Expand All @@ -146,7 +146,7 @@ def get_values(dataset):
Parameters
----------
dataset : tables.CArray or h5py.???
dataset : tables.CArray or h5py.Dataset
Returns
-------
Expand Down
30 changes: 26 additions & 4 deletions python/scprep/plot.py
@@ -1,5 +1,6 @@
import numpy as np
from decorator import decorator
import os
try:
import matplotlib.pyplot as plt
import matplotlib as mpl
Expand Down Expand Up @@ -28,6 +29,25 @@ def _mpl_is_gui_backend():
return True


@_with_matplotlib
def show(fig):
"""Show a matplotlib Figure correctly, regardless of platform
If running a Jupyter notebook, we avoid running `fig.show`. If running
in Windows, it is necessary to run `plt.show` rather than `fig.show`.
Parameters
----------
fig : matplotlib.Figure
Figure to show
"""
if _mpl_is_gui_backend():
if os.platform == "Windows":
plt.show(block=True)
else:
fig.show()


@_with_matplotlib
def histogram(data,
bins=100, log=True,
Expand Down Expand Up @@ -59,6 +79,7 @@ def histogram(data,
"""
if ax is None:
fig, ax = plt.subplots(figsize=figsize)
show_fig = True
else:
try:
fig = ax.get_figure()
Expand All @@ -68,6 +89,7 @@ def histogram(data,
"Got {}".format(type(ax)))
else:
raise e
show_fig = False
if log == 'x' or log is True:
bins = np.logspace(np.log10(max(np.min(data), 1)),
np.log10(np.max(data)),
Expand All @@ -83,8 +105,8 @@ def histogram(data,
data, cutoff, percentile, required=False)
if cutoff is not None:
ax.axvline(cutoff, color='red')
if _mpl_is_gui_backend():
fig.show()
if show_fig:
show(fig)


@_with_matplotlib
Expand Down Expand Up @@ -119,7 +141,7 @@ def plot_library_size(data,
"""
histogram(measure.library_size(data),
cutoff=cutoff, percentile=percentile,
bins=bins, log=log, ax=ax, figsize=figsize)
bins=bins, log=log, ax=ax, figsize=figsize, **kwargs)


@_with_matplotlib
Expand Down Expand Up @@ -160,4 +182,4 @@ def plot_gene_set_expression(data, genes,
histogram(measure.gene_set_expression(
data, genes, library_size_normalize=library_size_normalize),
cutoff=cutoff, percentile=percentile,
bins=bins, log=log, ax=ax, figsize=figsize)
bins=bins, log=log, ax=ax, figsize=figsize, **kwargs)
86 changes: 52 additions & 34 deletions python/scprep/stats.py
Expand Up @@ -39,7 +39,7 @@ def EMD(x, y):
--------
>>> import scprep
>>> data = scprep.io.load_csv("my_data.csv")
>>> dremi = scprep.stats.EMD(data['GENE1'], data['GENE2'])
>>> emd = scprep.stats.EMD(data['GENE1'], data['GENE2'])
"""
x, y = _vector_coerce_two_dense(x, y)
return stats.wasserstein_distance(x, y)
Expand All @@ -48,9 +48,9 @@ def EMD(x, y):
def mutual_information(x, y, bins=8):
"""Mutual information score with set number of bins
Helper function for sklearn.metrics.mutual_info_score that builds your
contingency table for you using a set number of bins.
Credit: Warran Weckesser https://stackoverflow.com/a/20505476/3996580
Helper function for `sklearn.metrics.mutual_info_score` that builds a
contingency table over a set number of bins.
Credit: `Warran Weckesser <https://stackoverflow.com/a/20505476/3996580>`_.
Parameters
Expand All @@ -71,7 +71,7 @@ def mutual_information(x, y, bins=8):
--------
>>> import scprep
>>> data = scprep.io.load_csv("my_data.csv")
>>> dremi = scprep.stats.mutual_information(data['GENE1'], data['GENE2'])
>>> mi = scprep.stats.mutual_information(data['GENE1'], data['GENE2'])
"""
x, y = _vector_coerce_two_dense(x, y)
c_xy = np.histogram2d(x, y, bins)[0]
Expand All @@ -80,25 +80,22 @@ def mutual_information(x, y, bins=8):


def knnDREMI(x, y, k=10, n_bins=20, n_mesh=3, n_jobs=1,
plot=False, **kwargs):
plot=False, return_drevi=False, **kwargs):
"""kNN conditional Density Resampled Estimate of Mutual Information
Calculates k-Nearest Neighbor conditional Density Resampled Estimate of
Mutual Information as defined in Van Dijk et al. 2018
(doi:10.1016/j.cell.2018.05.061)
kNN-DREMI is an adaptation of DREMI (Krishnaswamy et al. 2014,
doi:10.1126/science.1250689) for single cell RNA-sequencing data. DREMI
captures the functional relationship between two genes across their entire
dynamic range. The key change to kNN-DREMI is the replacement of the heat
diffusion-based kernel-density estimator from (Botev et al., 2010) by a
k-nearest neighbor-based density estimator (Sricharan et al., 2012), which
has been shown to be an effective method for sparse and high dimensional
datasets.
Mutual Information as defined in Van Dijk et al, 2018. [1]_
kNN-DREMI is an adaptation of DREMI (Krishnaswamy et al. 2014, [2]_) for
single cell RNA-sequencing data. DREMI captures the functional relationship
between two genes across their entire dynamic range. The key change to
kNN-DREMI is the replacement of the heat diffusion-based kernel-density
estimator from Botev et al., 2010 [3]_ by a k-nearest neighbor-based
density estimator (Sricharan et al., 2012 [4]_), which has been shown to be
an effective method for sparse and high dimensional datasets.
Note that kNN-DREMI, like Mutual Information and DREMI, is not symmetric.
Here we are estimating I(Y|X). There are many good articles about mutual
information on the web.
Here we are estimating I(Y|X).
Parameters
----------
Expand All @@ -117,12 +114,18 @@ def knnDREMI(x, y, k=10, n_bins=20, n_mesh=3, n_jobs=1,
plot : bool, optional (default: False)
If True, DREMI create plots of the data like those seen in
Fig 5C/D of van Dijk et al. 2018. (doi:10.1016/j.cell.2018.05.061).
return_drevi : bool, optional (default: False)
If True, return the DREVI normalized density matrix in addition
to the DREMI score.
**kwargs : additional arguments for `scprep.stats.plot_knnDREMI`
Returns
-------
dremi : float
kNN condtional Density resampled estimate of mutual information
drevi : np.ndarray
DREVI normalized density matrix. Only returned if `return_drevi`
is True.
Examples
--------
Expand All @@ -131,6 +134,20 @@ def knnDREMI(x, y, k=10, n_bins=20, n_mesh=3, n_jobs=1,
>>> dremi = scprep.stats.knnDREMI(data['GENE1'], data['GENE2'],
... plot=True,
... filename='dremi.png')
References
----------
.. [1] van Dijk D *et al.* (2018),
*Recovering Gene Interactions from Single-Cell Data Using Data
Diffusion*, `Cell <https://doi.org/10.1016/j.cell.2018.05.061>`_.
.. [2] Krishnaswamy S *et al.* (2014),
*Conditional density-based analysis of T cell signaling in single-cell
data*, `Science <https://doi.org/10.1126/science.1250689>`_.
.. [3] Botev ZI *et al*. (2010), *Kernel density estimation via diffusion*,
`The Annals of Statistics <https://doi.org/10.1214/10-AOS799>`_.
.. [4] Sricharan K *et al*. (2012), *Estimation of nonlinear functionals of
densities with confidence*, `IEEE Transactions on Information Theory
<https://doi.org/10.1109/TIT.2012.2195549>`_.
"""
x, y = _vector_coerce_two_dense(x, y)

Expand Down Expand Up @@ -182,10 +199,10 @@ def knnDREMI(x, y, k=10, n_bins=20, n_mesh=3, n_jobs=1,

# Calculate conditional entropy
# NB: not using thresholding here; entr(M) calcs -x*log(x) elementwise
bin_density_norm = bin_density_norm = bin_density / \
drevi = bin_density / \
np.sum(bin_density, axis=0) # columns sum to 1
# calc entropy of each column
cond_entropies = stats.entropy(bin_density_norm, base=2)
cond_entropies = stats.entropy(drevi, base=2)

# Mutual information (not normalized)
marginal_entropy = stats.entropy(
Expand All @@ -198,33 +215,36 @@ def knnDREMI(x, y, k=10, n_bins=20, n_mesh=3, n_jobs=1,
mutual_info = marginal_entropy - conditional_entropy

# DREMI
marginal_entropy_norm = stats.entropy(np.sum(bin_density_norm, axis=1),
marginal_entropy_norm = stats.entropy(np.sum(drevi, axis=1),
base=2)
cond_sums_norm = np.mean(bin_density_norm)
cond_sums_norm = np.mean(drevi)
conditional_entropy_norm = np.sum(cond_entropies * cond_sums_norm)

dremi = marginal_entropy_norm - conditional_entropy_norm

if plot is True:
if plot:
plot_knnDREMI(dremi, mutual_info,
x, y, n_bins, n_mesh,
density, bin_density, bin_density_norm, **kwargs)
return dremi
density, bin_density, drevi, **kwargs)
if return_drevi:
return dremi, drevi
else:
return dremi


@plot._with_matplotlib
def plot_knnDREMI(dremi, mutual_info, x, y, n_bins, n_mesh,
density, bin_density, bin_density_norm,
density, bin_density, drevi,
figsize=(12, 3.5), filename=None,
xlabel="Feature 1", ylabel="Feature 2",
title_fontsize=18, label_fontsize=16,
dpi=150):
"""Plot results of DREMI
Create plots of the data like those seen in
Fig 5C/D of van Dijk et al. 2018. (doi:10.1016/j.cell.2018.05.061).
Note that this function is not designed to be called manually. Instead create
plots by running `scprep.stats.knnDREMI` with `plot=True`.
Fig 5C/D of van Dijk et al. 2018. [1]_
Note that this function is not designed to be called manually. Instead
create plots by running `scprep.stats.knnDREMI` with `plot=True`.
Parameters
----------
Expand Down Expand Up @@ -268,8 +288,7 @@ def plot_knnDREMI(dremi, mutual_info, x, y, n_bins, n_mesh,
axes[1].set_xlabel(xlabel, fontsize=label_fontsize)

# Plot joint probability
raw_density_data = bin_density
axes[2].imshow(raw_density_data,
axes[2].imshow(bin_density,
cmap="inferno", origin="lower", aspect="auto")
axes[2].set_xticks([])
axes[2].set_yticks([])
Expand All @@ -278,8 +297,7 @@ def plot_knnDREMI(dremi, mutual_info, x, y, n_bins, n_mesh,
axes[2].set_xlabel(xlabel, fontsize=label_fontsize)

# Plot conditional probability
raw_density_data = bin_density_norm
axes[3].imshow(raw_density_data,
axes[3].imshow(drevi,
cmap="inferno", origin="lower", aspect="auto")
axes[3].set_xticks([])
axes[3].set_yticks([])
Expand Down
2 changes: 1 addition & 1 deletion python/scprep/version.py
@@ -1,4 +1,4 @@
# author: Scott Gigante <scott.gigante@yale.edu>
# (C) 2018 Krishnaswamy Lab GPLv2

__version__ = "0.7.1"
__version__ = "0.7.2"

0 comments on commit 9da559e

Please sign in to comment.