From 012de21ab6aef8f9a0b014100ee74585437a249d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 10 Feb 2021 17:11:07 -0500 Subject: [PATCH 01/44] bump version --- scprep/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scprep/version.py b/scprep/version.py index d6e6e024..434128b5 100644 --- a/scprep/version.py +++ b/scprep/version.py @@ -1,4 +1,4 @@ # author: Scott Gigante # (C) 2018 Krishnaswamy Lab GPLv2 -__version__ = "1.0.13" +__version__ = "1.0.14a0" From d7ed9d9ec844e16d7c4da27c0440caa076e6b112 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 09:55:31 -0500 Subject: [PATCH 02/44] test py3.5, r oldrel & devel --- .github/workflows/run_tests.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 815ce59c..945bce39 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -34,9 +34,9 @@ jobs: - name: Install tools run: | - python -m pip install --upgrade "pip<=21.0" - pip install --use-deprecated=legacy-resolver -U wheel setuptools - pip install --use-deprecated=legacy-resolver -U black flake8 + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U black flake8 hacking - name: Lint with Black run: | black . 
--check --diff @@ -54,9 +54,10 @@ jobs: fail-fast: false matrix: config: - - {name: 'current', os: ubuntu-latest, python: '3.8', r: 'release' } - - {name: 'prev', os: ubuntu-latest, python: '3.7', r: 'release' } - - {name: 'old', os: ubuntu-latest, python: '3.6', r: 'release' } + - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'devel' } + - {name: '3.7', os: ubuntu-latest, python: '3.7', r: 'release' } + - {name: '3.6', os: ubuntu-latest, python: '3.6', r: 'release' } + - {name: '3.5', os: ubuntu-latest, python: '3.5', r: 'oldrel' } steps: - name: Cancel Previous Runs @@ -111,9 +112,9 @@ jobs: - name: Install package & dependencies run: | - python -m pip install --upgrade "pip<=21.0" - pip install --use-deprecated=legacy-resolver -U wheel setuptools - pip install --use-deprecated=legacy-resolver -U .[test,r] + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U .[test] python -c "import scprep" - name: Run tests From 0be126f385386478b6a78d704bc3f538a4f2c965 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:07:18 -0500 Subject: [PATCH 03/44] remove author tags --- scprep/__init__.py | 3 --- scprep/_lazyload.py | 11 +++++++---- scprep/filter.py | 3 --- scprep/io/__init__.py | 3 --- scprep/io/csv.py | 3 --- scprep/io/fcs.py | 3 --- scprep/io/hdf5.py | 2 -- scprep/io/mtx.py | 3 --- scprep/io/tenx.py | 3 --- scprep/io/utils.py | 3 --- scprep/normalize.py | 3 --- scprep/sanitize.py | 3 --- scprep/stats.py | 3 --- scprep/transform.py | 3 --- scprep/version.py | 3 --- 15 files changed, 7 insertions(+), 45 deletions(-) diff --git a/scprep/__init__.py b/scprep/__init__.py index b4b3b3e4..cc9478ee 100644 --- a/scprep/__init__.py +++ b/scprep/__init__.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - from .version import __version__ import scprep.io import scprep.io.hdf5 diff --git a/scprep/_lazyload.py b/scprep/_lazyload.py index 1ecb13f2..c79f4f71 100644 --- a/scprep/_lazyload.py +++ 
b/scprep/_lazyload.py @@ -1,10 +1,13 @@ import importlib import sys -# Key: -# { module : [{submodule1:[subsubmodule1, subsubmodule2]}, submodule2] } -# each module loads submodules on initialization but is only imported -# and loads methods/classes when these are accessed +"""Key: + +{ module : [{submodule1:[subsubmodule1, subsubmodule2]}, submodule2] } + +Each module loads submodules on initialization but is only imported +and loads methods/classes when these are accessed. +""" _importspec = { "matplotlib": [ "colors", diff --git a/scprep/filter.py b/scprep/filter.py index 02b78ff3..6b43aab5 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import numpy as np import pandas as pd from scipy import sparse diff --git a/scprep/io/__init__.py b/scprep/io/__init__.py index d34bd92c..5e592b64 100644 --- a/scprep/io/__init__.py +++ b/scprep/io/__init__.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - from .csv import load_csv, load_tsv from .tenx import load_10X, load_10X_zip, load_10X_HDF5 from .fcs import load_fcs diff --git a/scprep/io/csv.py b/scprep/io/csv.py index 0677a48f..3c8c4079 100644 --- a/scprep/io/csv.py +++ b/scprep/io/csv.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import pandas as pd from .utils import _matrix_to_data_frame diff --git a/scprep/io/fcs.py b/scprep/io/fcs.py index e8d29a8a..f6ca8d8e 100644 --- a/scprep/io/fcs.py +++ b/scprep/io/fcs.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import pandas as pd import numpy as np import struct diff --git a/scprep/io/hdf5.py b/scprep/io/hdf5.py index 0abc9291..92fe66a3 100644 --- a/scprep/io/hdf5.py +++ b/scprep/io/hdf5.py @@ -1,5 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 from decorator import decorator from .._lazyload import tables diff --git a/scprep/io/mtx.py b/scprep/io/mtx.py index 
282c1170..595227e2 100644 --- a/scprep/io/mtx.py +++ b/scprep/io/mtx.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import scipy.io as sio from scipy import sparse import pandas as pd diff --git a/scprep/io/tenx.py b/scprep/io/tenx.py index a066fb7c..113adf7b 100644 --- a/scprep/io/tenx.py +++ b/scprep/io/tenx.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import pandas as pd import scipy.io as sio import scipy.sparse as sp diff --git a/scprep/io/utils.py b/scprep/io/utils.py index e68b77db..f6b3cce2 100644 --- a/scprep/io/utils.py +++ b/scprep/io/utils.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import pandas as pd import scipy.sparse as sp import warnings diff --git a/scprep/normalize.py b/scprep/normalize.py index 1cd91562..9d6a8bd3 100644 --- a/scprep/normalize.py +++ b/scprep/normalize.py @@ -1,6 +1,3 @@ -# author: Daniel Burkhardt -# (C) 2018 Krishnaswamy Lab GPLv2 - from sklearn.preprocessing import normalize import numpy as np from scipy import sparse diff --git a/scprep/sanitize.py b/scprep/sanitize.py index 0140d4bc..fa6f0d9e 100644 --- a/scprep/sanitize.py +++ b/scprep/sanitize.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import numpy as np import pandas as pd import warnings diff --git a/scprep/stats.py b/scprep/stats.py index f441fd79..dd06f870 100644 --- a/scprep/stats.py +++ b/scprep/stats.py @@ -1,6 +1,3 @@ -# author: Daniel Burkhardt -# (C) 2018 Krishnaswamy Lab GPLv2 - import numbers import numpy as np import pandas as pd diff --git a/scprep/transform.py b/scprep/transform.py index 51b075e5..b486780a 100644 --- a/scprep/transform.py +++ b/scprep/transform.py @@ -1,6 +1,3 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - import numpy as np from scipy import sparse import warnings diff --git a/scprep/version.py b/scprep/version.py index 22d9cacf..9eb1ebec 100644 --- a/scprep/version.py +++ 
b/scprep/version.py @@ -1,4 +1 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - __version__ = "1.0.11" From edf35675fe431e4763e320df9e88e535d130d92f Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:08:03 -0500 Subject: [PATCH 04/44] document with_HDF5 --- scprep/io/hdf5.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scprep/io/hdf5.py b/scprep/io/hdf5.py index 92fe66a3..f15aaa06 100644 --- a/scprep/io/hdf5.py +++ b/scprep/io/hdf5.py @@ -13,6 +13,7 @@ @decorator def with_HDF5(fun, *args, **kwargs): + """Ensure that HDF5 is available to run the decorated function.""" if not (utils._try_import("tables") or utils._try_import("h5py")): raise ModuleNotFoundError( "Found neither tables nor h5py. " From 95a33129b1405452a06e6190e949f8bcc6ddffd4 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:15:25 -0500 Subject: [PATCH 05/44] docs --- scprep/_lazyload.py | 21 +++++++++++++++++++-- scprep/filter.py | 8 ++++---- scprep/stats.py | 2 +- scprep/transform.py | 6 +++--- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/scprep/_lazyload.py b/scprep/_lazyload.py index c79f4f71..7089cd54 100644 --- a/scprep/_lazyload.py +++ b/scprep/_lazyload.py @@ -34,7 +34,20 @@ class AliasModule(object): + """Wrapper around Python module to allow lazy loading.""" + def __init__(self, name, members=None): + """Initialize a module without loading it. + + Parameters + ---------- + name : str + Module name + members : list[str, dict] + List of submodules to be loaded as AliasModules. If a dict, the submodule + is loaded with subsubmodules corresponding to the dictionary values; + if a string, the submodule has no subsubmodules. 
+ """ # easy access to AliasModule members to avoid recursionerror super_setattr = super().__setattr__ if members is None: @@ -59,6 +72,7 @@ def __init__(self, name, members=None): @property def __loaded_module__(self): + """Load the module, or retrieve it if already loaded.""" # easy access to AliasModule members to avoid recursionerror super_getattr = super().__getattribute__ name = super_getattr("__module_name__") @@ -70,6 +84,7 @@ def __loaded_module__(self): return sys.modules[name] def __getattribute__(self, attr): + """Access AliasModule members.""" # easy access to AliasModule members to avoid recursionerror super_getattr = super().__getattribute__ if attr in super_getattr("__submodules__"): @@ -88,8 +103,10 @@ def __getattribute__(self, attr): return getattr(super_getattr("__loaded_module__"), attr) def __setattr__(self, name, value): - # allows monkey-patching - # easy access to AliasModule members to avoid recursionerror + """Allow monkey-patching. + + Gives easy access to AliasModule members to avoid recursionerror. + """ super_getattr = super().__getattribute__ return setattr(super_getattr("__loaded_module__"), name, value) diff --git a/scprep/filter.py b/scprep/filter.py index 6b43aab5..e3d492c6 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -8,7 +8,7 @@ from . import utils, measure, select -def remove_empty_genes(data, *extra_data): +def remove_empty_genes(data, *extra_data): # noqa warnings.warn( "`scprep.filter.remove_empty_genes` is deprecated. " "Use `scprep.filter.filter_empty_genes` instead.", @@ -17,7 +17,7 @@ def remove_empty_genes(data, *extra_data): return filter_empty_genes(data, *extra_data) -def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): +def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): # noqa warnings.warn( "`scprep.filter.remove_rare_genes` is deprecated. 
" "Use `scprep.filter.filter_rare_genes` instead.", @@ -26,7 +26,7 @@ def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): return filter_rare_genes(data, *extra_data, cutoff=cutoff, min_cells=min_cells) -def remove_empty_cells(data, *extra_data, sample_labels=None): +def remove_empty_cells(data, *extra_data, sample_labels=None): # noqa warnings.warn( "`scprep.filter.remove_empty_cells` is deprecated. " "Use `scprep.filter.filter_empty_cells` instead.", @@ -35,7 +35,7 @@ def remove_empty_cells(data, *extra_data, sample_labels=None): return filter_empty_cells(data, *extra_data, sample_labels=sample_labels) -def remove_duplicates(data, *extra_data, sample_labels=None): +def remove_duplicates(data, *extra_data, sample_labels=None): # noqa warnings.warn( "`scprep.filter.remove_duplicates` is deprecated. " "Use `scprep.filter.filter_duplicates` instead.", diff --git a/scprep/stats.py b/scprep/stats.py index dd06f870..b478ef85 100644 --- a/scprep/stats.py +++ b/scprep/stats.py @@ -404,7 +404,7 @@ def _preprocess_test_matrices(X, Y): def mean_difference(X, Y): - """Calculate the mean difference in genes between two datasets + """Calculate the mean difference in genes between two datasets. In the case where the data has been log normalized, this is equivalent to fold change. diff --git a/scprep/transform.py b/scprep/transform.py index b486780a..23a93e4f 100644 --- a/scprep/transform.py +++ b/scprep/transform.py @@ -105,7 +105,7 @@ def arcsinh(data, cofactor=5): return utils.matrix_transform(data, np.arcsinh) -def sqrt_transform(*args, **kwargs): +def sqrt_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.sqrt_transform is deprecated. Please use " "scprep.transform.sqrt in future.", @@ -114,7 +114,7 @@ def sqrt_transform(*args, **kwargs): return sqrt(*args, **kwargs) -def log_transform(*args, **kwargs): +def log_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.log_transform is deprecated. 
Please use " "scprep.transform.log in future.", @@ -123,7 +123,7 @@ def log_transform(*args, **kwargs): return log(*args, **kwargs) -def arcsinh_transform(*args, **kwargs): +def arcsinh_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.arcsinh_transform is deprecated. Please " "use scprep.transform.arcsinh in future.", From 59ce44ed6483fce36f74905f27fd6b28b2bea35f Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:16:45 -0500 Subject: [PATCH 06/44] bioc is broken on oldrel --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 945bce39..3d9ddc92 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -57,7 +57,7 @@ jobs: - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'devel' } - {name: '3.7', os: ubuntu-latest, python: '3.7', r: 'release' } - {name: '3.6', os: ubuntu-latest, python: '3.6', r: 'release' } - - {name: '3.5', os: ubuntu-latest, python: '3.5', r: 'oldrel' } + - {name: '3.5', os: ubuntu-latest, python: '3.5', r: 'release' } steps: - name: Cancel Previous Runs From f015707659345dc975a943d0da25f137bbbd6619 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:30:09 -0500 Subject: [PATCH 07/44] less noisy conversion --- scprep/run/conversion.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scprep/run/conversion.py b/scprep/run/conversion.py index b7c97c90..6de2b745 100644 --- a/scprep/run/conversion.py +++ b/scprep/run/conversion.py @@ -46,7 +46,12 @@ def _pysce2rpy(pyobject): def _is_r_object(obj): - return "rpy2.robjects" in str(type(obj)) or obj is rpy2.rinterface.NULL + return "rpy2.robjects" in str(type(obj)) or "rpy2.rinterface" in str(type(obj)) + + +def _is_builtin(obj): + """Check if an object need not be converted.""" + return isinstance(obj, (float, int, str, bool)) @utils._with_pkg(pkg="rpy2", 
min_version="3.0") @@ -119,6 +124,6 @@ def py2rpy(pyobject): pass else: break - if not _is_r_object(pyobject): + if not (_is_r_object(pyobject) or _is_builtin(pyobject)): warnings.warn("Object not converted: {}".format(pyobject), RuntimeWarning) return pyobject From 986cdd60cea483e9a34c89b4cea0e1520a5330cf Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:34:12 -0500 Subject: [PATCH 08/44] docs --- scprep/plot/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scprep/plot/utils.py b/scprep/plot/utils.py index 4ab07191..6f450e60 100644 --- a/scprep/plot/utils.py +++ b/scprep/plot/utils.py @@ -146,16 +146,21 @@ def parse_fontsize(size=None, default=None): class temp_fontsize(object): + """Context manager to temporarily change matplotlib font size.""" + def __init__(self, size=None): + """Initialize the context manager.""" if size is None: size = plt.rcParams["font.size"] self.size = size def __enter__(self): + """Temporarily set the font size.""" self.old_size = plt.rcParams["font.size"] plt.rcParams["font.size"] = self.size def __exit__(self, type, value, traceback): + """Change the font size back to default.""" plt.rcParams["font.size"] = self.old_size From c7d0dcd33f5e26d1d92b495e0b45bc7890d0d13f Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:34:32 -0500 Subject: [PATCH 09/44] don't test on devel --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 3edc66ff..85a502e8 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -55,7 +55,7 @@ jobs: fail-fast: false matrix: config: - - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'devel' } + - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'release' } - {name: '3.7', os: ubuntu-latest, python: '3.7', r: 'release' } - {name: '3.6', os: ubuntu-latest, python: '3.6', r: 'release' } - {name: '3.5', os: 
ubuntu-latest, python: '3.5', r: 'release' } From c7d6c3b34617f55cf8ac36a5cec9e93e5cfbd1d3 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:34:52 -0500 Subject: [PATCH 10/44] add pre-commit --- .github/workflows/pre-commit.yml | 44 ++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 27 ++++++++++++++++++++ setup.cfg | 5 ++++ 3 files changed, 76 insertions(+) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..efe204a4 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,44 @@ +name: pre-commit +on: + push: + branches-ignore: + - 'master' + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + with: + python-version: "3.7" + architecture: "x64" + + - uses: actions/cache@v2 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}- + + - uses: pre-commit/action@v2.0.0 + continue-on-error: true + + - name: Commit files + run: | + if [[ `git status --porcelain --untracked-files=no` ]]; then + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git commit -m "pre-commit" -a + fi + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..71079ab4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: 
trailing-whitespace + exclude: \.(ai|gz)$ + - repo: https://github.com/timothycrosley/isort + rev: 5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + language_version: python3.5 + - repo: https://github.com/pre-commit/mirrors-autopep8 + rev: v1.5.4 + hooks: + - id: autopep8 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + args: [openproblems] + additional_dependencies: ['hacking'] diff --git a/setup.cfg b/setup.cfg index 1c0a87e1..728a46b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,3 +22,8 @@ exclude = test, doc, Snakefile + +[isort] +profile = black +force_single_line = true +force_alphabetical_sort = true From 8a7638e18e461814eb35783a566e0ef72c746799 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:39:21 -0500 Subject: [PATCH 11/44] set black to py35 --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71079ab4..0cbe03e9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: rev: 20.8b1 hooks: - id: black - language_version: python3.5 + args: ['--target-version', 'py35'] - repo: https://github.com/pre-commit/mirrors-autopep8 rev: v1.5.4 hooks: From 368552fb899fdef162ecfdfbd6a67641d9b15d7b Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 15:40:37 +0000 Subject: [PATCH 12/44] pre-commit --- .travis.yml | 1 - CODE_OF_CONDUCT.md | 2 +- README.rst | 2 +- autoblack.sh | 1 - data/test_data/gene_symbols.csv | 2 +- doc/Makefile | 2 +- doc/source/conf.py | 5 +++-- doc/source/examples/index.rst | 2 +- scprep/__init__.py | 16 ++++++------- scprep/filter.py | 11 ++++----- scprep/io/__init__.py | 14 +++++++----- scprep/io/csv.py | 6 ++--- scprep/io/download.py | 10 ++++----- scprep/io/fcs.py | 14 ++++++------ scprep/io/hdf5.py | 7 +++--- 
scprep/io/mtx.py | 10 ++++----- scprep/io/tenx.py | 16 ++++++------- scprep/io/utils.py | 7 +++--- scprep/measure.py | 7 +++--- scprep/normalize.py | 8 ++++--- scprep/plot/__init__.py | 14 ++++++++---- scprep/plot/colors.py | 4 ++-- scprep/plot/histogram.py | 15 ++++++++----- scprep/plot/jitter.py | 17 +++++++++----- scprep/plot/marker.py | 16 ++++++++----- scprep/plot/scatter.py | 40 +++++++++++++++------------------ scprep/plot/scree.py | 9 ++++---- scprep/plot/tools.py | 11 ++++----- scprep/plot/utils.py | 7 +++--- scprep/plot/variable_genes.py | 3 ++- scprep/reduce.py | 11 ++++----- scprep/run/__init__.py | 5 +++-- scprep/run/conversion.py | 7 +++--- scprep/run/r_function.py | 2 +- scprep/run/slingshot.py | 6 ++--- scprep/run/splatter.py | 6 ++--- scprep/sanitize.py | 3 ++- scprep/select.py | 10 ++++----- scprep/stats.py | 16 ++++++++----- scprep/transform.py | 5 +++-- scprep/utils.py | 13 +++++------ setup.py | 4 +++- test/_test_lazyload.py | 2 +- test/test_filter.py | 14 +++++++----- test/test_hdf5.py | 8 ++++--- test/test_io.py | 30 ++++++++++++------------- test/test_lazyload.py | 5 +++-- test/test_measure.py | 14 +++++++----- test/test_normalize.py | 8 ++++--- test/test_patch.py | 5 +++-- test/test_plot.py | 25 +++++++++------------ test/test_reduce.py | 12 +++++----- test/test_run.py | 21 +++++++++-------- test/test_sanitize.py | 7 ++++-- test/test_select.py | 8 ++++--- test/test_stats.py | 14 +++++++----- test/test_transform.py | 9 +++++--- test/test_utils.py | 12 ++++++---- test/tools/data.py | 4 ++-- test/tools/matrix.py | 9 ++++---- test/tools/utils.py | 13 ++++++----- 61 files changed, 325 insertions(+), 262 deletions(-) diff --git a/.travis.yml b/.travis.yml index 731c0b8d..e199304f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,4 +56,3 @@ deploy: 'on': tags: true branch: master - diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 10ef1ee7..24387d42 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -73,4 +73,4 @@ available at 
https://www.contributor-covenant.org/version/1/4/code-of-conduct.ht [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq \ No newline at end of file +https://www.contributor-covenant.org/faq diff --git a/README.rst b/README.rst index 03e5cca5..41b58b28 100644 --- a/README.rst +++ b/README.rst @@ -64,7 +64,7 @@ You can use `scprep` with your single cell data as follows:: # Filter by mitochondrial expression to remove dead cells mt_genes = scprep.select.get_gene_set(data, starts_with="MT") scprep.plot.plot_gene_set_expression(data, genes=mt_genes, percentile=90) - data = scprep.filter.filter_gene_set_expression(data, genes=mt_genes, + data = scprep.filter.filter_gene_set_expression(data, genes=mt_genes, percentile=90) # Library size normalize data = scprep.normalize.library_size_normalize(data) diff --git a/autoblack.sh b/autoblack.sh index 2ac8fd82..9364a77b 100644 --- a/autoblack.sh +++ b/autoblack.sh @@ -11,4 +11,3 @@ for file in \$files; do done EOF chmod +x .git/hooks/pre-commit - diff --git a/data/test_data/gene_symbols.csv b/data/test_data/gene_symbols.csv index 08d4a237..a6f11fc4 100644 --- a/data/test_data/gene_symbols.csv +++ b/data/test_data/gene_symbols.csv @@ -1 +1 @@ 
-Arl8b,Cdc16,Lrrc8b,0610009B22Rik,Apoe,Asap1,Gstm5,Mok,Rps27l,Stap2,Prpf40a,Pam16,Rnf220,Tmem9b,Rdx,Nupr1l,4930455B14Rik,Sdf2l1,4921517D22Rik,Psmd8,Dynlrb2,Smco4,Gm14285,Ctdp1,Hnrnpu,Ldhb,Cep63,Unc50,Chd5,Ift22,Ankrd13a,Atp5o,Cdhr4,Atp1b3,Gm553,Phtf1,Fam177a,Dgkh,Atoh8,1700019G24Rik,Sycp1,1110008F13Rik,Pgam2,Ift74,Akr1cl,Adam30,Psmd12,mt-Cytb,Ccdc33,Gsto2,Gm16208,C1ql1,Loxhd1,Dhfr,Fgf13,D130052B06Rik,Pcdhb3,Krt32,4833407H14Rik,Abcc5,Vmn1r118,Ccdc54,Megf6,Gm13872,Erp44,Rgs13,Slc2a3,Gm11116,Gm16279,Trmt1,Gm7697,Gm11579,Zdhhc20,4930444P10Rik,RP23-449M8.6,Gm12631,Stoml3,Metrn,Tmco5,Fabp12,Gm3486,Hnf1aos2,Tmem200b,Olfr91,Gm19273,Hmga1-rs1,Prl3d3,Crygf,D030040B21Rik,Serpinb9e,Fam126b,Gm26873,Gm42435,Dmxl2,Cep164,Kansl2,Mgat1,Thrsp,Gm20821,Olfr203 \ No newline at end of file +Arl8b,Cdc16,Lrrc8b,0610009B22Rik,Apoe,Asap1,Gstm5,Mok,Rps27l,Stap2,Prpf40a,Pam16,Rnf220,Tmem9b,Rdx,Nupr1l,4930455B14Rik,Sdf2l1,4921517D22Rik,Psmd8,Dynlrb2,Smco4,Gm14285,Ctdp1,Hnrnpu,Ldhb,Cep63,Unc50,Chd5,Ift22,Ankrd13a,Atp5o,Cdhr4,Atp1b3,Gm553,Phtf1,Fam177a,Dgkh,Atoh8,1700019G24Rik,Sycp1,1110008F13Rik,Pgam2,Ift74,Akr1cl,Adam30,Psmd12,mt-Cytb,Ccdc33,Gsto2,Gm16208,C1ql1,Loxhd1,Dhfr,Fgf13,D130052B06Rik,Pcdhb3,Krt32,4833407H14Rik,Abcc5,Vmn1r118,Ccdc54,Megf6,Gm13872,Erp44,Rgs13,Slc2a3,Gm11116,Gm16279,Trmt1,Gm7697,Gm11579,Zdhhc20,4930444P10Rik,RP23-449M8.6,Gm12631,Stoml3,Metrn,Tmco5,Fabp12,Gm3486,Hnf1aos2,Tmem200b,Olfr91,Gm19273,Hmga1-rs1,Prl3d3,Crygf,D030040B21Rik,Serpinb9e,Fam126b,Gm26873,Gm42435,Dmxl2,Cep164,Kansl2,Mgat1,Thrsp,Gm20821,Olfr203 diff --git a/doc/Makefile b/doc/Makefile index 10f1a2ba..3f433e07 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -18,4 +18,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/source/conf.py b/doc/source/conf.py index 88a4b158..2e5a8e46 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -13,14 +13,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import glob + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os -import sys -import glob import shutil +import sys root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) sys.path.insert(0, root_dir) diff --git a/doc/source/examples/index.rst b/doc/source/examples/index.rst index 88b1e810..9ad89301 100644 --- a/doc/source/examples/index.rst +++ b/doc/source/examples/index.rst @@ -4,4 +4,4 @@ Examples .. toctree:: scatter - jitter \ No newline at end of file + jitter diff --git a/scprep/__init__.py b/scprep/__init__.py index b48e5730..9b4dd775 100644 --- a/scprep/__init__.py +++ b/scprep/__init__.py @@ -1,17 +1,17 @@ +from . import _patch from .version import __version__ + +import scprep.filter import scprep.io import scprep.io.hdf5 -import scprep.select -import scprep.filter -import scprep.normalize -import scprep.transform import scprep.measure +import scprep.normalize import scprep.plot -import scprep.sanitize -import scprep.stats import scprep.reduce import scprep.run - -from . import _patch +import scprep.sanitize +import scprep.select +import scprep.stats +import scprep.transform _patch.patch_fill_value() diff --git a/scprep/filter.py b/scprep/filter.py index e3d492c6..a07c6d29 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -1,11 +1,12 @@ -import numpy as np -import pandas as pd +from . 
import measure +from . import select +from . import utils from scipy import sparse -import warnings import numbers - -from . import utils, measure, select +import numpy as np +import pandas as pd +import warnings def remove_empty_genes(data, *extra_data): # noqa diff --git a/scprep/io/__init__.py b/scprep/io/__init__.py index 5e592b64..58a1656d 100644 --- a/scprep/io/__init__.py +++ b/scprep/io/__init__.py @@ -1,6 +1,10 @@ -from .csv import load_csv, load_tsv -from .tenx import load_10X, load_10X_zip, load_10X_HDF5 +from . import download +from . import hdf5 +from .csv import load_csv +from .csv import load_tsv from .fcs import load_fcs -from .mtx import load_mtx, save_mtx - -from . import download, hdf5 +from .mtx import load_mtx +from .mtx import save_mtx +from .tenx import load_10X +from .tenx import load_10X_HDF5 +from .tenx import load_10X_zip diff --git a/scprep/io/csv.py b/scprep/io/csv.py index 3c8c4079..327ebed5 100644 --- a/scprep/io/csv.py +++ b/scprep/io/csv.py @@ -1,7 +1,7 @@ -import pandas as pd - -from .utils import _matrix_to_data_frame from .. import utils +from .utils import _matrix_to_data_frame + +import pandas as pd def _read_csv_sparse(filename, chunksize=10000, fill_value=0.0, **kwargs): diff --git a/scprep/io/download.py b/scprep/io/download.py index f399d1b5..1c6eed87 100644 --- a/scprep/io/download.py +++ b/scprep/io/download.py @@ -1,10 +1,10 @@ -import zipfile -import tempfile +from .. import utils +from .._lazyload import requests + import os +import tempfile import urllib.request - -from .._lazyload import requests -from .. import utils +import zipfile _CHUNK_SIZE = 32768 _GOOGLE_DRIVE_URL = "https://docs.google.com/uc?export=download" diff --git a/scprep/io/fcs.py b/scprep/io/fcs.py index f6ca8d8e..6982b611 100644 --- a/scprep/io/fcs.py +++ b/scprep/io/fcs.py @@ -1,14 +1,14 @@ -import pandas as pd -import numpy as np -import struct +from .. 
import utils +from .._lazyload import fcsparser +from .utils import _matrix_to_data_frame from io import BytesIO + +import numpy as np +import pandas as pd import string +import struct import warnings -from .utils import _matrix_to_data_frame -from .._lazyload import fcsparser -from .. import utils - def _channel_names_from_meta(meta, channel_numbers, naming="N"): try: diff --git a/scprep/io/hdf5.py b/scprep/io/hdf5.py index f15aaa06..1dca5dae 100644 --- a/scprep/io/hdf5.py +++ b/scprep/io/hdf5.py @@ -1,8 +1,7 @@ -from decorator import decorator - -from .._lazyload import tables -from .._lazyload import h5py from .. import utils +from .._lazyload import h5py +from .._lazyload import tables +from decorator import decorator try: ModuleNotFoundError diff --git a/scprep/io/mtx.py b/scprep/io/mtx.py index 595227e2..3993a7bf 100644 --- a/scprep/io/mtx.py +++ b/scprep/io/mtx.py @@ -1,10 +1,10 @@ -import scipy.io as sio +from .. import utils +from .utils import _matrix_to_data_frame from scipy import sparse -import pandas as pd -import os -from .utils import _matrix_to_data_frame -from .. import utils +import os +import pandas as pd +import scipy.io as sio def load_mtx(mtx_file, cell_axis="row", gene_names=None, cell_names=None, sparse=None): diff --git a/scprep/io/tenx.py b/scprep/io/tenx.py index 113adf7b..7eb96855 100644 --- a/scprep/io/tenx.py +++ b/scprep/io/tenx.py @@ -1,16 +1,16 @@ +from . import hdf5 +from .utils import _matrix_to_data_frame + +import numpy as np +import os import pandas as pd import scipy.io as sio import scipy.sparse as sp -import warnings -import numpy as np -import os -import zipfile +import shutil import tempfile import urllib -import shutil - -from .utils import _matrix_to_data_frame -from . import hdf5 +import warnings +import zipfile def _combine_gene_id(symbols, ids): diff --git a/scprep/io/utils.py b/scprep/io/utils.py index f6b3cce2..9ca4ea2c 100644 --- a/scprep/io/utils.py +++ b/scprep/io/utils.py @@ -1,9 +1,10 @@ +from .. 
import sanitize +from .. import utils + +import numpy as np import pandas as pd import scipy.sparse as sp import warnings -import numpy as np - -from .. import utils, sanitize def _parse_header(header, n_expected, header_type="gene_names"): diff --git a/scprep/measure.py b/scprep/measure.py index ecf7fef4..894770fc 100644 --- a/scprep/measure.py +++ b/scprep/measure.py @@ -1,9 +1,10 @@ +from . import select +from . import utils +from scipy import sparse + import numpy as np import pandas as pd import scipy.signal -from scipy import sparse - -from . import utils, select def library_size(data): diff --git a/scprep/normalize.py b/scprep/normalize.py index 9d6a8bd3..ce39a40c 100644 --- a/scprep/normalize.py +++ b/scprep/normalize.py @@ -1,10 +1,12 @@ +from . import measure +from . import utils +from scipy import sparse from sklearn.preprocessing import normalize + +import numbers import numpy as np -from scipy import sparse import pandas as pd -import numbers import warnings -from . import measure, utils def _get_scaled_libsize(data, rescale=10000, return_library_size=False): diff --git a/scprep/plot/__init__.py b/scprep/plot/__init__.py index 582a8a0d..b6ddf9ab 100644 --- a/scprep/plot/__init__.py +++ b/scprep/plot/__init__.py @@ -1,7 +1,13 @@ -from .scatter import scatter, scatter2d, scatter3d, rotate_scatter3d -from .histogram import histogram, plot_library_size, plot_gene_set_expression +from . import colors +from . import tools +from .histogram import histogram +from .histogram import plot_gene_set_expression +from .histogram import plot_library_size +from .jitter import jitter from .marker import marker_plot +from .scatter import rotate_scatter3d +from .scatter import scatter +from .scatter import scatter2d +from .scatter import scatter3d from .scree import scree_plot -from .jitter import jitter from .variable_genes import plot_gene_variability -from . 
import tools, colors diff --git a/scprep/plot/colors.py b/scprep/plot/colors.py index 07093fb1..095dbcdb 100644 --- a/scprep/plot/colors.py +++ b/scprep/plot/colors.py @@ -1,7 +1,7 @@ -import numpy as np +from .._lazyload import matplotlib as mpl from . import tools -from .._lazyload import matplotlib as mpl +import numpy as np plt = mpl.pyplot diff --git a/scprep/plot/histogram.py b/scprep/plot/histogram.py index c6a21b42..047fa11e 100644 --- a/scprep/plot/histogram.py +++ b/scprep/plot/histogram.py @@ -1,11 +1,14 @@ -import numpy as np -import numbers - +from .. import measure +from .. import utils +from .tools import label_axis +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize from scipy import sparse -from .. import measure, utils -from .utils import _get_figure, show, temp_fontsize, parse_fontsize -from .tools import label_axis +import numbers +import numpy as np _EPS = np.finfo("float").eps diff --git a/scprep/plot/jitter.py b/scprep/plot/jitter.py index 49c9d177..ad50ebab 100644 --- a/scprep/plot/jitter.py +++ b/scprep/plot/jitter.py @@ -1,11 +1,16 @@ -import numpy as np -import pandas as pd - from .. import utils -from .utils import _get_figure, show, temp_fontsize, parse_fontsize, _with_default -from .tools import label_axis, generate_colorbar, generate_legend - from .scatter import _ScatterParams +from .tools import generate_colorbar +from .tools import generate_legend +from .tools import label_axis +from .utils import _get_figure +from .utils import _with_default +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize + +import numpy as np +import pandas as pd class _JitterParams(_ScatterParams): diff --git a/scprep/plot/marker.py b/scprep/plot/marker.py index 72c183ae..0ccbf6ad 100644 --- a/scprep/plot/marker.py +++ b/scprep/plot/marker.py @@ -1,10 +1,16 @@ -import numpy as np -import pandas as pd +from .. import select +from .. 
import stats +from .. import utils +from .tools import label_axis +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import shift_ticklabels +from .utils import show +from .utils import temp_fontsize from scipy.cluster import hierarchy -from .. import utils, stats, select -from .utils import _get_figure, show, temp_fontsize, parse_fontsize, shift_ticklabels -from .tools import label_axis +import numpy as np +import pandas as pd def _make_scatter_arrays( diff --git a/scprep/plot/scatter.py b/scprep/plot/scatter.py index 8da64547..77465a3a 100644 --- a/scprep/plot/scatter.py +++ b/scprep/plot/scatter.py @@ -1,29 +1,25 @@ -import numpy as np +from .. import select +from .. import utils +from .._lazyload import matplotlib as mpl +from . import colors +from .tools import create_colormap +from .tools import create_normalize +from .tools import generate_colorbar +from .tools import generate_legend +from .tools import label_axis +from .utils import _get_figure +from .utils import _in_ipynb +from .utils import _is_color_array +from .utils import _with_default +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize + import numbers +import numpy as np import pandas as pd import warnings -from .. import utils, select -from .utils import ( - _get_figure, - _is_color_array, - show, - _in_ipynb, - parse_fontsize, - temp_fontsize, - _with_default, -) -from .tools import ( - create_colormap, - create_normalize, - label_axis, - generate_colorbar, - generate_legend, -) -from . import colors - -from .._lazyload import matplotlib as mpl - plt = mpl.pyplot diff --git a/scprep/plot/scree.py b/scprep/plot/scree.py index c49bd15d..edaad647 100644 --- a/scprep/plot/scree.py +++ b/scprep/plot/scree.py @@ -1,10 +1,11 @@ -import numpy as np - from .. 
import utils from .._lazyload import matplotlib as mpl - -from .utils import _get_figure, show, temp_fontsize from .tools import label_axis +from .utils import _get_figure +from .utils import show +from .utils import temp_fontsize + +import numpy as np @utils._with_pkg(pkg="matplotlib", min_version=3) diff --git a/scprep/plot/tools.py b/scprep/plot/tools.py index e8324fda..aa9d9b0e 100644 --- a/scprep/plot/tools.py +++ b/scprep/plot/tools.py @@ -1,10 +1,11 @@ -import numpy as np -import warnings - from .. import utils -from .utils import _get_figure, parse_fontsize, temp_fontsize - from .._lazyload import matplotlib as mpl +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import temp_fontsize + +import numpy as np +import warnings plt = mpl.pyplot diff --git a/scprep/plot/utils.py b/scprep/plot/utils.py index 6f450e60..8f9986a9 100644 --- a/scprep/plot/utils.py +++ b/scprep/plot/utils.py @@ -1,11 +1,10 @@ -import numpy as np -import platform - from .. import utils - from .._lazyload import matplotlib as mpl from .._lazyload import mpl_toolkits +import numpy as np +import platform + plt = mpl.pyplot diff --git a/scprep/plot/variable_genes.py b/scprep/plot/variable_genes.py index 583b3985..7caad590 100644 --- a/scprep/plot/variable_genes.py +++ b/scprep/plot/variable_genes.py @@ -1,5 +1,6 @@ +from .. import measure +from .. import utils from .scatter import scatter -from .. import utils, measure @utils._with_pkg(pkg="matplotlib", min_version=3) diff --git a/scprep/reduce.py b/scprep/reduce.py index a29d5407..2b3b0a6c 100644 --- a/scprep/reduce.py +++ b/scprep/reduce.py @@ -1,12 +1,13 @@ -from sklearn import decomposition, random_projection -import sklearn.base -import pandas as pd +from . import utils from scipy import sparse +from sklearn import decomposition +from sklearn import random_projection + import numpy as np +import pandas as pd +import sklearn.base import warnings -from . 
import utils - class InvertibleRandomProjection(random_projection.GaussianRandomProjection): """Gaussian random projection with an inverse transform using the pseudoinverse.""" diff --git a/scprep/run/__init__.py b/scprep/run/__init__.py index 0a07c96e..3c3798aa 100644 --- a/scprep/run/__init__.py +++ b/scprep/run/__init__.py @@ -1,3 +1,4 @@ -from .r_function import RFunction, install_bioconductor -from .splatter import SplatSimulate +from .r_function import install_bioconductor +from .r_function import RFunction from .slingshot import Slingshot +from .splatter import SplatSimulate diff --git a/scprep/run/conversion.py b/scprep/run/conversion.py index d0353c3d..1024df86 100644 --- a/scprep/run/conversion.py +++ b/scprep/run/conversion.py @@ -1,9 +1,10 @@ +from .. import utils +from .._lazyload import anndata2ri +from .._lazyload import rpy2 + import numpy as np import warnings -from .. import utils -from .._lazyload import rpy2, anndata2ri - def _rpylist2py(robject): if not isinstance(robject, rpy2.robjects.vectors.ListVector): diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 37d66c72..0460fb55 100644 --- a/scprep/run/r_function.py +++ b/scprep/run/r_function.py @@ -1,6 +1,6 @@ -from . import conversion from .. import utils from .._lazyload import rpy2 +from . import conversion def _console_warning(s, log_fn): diff --git a/scprep/run/slingshot.py b/scprep/run/slingshot.py index 67ec5377..2ee8247c 100644 --- a/scprep/run/slingshot.py +++ b/scprep/run/slingshot.py @@ -1,10 +1,10 @@ +from .. import utils +from . import r_function + import numpy as np import pandas as pd import warnings -from . import r_function -from .. import utils - def install(site_repository=None, update=False, version=None, verbose=True): """Install the required R packages to run Slingshot. 
diff --git a/scprep/run/splatter.py b/scprep/run/splatter.py index 41a16bf9..e3137e5c 100644 --- a/scprep/run/splatter.py +++ b/scprep/run/splatter.py @@ -1,9 +1,9 @@ -import numpy as np +from . import r_function + import numbers +import numpy as np import warnings -from . import r_function - def _sum_to_one(x): x = x / np.sum(x) # fix numerical error diff --git a/scprep/sanitize.py b/scprep/sanitize.py index fa6f0d9e..c34c03ab 100644 --- a/scprep/sanitize.py +++ b/scprep/sanitize.py @@ -1,7 +1,8 @@ +from . import utils + import numpy as np import pandas as pd import warnings -from . import utils def check_numeric(data, dtype="float", copy=None, suppress_errors=False): diff --git a/scprep/select.py b/scprep/select.py index 6645c35b..3956a25f 100644 --- a/scprep/select.py +++ b/scprep/select.py @@ -1,12 +1,12 @@ +from . import utils +from scipy import sparse + +import numbers import numpy as np import pandas as pd -import numbers -from scipy import sparse -import warnings import re import sys - -from . import utils +import warnings if int(sys.version.split(".")[1]) < 7: _re_pattern = type(re.compile("")) diff --git a/scprep/stats.py b/scprep/stats.py index b478ef85..5a5a1df8 100644 --- a/scprep/stats.py +++ b/scprep/stats.py @@ -1,14 +1,18 @@ +from . import plot +from . import select +from . import utils +from ._lazyload import matplotlib +from scipy import sparse +from scipy import stats +from sklearn import metrics +from sklearn import neighbors + +import joblib import numbers import numpy as np import pandas as pd -from scipy import stats, sparse -from sklearn import neighbors, metrics -import joblib -from . import plot, utils, select import warnings -from ._lazyload import matplotlib - plt = matplotlib.pyplot diff --git a/scprep/transform.py b/scprep/transform.py index 23a93e4f..0a307ce4 100644 --- a/scprep/transform.py +++ b/scprep/transform.py @@ -1,7 +1,8 @@ -import numpy as np +from . 
import utils from scipy import sparse + +import numpy as np import warnings -from . import utils def sqrt(data): diff --git a/scprep/utils.py b/scprep/utils.py index 754263ac..23a174b4 100644 --- a/scprep/utils.py +++ b/scprep/utils.py @@ -1,13 +1,12 @@ -import numpy as np -import pandas as pd +from decorator import decorator +from scipy import sparse -import numbers -import warnings import importlib +import numbers +import numpy as np +import pandas as pd import re - -from scipy import sparse -from decorator import decorator +import warnings try: ModuleNotFoundError diff --git a/setup.py b/setup.py index fd09f748..981a5a29 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,8 @@ +from setuptools import find_packages +from setuptools import setup + import os import sys -from setuptools import setup, find_packages install_requires = [ "numpy>=1.12.0", diff --git a/test/_test_lazyload.py b/test/_test_lazyload.py index 5a24993b..218e6761 100644 --- a/test/_test_lazyload.py +++ b/test/_test_lazyload.py @@ -1,6 +1,6 @@ import numpy -import scipy import pandas +import scipy import sys diff --git a/test/test_filter.py b/test/test_filter.py index 56d1f114..a251e778 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -1,10 +1,12 @@ -from tools import utils, matrix, data -import scprep -import pandas as pd -import numpy as np - -from scipy import sparse from functools import partial +from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + +import numpy as np +import pandas as pd +import scprep import unittest diff --git a/test/test_hdf5.py b/test/test_hdf5.py index cee47972..fc2b7796 100644 --- a/test/test_hdf5.py +++ b/test/test_hdf5.py @@ -1,9 +1,11 @@ -from tools import data, utils -import os +from tools import data +from tools import utils + +import h5py import mock +import os import scprep import sys -import h5py import tables diff --git a/test/test_io.py b/test/test_io.py index fdd9c34b..eb8819c5 100644 --- 
a/test/test_io.py +++ b/test/test_io.py @@ -1,24 +1,22 @@ -import pandas as pd -import numpy as np -import fcsparser +from nose.tools import assert_raises +from parameterized import parameterized +from scipy import sparse +from tools import data +from tools import utils -import os -import sys import copy -import shutil -import zipfile -import urllib -import unittest +import fcsparser import mock - +import numpy as np +import os +import pandas as pd import scprep import scprep.io.utils - -from tools import data, utils - -from scipy import sparse -from parameterized import parameterized -from nose.tools import assert_raises +import shutil +import sys +import unittest +import urllib +import zipfile class TestMatrixToDataFrame(unittest.TestCase): diff --git a/test/test_lazyload.py b/test/test_lazyload.py index e4cfab81..bb092572 100644 --- a/test/test_lazyload.py +++ b/test/test_lazyload.py @@ -1,9 +1,10 @@ -import subprocess +from tools import data + import mock import os import scprep +import subprocess import sys -from tools import data def test_lazyload(): diff --git a/test/test_measure.py b/test/test_measure.py index 5d0bf526..170f6fff 100644 --- a/test/test_measure.py +++ b/test/test_measure.py @@ -1,10 +1,12 @@ -from tools import utils, matrix, data -import scprep -import pandas as pd -import numpy as np - -from scipy import sparse from functools import partial +from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + +import numpy as np +import pandas as pd +import scprep import unittest diff --git a/test/test_normalize.py b/test/test_normalize.py index 42feda73..7f5cf953 100644 --- a/test/test_normalize.py +++ b/test/test_normalize.py @@ -1,9 +1,11 @@ -from tools import utils, matrix, data -import numpy as np +from functools import partial from sklearn.preprocessing import normalize +from tools import data +from tools import matrix +from tools import utils +import numpy as np import scprep -from functools import 
partial import unittest diff --git a/test/test_patch.py b/test/test_patch.py index f6a5b628..a9cff611 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -1,7 +1,8 @@ -import scprep +from pandas.core.internals.blocks import ExtensionBlock + import numpy as np import pandas as pd -from pandas.core.internals.blocks import ExtensionBlock +import scprep def test_pandas_series_rmatmul(): diff --git a/test/test_plot.py b/test/test_plot.py index c7358b9d..4f0b5ced 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -1,23 +1,20 @@ +from packaging.version import Version +from scprep.plot.histogram import _symlog_bins +from scprep.plot.jitter import _JitterParams +from scprep.plot.scatter import _ScatterParams +from tools import data +from tools import utils + +import matplotlib import matplotlib.pyplot as plt +import numbers import numpy as np +import os import pandas as pd - import scprep -import matplotlib - -import os import sys -import numbers import unittest -from packaging.version import Version - -from scprep.plot.scatter import _ScatterParams -from scprep.plot.jitter import _JitterParams -from scprep.plot.histogram import _symlog_bins - -from tools import data, utils - def try_remove(filename): try: @@ -438,7 +435,7 @@ def test_discrete_tab20(self): np.testing.assert_equal(params.cmap.colors[:10], plt.cm.tab10.colors) np.testing.assert_equal( params.cmap.colors[10:], - plt.cm.tab20.colors[1 : 1 + (len(params.cmap.colors) - 10) * 2 : 2], + plt.cm.tab20.colors[1: 1 + (len(params.cmap.colors) - 10) * 2: 2], ) def test_continuous_less_than_20(self): diff --git a/test/test_reduce.py b/test/test_reduce.py index 51cc3054..723cf5f3 100644 --- a/test/test_reduce.py +++ b/test/test_reduce.py @@ -1,11 +1,13 @@ -from tools import utils, matrix, data -import scprep +from functools import partial from scipy import sparse -import numpy as np -import pandas as pd from sklearn import decomposition +from tools import data +from tools import matrix +from tools import 
utils -from functools import partial +import numpy as np +import pandas as pd +import scprep import unittest diff --git a/test/test_run.py b/test/test_run.py index 2dfd8c94..674d2e1f 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,21 +4,24 @@ # python 3.5 pass else: - from tools import utils, matrix, data + from tools import data + from tools import matrix + from tools import utils + + import anndata + import mock import numpy as np import pandas as pd + import rpy2.rinterface_lib.callbacks + import rpy2.rinterface_lib.embedded import rpy2.robjects as ro + import scipy.sparse import scprep - import scprep.run.r_function - import scprep.run.conversion import scprep.run - import unittest - import anndata + import scprep.run.conversion + import scprep.run.r_function import sklearn.cluster - import scipy.sparse - import rpy2.rinterface_lib.callbacks - import rpy2.rinterface_lib.embedded - import mock + import unittest builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror diff --git a/test/test_sanitize.py b/test/test_sanitize.py index 491c154d..f52e9dc4 100644 --- a/test/test_sanitize.py +++ b/test/test_sanitize.py @@ -1,7 +1,10 @@ -from tools import utils, matrix, data -import scprep +from tools import data +from tools import matrix +from tools import utils + import numpy as np import pandas as pd +import scprep import warnings diff --git a/test/test_select.py b/test/test_select.py index e0d44b99..9690eeb7 100644 --- a/test/test_select.py +++ b/test/test_select.py @@ -1,10 +1,12 @@ -from tools import data, matrix, utils -import scprep +from scipy import sparse +from tools import data +from tools import matrix +from tools import utils import numpy as np import pandas as pd +import scprep import unittest -from scipy import sparse class Test10X(unittest.TestCase): diff --git a/test/test_stats.py b/test/test_stats.py index 28ed21ac..678d3c23 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -1,13 +1,15 @@ -from tools import utils, 
matrix, data -import numpy as np +from functools import partial +from parameterized import parameterized from scipy import stats - from sklearn.metrics import mutual_info_score +from tools import data +from tools import matrix +from tools import utils + +import numpy as np +import os import scprep -from functools import partial import warnings -import os -from parameterized import parameterized def _test_fun_2d(X, fun, **kwargs): diff --git a/test/test_transform.py b/test/test_transform.py index a561fe0f..7f01eb38 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -1,8 +1,11 @@ -from tools import utils, matrix, data -import numpy as np -import scprep from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + +import numpy as np import pandas as pd +import scprep import warnings diff --git a/test/test_utils.py b/test/test_utils.py index 40013a38..1d3f9b64 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,9 +1,12 @@ -from tools import data, matrix, utils -import scprep +from parameterized import parameterized from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + import numpy as np import pandas as pd -from parameterized import parameterized +import scprep def test_with_pkg(): @@ -154,7 +157,8 @@ def test_combine_batches(): ) assert np.all(sample_labels.index == Y2.index) assert sample_labels.name == "sample_labels" - transform = lambda X: scprep.utils.combine_batches( + + def transform(X): return scprep.utils.combine_batches( [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], batch_labels=[0, 1], )[0] diff --git a/test/tools/data.py b/test/tools/data.py index 7df2b55d..f4156ae9 100644 --- a/test/tools/data.py +++ b/test/tools/data.py @@ -1,6 +1,6 @@ -import scprep -import os import numpy as np +import os +import scprep def _os_agnostic_fullpath_join(path): diff --git a/test/tools/matrix.py b/test/tools/matrix.py index 
8bd0f2bd..635b9687 100644 --- a/test/tools/matrix.py +++ b/test/tools/matrix.py @@ -1,10 +1,11 @@ +from functools import partial +from packaging import version +from scipy import sparse +from scprep.utils import is_SparseDataFrame + import numpy as np import pandas as pd import warnings -from scipy import sparse -from functools import partial -from scprep.utils import is_SparseDataFrame -from packaging import version def _ignore_pandas_sparse_warning(): diff --git a/test/tools/utils.py b/test/tools/utils.py index afd32548..4fb68b66 100644 --- a/test/tools/utils.py +++ b/test/tools/utils.py @@ -1,10 +1,13 @@ -import numpy as np +from . import matrix +from nose.tools import assert_raises +from nose.tools import assert_raises_regex +from nose.tools import assert_warns_regex from scipy import sparse +from scprep.utils import is_SparseDataFrame +from scprep.utils import toarray + +import numpy as np import pandas as pd -from nose.tools import assert_raises -from scprep.utils import toarray, is_SparseDataFrame -from . 
import matrix -from nose.tools import assert_raises_regex, assert_warns_regex import re From a5655e8a1339fa99ffaa64c761bb4f5edf82e9c7 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:41:23 -0500 Subject: [PATCH 13/44] fix flake8 args --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0cbe03e9..afa638cf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,5 +23,5 @@ repos: rev: 3.8.4 hooks: - id: flake8 - args: [openproblems] + args: [scprep] additional_dependencies: ['hacking'] From 8b50abc607cb56f6e59021029c137b6145e03ae6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 15:42:46 +0000 Subject: [PATCH 14/44] pre-commit --- test/test_utils.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/test_utils.py b/test/test_utils.py index 1d3f9b64..6cc47257 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -158,10 +158,12 @@ def test_combine_batches(): assert np.all(sample_labels.index == Y2.index) assert sample_labels.name == "sample_labels" - def transform(X): return scprep.utils.combine_batches( - [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], - batch_labels=[0, 1], - )[0] + def transform(X): + return scprep.utils.combine_batches( + [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], + batch_labels=[0, 1], + )[0] + matrix.test_matrix_types( X, utils.assert_transform_equals, From 8012d2b743e70ca17c3180e2c192396b93ee233c Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:52:43 -0500 Subject: [PATCH 15/44] use flake8 everywhere --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index afa638cf..d39680a3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,5 +23,4 
@@ repos: rev: 3.8.4 hooks: - id: flake8 - args: [scprep] additional_dependencies: ['hacking'] From 4f6e6ee208a60127b64cd091f08fcb29a941b6ff Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:52:49 -0500 Subject: [PATCH 16/44] flake8 --- doc/source/conf.py | 2 +- test/_test_lazyload.py | 15 +++++++-------- test/test_io.py | 14 ++++++++++---- test/test_measure.py | 3 --- test/test_normalize.py | 1 - test/test_patch.py | 2 +- test/test_plot.py | 4 ++-- test/test_run.py | 3 +-- test/test_sanitize.py | 6 ++++-- test/test_stats.py | 1 - test/test_transform.py | 2 -- test/test_utils.py | 10 ++++++---- test/tools/matrix.py | 7 ++++--- test/tools/utils.py | 7 +++++-- 14 files changed, 41 insertions(+), 36 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 2e5a8e46..a537b55f 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -212,4 +212,4 @@ \texttt{\strut{}{{ docname }}}\\[-0.5\baselineskip] \noindent\rule{\textwidth}{0.4pt}} \vspace{-2\baselineskip} -""" +""" # noqa diff --git a/test/_test_lazyload.py b/test/_test_lazyload.py index 218e6761..794fce13 100644 --- a/test/_test_lazyload.py +++ b/test/_test_lazyload.py @@ -1,7 +1,6 @@ -import numpy -import pandas -import scipy -import sys +import numpy # noqa +import scipy # noqa +import pandas # noqa def test_lazyload(): @@ -14,11 +13,11 @@ def test_lazyload(): for module in scprep._lazyload._importspec.keys(): if module == "anndata2ri" and sys.version_info[:2] < (3, 6): continue - assert module not in scprep_loaded, module - if module in postloaded_modules: - assert getattr(scprep._lazyload, module).__class__ is type(scprep), module + if module in preloaded_modules: + assert getattr(scprep._lazyload, module).__class__ is type(scprep) else: assert ( getattr(scprep._lazyload, module).__class__ is scprep._lazyload.AliasModule - ), module + ) + assert module not in scprep_loaded, module diff --git a/test/test_io.py b/test/test_io.py index eb8819c5..7e63a285 100644 --- 
a/test/test_io.py +++ b/test/test_io.py @@ -198,7 +198,8 @@ def test_10X(): ) utils.assert_raises_message( FileNotFoundError, - "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)' must be present " + "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and " + "'barcodes.tsv(.gz)' must be present " "in {}".format(data.data_dir), scprep.io.load_10X, data.data_dir, @@ -236,7 +237,10 @@ def test_10X_zip_error(): def test_10X_zip_url(): X = data.load_10X() - filename = "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip" + filename = ( + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X.zip" + ) X_zip = scprep.io.load_10X_zip(filename) assert scprep.utils.is_sparse_dataframe(X_zip) assert np.sum(np.sum(X != X_zip)) == 0 @@ -723,7 +727,8 @@ def test_download_google_drive_large(): def test_download_url(): X = data.load_10X() scprep.io.download.download_url( - "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X/matrix.mtx.gz", + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X/matrix.mtx.gz", "url_test.mtx.gz", ) Y = scprep.io.load_mtx("url_test.mtx.gz").T @@ -734,7 +739,8 @@ def test_download_url(): def test_download_zip(): X = data.load_10X() scprep.io.download.download_and_extract_zip( - "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip", + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X.zip", "zip_test", ) Y = scprep.io.load_10X("zip_test/test_10X") diff --git a/test/test_measure.py b/test/test_measure.py index 170f6fff..7f2102cf 100644 --- a/test/test_measure.py +++ b/test/test_measure.py @@ -1,11 +1,8 @@ -from functools import partial -from scipy import sparse from tools import data from tools import matrix from tools import utils import numpy as np -import pandas as pd import scprep import unittest diff --git a/test/test_normalize.py b/test/test_normalize.py 
index 7f5cf953..a0567782 100644 --- a/test/test_normalize.py +++ b/test/test_normalize.py @@ -1,4 +1,3 @@ -from functools import partial from sklearn.preprocessing import normalize from tools import data from tools import matrix diff --git a/test/test_patch.py b/test/test_patch.py index a9cff611..67ca4c81 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd -import scprep +import scprep # noqa def test_pandas_series_rmatmul(): diff --git a/test/test_plot.py b/test/test_plot.py index 4f0b5ced..e354d0c1 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -7,7 +7,6 @@ import matplotlib import matplotlib.pyplot as plt -import numbers import numpy as np import os import pandas as pd @@ -435,7 +434,7 @@ def test_discrete_tab20(self): np.testing.assert_equal(params.cmap.colors[:10], plt.cm.tab10.colors) np.testing.assert_equal( params.cmap.colors[10:], - plt.cm.tab20.colors[1: 1 + (len(params.cmap.colors) - 10) * 2: 2], + plt.cm.tab20.colors[1 : 1 + (len(params.cmap.colors) - 10) * 2 : 2], ) def test_continuous_less_than_20(self): @@ -1462,6 +1461,7 @@ def test_generate_colorbar_n_ticks(self): def test_generate_colorbar_vmin_vmax_none(self): cb = scprep.plot.tools.generate_colorbar("inferno") + assert len(cb.get_ticks()) == 0 utils.assert_warns_message( UserWarning, "Cannot set `n_ticks` without setting `vmin` and `vmax`.", diff --git a/test/test_run.py b/test/test_run.py index 674d2e1f..5881f795 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -5,7 +5,6 @@ pass else: from tools import data - from tools import matrix from tools import utils import anndata @@ -418,7 +417,7 @@ def test_conversion_dataframe(): assert np.all(x["x"] == np.array([1, 2, 3])) assert np.all(x["y"] == np.array(["a", "b", "c"])) - def test_conversion_spmatrix(): + def test_conversion_sce(): scprep.run.install_bioconductor("SingleCellExperiment") ro.r("library(SingleCellExperiment)") ro.r("X <- matrix(1:6, nrow=2, ncol=3)") diff 
--git a/test/test_sanitize.py b/test/test_sanitize.py index f52e9dc4..6626f5d3 100644 --- a/test/test_sanitize.py +++ b/test/test_sanitize.py @@ -69,7 +69,8 @@ def test_check_index(): scprep.sanitize.check_index(X) with utils.assert_warns_message( RuntimeWarning, - "Renamed 2 copies of index GATGAGGCATTTCAGG-1 to (GATGAGGCATTTCAGG-1, GATGAGGCATTTCAGG-1.1)", + "Renamed 2 copies of index GATGAGGCATTTCAGG-1 to " + "(GATGAGGCATTTCAGG-1, GATGAGGCATTTCAGG-1.1)", ): scprep.sanitize.check_index(X.iloc[[0, 0]]) with warnings.catch_warnings(): @@ -87,7 +88,8 @@ def test_check_index(): assert Y.loc["GATGAGGCATTTCAGG-1"].shape[0] == 2 with utils.assert_warns_message( RuntimeWarning, - "Renamed 3 copies of index GTCATTTCATCTCGCT-1 to (GTCATTTCATCTCGCT-1, GTCATTTCATCTCGCT-1.1, GTCATTTCATCTCGCT-1.2)", + "Renamed 3 copies of index GTCATTTCATCTCGCT-1 to " + "(GTCATTTCATCTCGCT-1, GTCATTTCATCTCGCT-1.1, GTCATTTCATCTCGCT-1.2)", ): scprep.sanitize.check_index(X.iloc[[1, 1, 1]]) with warnings.catch_warnings(): diff --git a/test/test_stats.py b/test/test_stats.py index 678d3c23..ada62db3 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -1,7 +1,6 @@ from functools import partial from parameterized import parameterized from scipy import stats -from sklearn.metrics import mutual_info_score from tools import data from tools import matrix from tools import utils diff --git a/test/test_transform.py b/test/test_transform.py index 7f01eb38..78b944a4 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -1,10 +1,8 @@ -from scipy import sparse from tools import data from tools import matrix from tools import utils import numpy as np -import pandas as pd import scprep import warnings diff --git a/test/test_utils.py b/test/test_utils.py index 1d3f9b64..6cc47257 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -158,10 +158,12 @@ def test_combine_batches(): assert np.all(sample_labels.index == Y2.index) assert sample_labels.name == "sample_labels" - def transform(X): 
return scprep.utils.combine_batches( - [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], - batch_labels=[0, 1], - )[0] + def transform(X): + return scprep.utils.combine_batches( + [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], + batch_labels=[0, 1], + )[0] + matrix.test_matrix_types( X, utils.assert_transform_equals, diff --git a/test/tools/matrix.py b/test/tools/matrix.py index 635b9687..bfc59e38 100644 --- a/test/tools/matrix.py +++ b/test/tools/matrix.py @@ -1,4 +1,3 @@ -from functools import partial from packaging import version from scipy import sparse from scprep.utils import is_SparseDataFrame @@ -110,13 +109,15 @@ def _typename(X): def test_matrix_types(X, test_fun, matrix_types, *args, **kwargs): - """Test a function across a range of matrix types + """Test a function across a range of matrix types. Parameters ---------- X : matrix input test_fun : Function(X, *args, **kwargs) for testing - matrix_types : List of functions (typically class constructors) converting X to desired matrix formats + matrix_types : list + List of functions (typically class constructors) converting X + to desired matrix formats *args : positional arguments for test_fun **kwargs : keyword arguments for test_fun """ diff --git a/test/tools/utils.py b/test/tools/utils.py index 4fb68b66..fdfa4162 100644 --- a/test/tools/utils.py +++ b/test/tools/utils.py @@ -79,7 +79,10 @@ def assert_transform_unchanged(X, transform, check=assert_all_equal, **kwargs): def assert_transform_equivalent(X, Y, transform, check=assert_all_equal, **kwargs): - """Check the output of transform(X, **kwargs) == Y and transform(X, **kwargs) gives the same kind of matrix as X + """Check the transformation gives the right result and doesn't change the type. + + Ensures that transform(X, **kwargs) == Y and transform(X, **kwargs) + give the same kind of matrix as X. 
Parameters ---------- @@ -97,7 +100,7 @@ def assert_transform_equivalent(X, Y, transform, check=assert_all_equal, **kwarg Y2 = assert_transform_equals(X, Y, transform, check=check, **kwargs) assert assert_matrix_class_equivalent( X, Y2 - ), "{} produced inconsistent matrix output".format(_typename(X)) + ), "{} produced inconsistent matrix output".format(matrix._typename(X)) def assert_transform_raises(X, transform, exception=ValueError, **kwargs): From 3c74ac324575ca48d6865e85936942e7279eafdc Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 15:54:14 +0000 Subject: [PATCH 17/44] pre-commit --- test/_test_lazyload.py | 2 +- test/test_plot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/_test_lazyload.py b/test/_test_lazyload.py index 794fce13..a316ba2b 100644 --- a/test/_test_lazyload.py +++ b/test/_test_lazyload.py @@ -1,6 +1,6 @@ import numpy # noqa -import scipy # noqa import pandas # noqa +import scipy # noqa def test_lazyload(): diff --git a/test/test_plot.py b/test/test_plot.py index e354d0c1..fcaa7ba8 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -434,7 +434,7 @@ def test_discrete_tab20(self): np.testing.assert_equal(params.cmap.colors[:10], plt.cm.tab10.colors) np.testing.assert_equal( params.cmap.colors[10:], - plt.cm.tab20.colors[1 : 1 + (len(params.cmap.colors) - 10) * 2 : 2], + plt.cm.tab20.colors[1: 1 + (len(params.cmap.colors) - 10) * 2: 2], ) def test_continuous_less_than_20(self): From 36251581cd898dff6610f6f13a38ebdd8bb63fe9 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:55:06 -0500 Subject: [PATCH 18/44] import sys --- test/_test_lazyload.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/_test_lazyload.py b/test/_test_lazyload.py index 794fce13..21b39c1d 100644 --- a/test/_test_lazyload.py +++ b/test/_test_lazyload.py @@ -1,5 +1,6 @@ import numpy # noqa import scipy # noqa +import sys import 
pandas # noqa From 19412902e1a370d10da77cbc8f84f30384224450 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 10:58:19 -0500 Subject: [PATCH 19/44] fix setup.cfg --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 728a46b0..78ace4fe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ warning-is-error = 0 [flake8] ignore = # top-level module docstring - D100, D104, W503 + D100, D104, W503, # space before : conflicts with black E203 per-file-ignores = From 70c0d95a68f21388fa2e7d1d62c37d242b3dd5c3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 18 Feb 2021 15:59:04 +0000 Subject: [PATCH 20/44] pre-commit --- test/test_plot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_plot.py b/test/test_plot.py index fcaa7ba8..e354d0c1 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -434,7 +434,7 @@ def test_discrete_tab20(self): np.testing.assert_equal(params.cmap.colors[:10], plt.cm.tab10.colors) np.testing.assert_equal( params.cmap.colors[10:], - plt.cm.tab20.colors[1: 1 + (len(params.cmap.colors) - 10) * 2: 2], + plt.cm.tab20.colors[1 : 1 + (len(params.cmap.colors) - 10) * 2 : 2], ) def test_continuous_less_than_20(self): From 0d90d020f1177b4fb9b21a3f62708988f5f508c5 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 11:03:20 -0500 Subject: [PATCH 21/44] document --- test/tools/utils.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/test/tools/utils.py b/test/tools/utils.py index fdfa4162..17d5ea8b 100644 --- a/test/tools/utils.py +++ b/test/tools/utils.py @@ -12,24 +12,32 @@ def assert_warns_message(expected_warning, expected_message, *args, **kwargs): + """Assert that the correct warning message is raised. + + Handles regex better than the default. 
+ """ expected_regex = re.escape(expected_message) return assert_warns_regex(expected_warning, expected_regex, *args, **kwargs) -def assert_raises_message(expected_warning, expected_message, *args, **kwargs): +def assert_raises_message(expected_error, expected_message, *args, **kwargs): + """Assert that the correct error message is raised. + + Handles regex better than the default. + """ expected_regex = re.escape(expected_message) - return assert_raises_regex(expected_warning, expected_regex, *args, **kwargs) + return assert_raises_regex(expected_error, expected_regex, *args, **kwargs) def assert_all_equal(X, Y): - """Assert all values of two matrices are the same""" + """Assert all values of two matrices are the same.""" X = toarray(X) Y = toarray(Y) np.testing.assert_array_equal(X, Y) def assert_all_close(X, Y, rtol=1e-05, atol=1e-08): - """Assert all values of two matrices are similar + """Assert all values of two matrices are similar. Parameters ---------- @@ -42,7 +50,7 @@ def assert_all_close(X, Y, rtol=1e-05, atol=1e-08): def assert_transform_equals(X, Y, transform, check=assert_all_equal, **kwargs): - """Check that transform(X, **kwargs) == Y + """Check that transform(X, **kwargs) == Y. Parameters ---------- @@ -62,7 +70,7 @@ def assert_transform_equals(X, Y, transform, check=assert_all_equal, **kwargs): def assert_transform_unchanged(X, transform, check=assert_all_equal, **kwargs): - """Check that transform(X, **kwargs) == X + """Check that transform(X, **kwargs) == X. Parameters ---------- @@ -104,7 +112,7 @@ def assert_transform_equivalent(X, Y, transform, check=assert_all_equal, **kwarg def assert_transform_raises(X, transform, exception=ValueError, **kwargs): - """Check that transform(X) raises exception + """Check that transform(X) raises exception. 
Parameters ---------- @@ -129,7 +137,7 @@ def _sparse_dataframe_density(X): def assert_matrix_class_equivalent(X, Y): - """Check the format of X and Y are the same + """Check the format of X and Y are the same. We expect: * shape hasn't changed From 50892e4f360a4c5f7d08560c0985b9e1c2cfb69d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:37:10 -0500 Subject: [PATCH 22/44] install hdf5 --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 85a502e8..8a4a52d9 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -74,7 +74,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update -qq - sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg + sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf-dev - name: Set up Python uses: actions/setup-python@v2 From 49771eae358e835f47c308ac333e1ebf21e8989d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:40:55 -0500 Subject: [PATCH 23/44] drop python 3.5 support --- .github/workflows/run_tests.yml | 2 +- .pre-commit-config.yaml | 2 +- scprep/filter.py | 6 +++--- scprep/io/csv.py | 4 ++-- scprep/io/fcs.py | 2 +- scprep/plot/histogram.py | 6 +++--- scprep/plot/jitter.py | 2 +- scprep/plot/scatter.py | 8 ++++---- scprep/plot/scree.py | 2 +- scprep/plot/tools.py | 4 ++-- scprep/plot/variable_genes.py | 2 +- scprep/select.py | 4 ++-- setup.cfg | 3 +-- setup.py | 26 +++++++++----------------- 14 files changed, 32 insertions(+), 41 deletions(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 8a4a52d9..5b621893 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -55,10 +55,10 @@ jobs: fail-fast: false matrix: config: + - {name: '3.9', os: 
ubuntu-latest, python: '3.9', r: 'release' } - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'release' } - {name: '3.7', os: ubuntu-latest, python: '3.7', r: 'release' } - {name: '3.6', os: ubuntu-latest, python: '3.6', r: 'release' } - - {name: '3.5', os: ubuntu-latest, python: '3.5', r: 'release' } steps: - name: Cancel Previous Runs diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d39680a3..a5339641 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,7 +14,7 @@ repos: rev: 20.8b1 hooks: - id: black - args: ['--target-version', 'py35'] + args: ['--target-version=py36'] - repo: https://github.com/pre-commit/mirrors-autopep8 rev: v1.5.4 hooks: diff --git a/scprep/filter.py b/scprep/filter.py index a07c6d29..37837a18 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -138,7 +138,7 @@ def filter_values( keep_cells="above", return_values=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove all cells with `values` above or below a certain threshold. @@ -207,7 +207,7 @@ def filter_library_size( keep_cells=None, return_library_size=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove all cells with library size above or below a certain threshold. @@ -274,7 +274,7 @@ def filter_gene_set_expression( keep_cells=None, return_expression=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove cells with total expression of a gene set above or below a threshold. diff --git a/scprep/io/csv.py b/scprep/io/csv.py index 327ebed5..4d082eb7 100644 --- a/scprep/io/csv.py +++ b/scprep/io/csv.py @@ -21,7 +21,7 @@ def load_csv( cell_names=True, sparse=False, chunksize=10000, - **kwargs + **kwargs, ): r"""Load a csv file. @@ -108,7 +108,7 @@ def load_tsv( gene_names=True, cell_names=True, sparse=False, - **kwargs + **kwargs, ): r"""Load a tsv file. 
diff --git a/scprep/io/fcs.py b/scprep/io/fcs.py index 6982b611..bbfac0f5 100644 --- a/scprep/io/fcs.py +++ b/scprep/io/fcs.py @@ -259,7 +259,7 @@ def load_fcs( channel_naming="$PnS", reformat_meta=True, override=False, - **kwargs + **kwargs, ): """Load a fcs file. diff --git a/scprep/plot/histogram.py b/scprep/plot/histogram.py index 047fa11e..f6761e9c 100644 --- a/scprep/plot/histogram.py +++ b/scprep/plot/histogram.py @@ -80,7 +80,7 @@ def histogram( alpha=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot a histogram. @@ -207,7 +207,7 @@ def plot_library_size( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the library size histogram. @@ -295,7 +295,7 @@ def plot_gene_set_expression( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the histogram of the expression of a gene set. diff --git a/scprep/plot/jitter.py b/scprep/plot/jitter.py index ad50ebab..8159bb3b 100644 --- a/scprep/plot/jitter.py +++ b/scprep/plot/jitter.py @@ -65,7 +65,7 @@ def jitter( vmax=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a jitter plot. diff --git a/scprep/plot/scatter.py b/scprep/plot/scatter.py index 77465a3a..e61c0a04 100644 --- a/scprep/plot/scatter.py +++ b/scprep/plot/scatter.py @@ -601,7 +601,7 @@ def scatter( azim=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a scatter plot. @@ -853,7 +853,7 @@ def scatter2d( legend_ncol=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a 2D scatter plot. @@ -1046,7 +1046,7 @@ def scatter3d( azim=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a 3D scatter plot. @@ -1229,7 +1229,7 @@ def rotate_scatter3d( elev=None, ipython_html="jshtml", dpi=None, - **kwargs + **kwargs, ): """Create a rotating 3D scatter plot. 
diff --git a/scprep/plot/scree.py b/scprep/plot/scree.py index edaad647..12c72358 100644 --- a/scprep/plot/scree.py +++ b/scprep/plot/scree.py @@ -19,7 +19,7 @@ def scree_plot( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the explained variance of each principal component. diff --git a/scprep/plot/tools.py b/scprep/plot/tools.py index aa9d9b0e..8d82ed98 100644 --- a/scprep/plot/tools.py +++ b/scprep/plot/tools.py @@ -99,7 +99,7 @@ def generate_legend( title_fontsize=None, max_rows=10, ncol=None, - **kwargs + **kwargs, ): """Generate a legend on an axis. @@ -180,7 +180,7 @@ def generate_colorbar( n_ticks="auto", labelpad=10, mappable=None, - **kwargs + **kwargs, ): """Generate a colorbar on an axis. diff --git a/scprep/plot/variable_genes.py b/scprep/plot/variable_genes.py index 7caad590..82ff8692 100644 --- a/scprep/plot/variable_genes.py +++ b/scprep/plot/variable_genes.py @@ -18,7 +18,7 @@ def plot_gene_variability( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the histogram of gene variability. diff --git a/scprep/select.py b/scprep/select.py index 3956a25f..fe79fb6b 100644 --- a/scprep/select.py +++ b/scprep/select.py @@ -312,7 +312,7 @@ def select_cols( starts_with=None, ends_with=None, exact_word=None, - regex=None + regex=None, ): """Select columns from a data matrix. @@ -470,7 +470,7 @@ def select_rows( starts_with=None, ends_with=None, exact_word=None, - regex=None + regex=None, ): """Select rows from a data matrix. 
diff --git a/setup.cfg b/setup.cfg index 78ace4fe..43bfdd83 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,8 +20,7 @@ exclude = build, dist, test, - doc, - Snakefile + doc [isort] profile = black diff --git a/setup.py b/setup.py index 981a5a29..17db4dbe 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ from setuptools import setup import os -import sys install_requires = [ "numpy>=1.12.0", @@ -13,12 +12,7 @@ "packaging", ] -optional_requires = [ - "fcsparser", - "tables", - "h5py", - "anndata", -] +optional_requires = ["fcsparser", "tables", "h5py", "anndata", "anndata2ri>=1.0.6"] test_requires = [ "nose", @@ -30,6 +24,9 @@ "packaging", "mock", "h5py", + "matplotlib>=3.0", + "rpy2>=3.0", + "black", ] doc_requires = [ @@ -37,16 +34,9 @@ "sphinxcontrib-napoleon", "ipykernel", "nbsphinx", + "autodocsumm", ] -if sys.version_info[:2] < (3, 6): - test_requires += ["matplotlib>=3.0,<3.1", "rpy2>=3.0,<3.1"] - doc_requires += ["autodocsumm!=0.2.0"] -else: - test_requires += ["matplotlib>=3.0", "rpy2>=3.0", "black"] - optional_requires += ["anndata2ri>=1.0.6"] - doc_requires += ["autodocsumm"] - version_py = os.path.join(os.path.dirname(__file__), "scprep", "version.py") version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() @@ -61,7 +51,7 @@ packages=find_packages(), license="GNU General Public License Version 2", install_requires=install_requires, - python_requires=">=3.5", + python_requires=">=3.6", extras_require={ "test": test_requires + optional_requires, "doc": doc_requires, @@ -88,8 +78,10 @@ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Bio-Informatics", ], ) From 9a57d47bf0ef9835129aa96d115b6c9628885804 Mon Sep 17 
00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:48:14 -0500 Subject: [PATCH 24/44] fix typo --- .github/workflows/run_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 5b621893..a1841592 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -74,7 +74,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update -qq - sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf-dev + sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf5-dev - name: Set up Python uses: actions/setup-python@v2 From cbf167dd53360e0c3989edd19f95e29791218fdd Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:57:49 -0500 Subject: [PATCH 25/44] fix reference to license --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 17db4dbe..7bf49aa9 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ author="Scott Gigante, Daniel Burkhardt and Jay Stanley, Yale University", author_email="krishnaswamylab@gmail.com", packages=find_packages(), - license="GNU General Public License Version 2", + license="GNU General Public License Version 3", install_requires=install_requires, python_requires=">=3.6", extras_require={ From 528ec7d388e88280b5fa2737f43b247aa86158ac Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:57:56 -0500 Subject: [PATCH 26/44] remove travis --- .travis.yml | 58 -------------------------------------------------- travis_setup.R | 5 ----- 2 files changed, 63 deletions(-) delete mode 100644 .travis.yml delete mode 100644 travis_setup.R diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index e199304f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,58 +0,0 @@ -language: python -python: - - '3.5' - - '3.6' - - 
3.7-dev -sudo: required -dist: xenial -addons: - apt: - packages: - - libhdf5-dev - - ffmpeg - - pandoc - - gfortran - - libblas-dev - - liblapack-dev - - libglu1-mesa-dev - - freeglut3-dev - - mesa-common-dev - - libgsl-dev -cache: - - pip - - apt - - directories: - - $HOME/R/Library -install: - - pip install -U . -before_script: - - >- - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys - E298A3A825C0D65DFD57CBB651716619E084DAB9 - - >- - echo "deb http://cran.rstudio.com/bin/linux/ubuntu xenial-cran35/" | sudo - tee -a /etc/apt/sources.list - - sudo apt-get update -qq - - sudo apt-get install r-base-core=3.6\* -y - - export R_LIBS_USER="$HOME/R/Library" - - 'echo ".libPaths(c(''$R_LIBS_USER'', .libPaths()))" >> $HOME/.Rprofile' - - Rscript travis_setup.R -script: - - python -c "import scprep" - - 'pip install -U .[test]' - - 'if [ "$TRAVIS_PYTHON_VERSION" != "3.5" ]; then black . --check --diff --target-version py35; fi' - - python setup.py test - - 'pip install -U .[doc]' - - python setup.py build_sphinx -after_success: - - coveralls -deploy: - provider: pypi - user: scottgigante - password: '${PYPI_PASSWORD}' - distributions: sdist bdist_wheel - skip_existing: true - skip_cleanup: true - 'on': - tags: true - branch: master diff --git a/travis_setup.R b/travis_setup.R deleted file mode 100644 index a71289e6..00000000 --- a/travis_setup.R +++ /dev/null @@ -1,5 +0,0 @@ -chooseCRANmirror(ind=1) -if (!require("remotes")) install.packages("remotes", quietly=TRUE) -remotes::update_packages(upgrade="always") -if (!require("BiocManager")) install.packages("BiocManager", quietly=TRUE) -BiocManager::install(update=TRUE, ask=FALSE) From dc1b4be856d5069507163e390c98bfae65ee8462 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:58:05 -0500 Subject: [PATCH 27/44] update contributing to reflect pre-commit --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 
a6b9b910..b7a45186 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Code Style and Testing `scprep` is maintained at close to 100% code coverage. Contributors are encouraged to write tests for their code, but if you do not know how to do so, please do not feel discouraged from contributing code! Others can always help you test your contribution. -Code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage). To automatically reformat your code when you run `git commit`, you can run `./autoblack.sh` in the root directory of this project to add a hook to your `git` repository. +Code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage) and [`flake8`](https://flake8.pycqa.org/en/latest/) with [`hacking`](https://github.com/openstack/hacking). Code is automatically reformatted by [`pre-commit`](https://pre-commit.com/) when you push to GitHub. Code of Conduct --------------- From 45cee78f96a92ff208fd7aaec5b45404becb08f8 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:58:19 -0500 Subject: [PATCH 28/44] add openstack and pre-commit badges --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 41b58b28..427c7a40 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,12 @@ .. image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black :alt: Code style: black +.. image:: https://img.shields.io/badge/style%20guide-openstack-eb1a32.svg + :target: https://docs.openstack.org/hacking/latest/user/hacking.html#styleguide + :alt: Style Guide: OpenStack +.. image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white + :target: https://github.com/pre-commit/pre-commit + :alt: pre-commit `scprep` provides an all-in-one framework for loading, preprocessing, and plotting matrices in Python, with a focus on single-cell genomics. 
From 15b4309b147dee97be44389ad17d01dd6ac56c2e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:58:33 -0500 Subject: [PATCH 29/44] update requirements --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 31d927e5..5e580362 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ scipy>=0.18.1 scikit-learn>=0.19.1 pandas>=0.25 decorator>=4.3.0 +packaging From b0566ce40e6bac5694dcde0ddd929716d449e87d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 18 Feb 2021 13:58:49 -0500 Subject: [PATCH 30/44] remove autoblack --- autoblack.sh | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 autoblack.sh diff --git a/autoblack.sh b/autoblack.sh deleted file mode 100644 index 9364a77b..00000000 --- a/autoblack.sh +++ /dev/null @@ -1,13 +0,0 @@ -cat <<EOF >> .git/hooks/pre-commit -#!/bin/sh - -set -e - -files=\$(git diff --staged --name-only --diff-filter=d -- "*.py") - -for file in \$files; do - black -t py35 -q \$file - git add \$file -done -EOF -chmod +x .git/hooks/pre-commit From fed32fb27894ce23fef6c166141df05f59e82d70 Mon Sep 17 00:00:00 2001 From: Aarthi Venkat Date: Thu, 18 Feb 2021 16:52:26 -0500 Subject: [PATCH 31/44] Generalized install_github, formatting fixes --- scprep/run/__init__.py | 1 + scprep/run/dyngen.py | 266 +++++++++++++++++++++++++++++++++++++++ scprep/run/r_function.py | 71 ++++++++++- test/test_run.py | 42 +++++++ 4 files changed, 379 insertions(+), 1 deletion(-) create mode 100644 scprep/run/dyngen.py diff --git a/scprep/run/__init__.py b/scprep/run/__init__.py index 0a07c96e..1a4fde6b 100644 --- a/scprep/run/__init__.py +++ b/scprep/run/__init__.py @@ -1,3 +1,4 @@ from .r_function import RFunction, install_bioconductor from .splatter import SplatSimulate from .slingshot import Slingshot +from .dyngen import DyngenSimulate diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py new file mode 100644 index 00000000..f64249c9 --- /dev/null +++
b/scprep/run/dyngen.py @@ -0,0 +1,266 @@ +import pandas as pd +from . import r_function + +_get_backbones = r_function.RFunction( + setup=""" + library(dyngen) + """, + body=""" + names(list_backbones()) + """, +) + +_DyngenSimulate = r_function.RFunction( + args=""" + backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50, + num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, + compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE + """, + setup=""" + library(dyngen) + """, + body=""" + if (!(backbone_name %in% names(list_backbones()))) { + stop("Input not in list of dyngen backbones. + Choose name from get_backbones().") + } + if (!is.na(random_state)) { + set.seed(random_state) + } + + backbones <- list('bifurcating'=backbone_bifurcating(), + 'bifurcating_converging'=backbone_bifurcating_converging(), + 'bifurcating_cycle'=backbone_bifurcating_cycle(), + 'bifurcating_loop'=backbone_bifurcating_loop(), + 'binary_tree'=backbone_binary_tree(), + 'branching'=backbone_branching(), + 'consecutive_bifurcating'=backbone_consecutive_bifurcating(), + 'converging'=backbone_converging(), + 'cycle'=backbone_cycle(), + 'cycle_simple'=backbone_cycle_simple(), + 'disconnected'=backbone_disconnected(), + 'linear'=backbone_linear(), + 'linear_simple'=backbone_linear_simple(), + 'trifurcating'=backbone_trifurcating() + ) + + backbone <- backbones[[backbone_name]] + # silent default behavior of dyngen + if (num_tfs < nrow(backbone$module_info)) { + if (verbose) { + cat("If input num_tfs is less than backbone default,", + "Dyngen uses backbone default.\n") + } + num_tfs <- nrow(backbone$module_info) + } + if (verbose) { + cat('Run Parameters:') + cat('\n\tBackbone:', backbone_name) + cat('\n\tNumber of Cells:', num_cells) + cat('\n\tNumber of TFs:', num_tfs) + cat('\n\tNumber of Targets:', num_targets) + cat('\n\tNumber of HKs:', num_hks, '\n') + } + + init <- initialise_model( + backbone=backbone, + num_cells=num_cells, + 
num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_params=simulation_default( + census_interval=as.double(simulation_census_interval), + kinetics_noise_function = kinetics_noise_simple(mean=1, sd=0.005), + ssa_algorithm = ssa_etl(tau=300/3600), + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity), + num_cores = n_jobs, + download_cache_dir=NULL, + verbose=verbose + ) + out <- generate_dataset(init) + data <- list(cell_info = as.data.frame(out$dataset$cell_info), + expression = as.data.frame(as.matrix(out$dataset$expression))) + + if (compute_cellwise_grn) { + data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network) + data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc) + } + if (compute_rna_velocity) { + data[['rna_velocity']] <- as.data.frame(as.matrix(out$dataset$rna_velocity)) + } + + data + """, +) + + +def install( + lib=None, dependencies=None, update=False, + repos="http://cran.us.r-project.org", build_vignettes=False, + force=False, verbose=True): + """Install Dyngen Github repository. + + Parameters + ---------- + lib: string + Directory to install the package. + If missing, defaults to the first element of .libPaths(). + dependencies: boolean, optional (default: None/NA) + When True, installs all packages specified under "Depends", "Imports", + "LinkingTo" and "Suggests". + When False, installs no dependencies. + When None/NA, installs all packages specified under "Depends", "Imports" + and "LinkingTo". + update: string or boolean, optional (default: False) + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. + For non-interactive sessions "ask" is equivalent to "always". + TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. 
+ repos: string, optional (default: "http://cran.us.r-project.org"): + R package repository. + build_vignettes: boolean, optional (default: False) + Builds Github vignettes. + force: boolean, optional (default: False) + Forces installation even if remote state has not changed since previous install. + verbose: boolean, optional (default: True) + Install script verbosity. + """ + + r_function.install_github(repo="dynverse/dyngen", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose) + +def get_backbones(): + """Output full list of cell trajectory backbones. + + Returns + ------- + backbones: array of backbone names + """ + return(_get_backbones()) + + +def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25, + simulation_census_interval=10, compute_cellwise_grn=False, + compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True): + """Simulate dataset with cellular backbone. + + The backbone determines the overall dynamic process during a simulation. + It consists of a set of gene modules, which regulate each other such that + expression of certain genes changes over time in a specific manner. + + DyngenSimulate is a Python wrapper for the R package Dyngen. + Default values obtained from Github vignettes. + For more details, read about Dyngen on Github_. + + .. _Github: https://github.com/dynverse/dyngen + + Parameters + ---------- + backbone: string + Backbone name from dyngen list of backbones. + Get list with get_backbones(). + num_cells: int, optional (default: 500) + Number of cells. + num_tfs: int, optional (default: 100) + Number of transcription factors. + The TFs are the main drivers of the molecular changes in the simulation. + A TF can only be regulated by other TFs or itself. + + NOTE: If num_tfs input is less than nrow(backbone$module_info), + Dyngen will default to nrow(backbone$module_info). + This quantity varies between backbones and with each run (without seed).
+ It is generally less than 75. + It is recommended to input num_tfs >= 100 to stabilize the output. + num_targets: int, optional (default: 50) + Number of target genes. + Target genes are regulated by a TF or another target gene, + but are always downstream of at least one TF. + num_hks: int, optional (default: 25) + Number of housekeeping genes. + Housekeeping genes are completely separate from any TFs or target genes. + simulation_census_interval: int, optional (default: 10) + Stores the abundance levels only after a specific interval has passed. + The lower the interval, the higher detail of simulation trajectory retained, + though many timepoints will contain similar information. + compute_cellwise_grn: boolean, optional (default: False) + If True, computes the ground truth cellwise gene regulatory networks. + Also outputs ground truth bulk (entire dataset) regulatory network. + NOTE: Increases compute time significantly. + compute_rna_velocity: boolean, optional (default: False) + If true, computes the ground truth propensity ratios after simulation. + NOTE: Increases compute time significantly. + n_jobs: int, optional (default: 7) + Number of cores to use. + random_state: int, optional (default: None) + Fixes seed for simulation generator. + verbose: boolean, optional (default: True) + Data generation verbosity. + + Returns + ------- + Dictionary data of pd.DataFrames: + data['cell_info']: pd.DataFrame, shape (n_cells, 7) + Columns: cell_id, step_ix, simulation_i, sim_time, num_molecules, mult, + lib_size + sim_time is the simulated timepoint for a given cell. + + data['expression']: pd.DataFrame, shape (n_cells, n_genes) + Log-transformed counts with dropout. + + If compute_cellwise_grn is True, + data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4) + Columns: regulator, target, strength, effect. + Strength is positive and unbounded. + Effect is either +1 (for activation) or -1 (for inhibition).
+ + data['cellwise_grn']: pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4) + Columns: cell_id, regulator, target, strength. + The output does not include all edges per cell. + The regulatory effect lies between [−1, 1], where -1 is complete inhibition + of target by TF, +1 is maximal activation of target by TF, + and 0 is inactivity of the regulatory interaction between R and T. + + If compute_rna_velocity is True, + data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes) + Propensity ratios for each cell. + + Example + -------- + >>> import scprep + >>> scprep.run.dyngen.install() + >>> backbones = scprep.run.dyngen.get_backbones() + >>> data = scprep.run.DyngenSimulate(backbone=backbones[0]) + """ + + kwargs = {} + if random_state is not None: + kwargs["random_state"] = random_state + + rdata = _DyngenSimulate(backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + **kwargs) + data = {} + data['cell_info'] = pd.DataFrame(rdata['cell_info']) + data['expression'] = pd.DataFrame(rdata['expression']) + if compute_cellwise_grn: + data['cellwise_grn'] = pd.DataFrame(rdata['cellwise_grn']) + data['bulk_grn'] = pd.DataFrame(rdata['bulk_grn']) + if compute_rna_velocity: + data['rna_velocity'] = pd.DataFrame(rdata['rna_velocity']) + + return(data) diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 37d66c72..630f389d 100644 --- a/scprep/run/r_function.py +++ b/scprep/run/r_function.py @@ -149,7 +149,6 @@ def __call__(self, *args, rpy_cleanup=None, rpy_verbose=None, **kwargs): """, ) - def install_bioconductor( package=None, site_repository=None, update=False, version=None, verbose=True ): @@ -178,3 +177,73 @@ def install_bioconductor( if version is not None: kwargs["version"] = 
version _install_bioconductor(**kwargs) + + +_install_github = RFunction( + args="""repo=character(), lib=.libPaths()[1], dependencies=NA, + update=FALSE, repos='http://cran.us.r-project.org', + build_vignettes=FALSE, force=FALSE, verbose=TRUE""", + body=""" + quiet <- !verbose + + if (!require('remotes')) install.packages('remotes') + remotes::install_github(repo=repo, + lib=lib, dependencies=dependencies, + upgrade=update, repos=repos, + build_vignettes=build_vignettes, + force=force, quiet=quiet) + + # prepend path to libPaths if new library + if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths())) + + if (verbose) cat('.libPaths():', .libPaths()) + """ +) + +def install_github( + repo, lib=None, dependencies=None, update=False, + repos="http://cran.us.r-project.org", build_vignettes=False, + force=False, verbose=True): + """Install a Github repository. + + Parameters + ---------- + repo: string + Github repository name to install. + lib: string + Directory to install the package. + If missing, defaults to the first element of .libPaths(). + dependencies: boolean, optional (default: None/NA) + When True, installs all packages specified under "Depends", "Imports", + "LinkingTo" and "Suggests". + When False, installs no dependencies. + When None/NA, installs all packages specified under "Depends", "Imports" + and "LinkingTo". + update: string or boolean, optional (default: False) + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. + For non-interactive sessions "ask" is equivalent to "always". + TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. + repos: string, optional (default: "http://cran.us.r-project.org"): + R package repository. + build_vignettes: boolean, optional (default: False) + Builds Github vignettes. 
+ force: boolean, optional (default: False) + Forces installation even if remote state has not changed since previous install. + verbose: boolean, optional (default: True) + Install script verbosity. + """ + kwargs = {} + if lib is not None: + kwargs["lib"] = lib + if dependencies is not None: + kwargs["dependencies"] = dependencies + + _install_github(repo=repo, + update=update, + repos=repos, + build_vignettes=build_vignettes, + force=force, + verbose=verbose, + **kwargs) diff --git a/test/test_run.py b/test/test_run.py index 2dfd8c94..7f3f3a85 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -237,7 +237,49 @@ def test_splatter_warning(self): assert ( rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) + + class TestDyngen(unittest.TestCase): + @classmethod + def setUpClass(self): + scprep.run.dyngen.install(verbose=False) + + def test_dyngen_backbone_not_in_list(self): + utils.assert_raises_message( + rpy2.rinterface_lib.embedded.RRuntimeError, + "Error in (function (backbone_name = character(), num_cells = 500, num_tfs = 100, :", + scprep.run.DyngenSimulate, + backbone="not_a_backbone", + ) + def test_dyngen_default(self): + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, + num_targets=10, num_hks=10,verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression'} + assert sim['cell_info'].shape == (50, 7) + assert sim['expression'].shape == (50, 70) + + def test_dyngen_with_grn(self): + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, + num_targets=10, num_hks=10, + compute_cellwise_grn=True, verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression', 'bulk_grn', 'cellwise_grn'} + assert sim['cell_info'].shape == (50, 7) + assert sim['expression'].shape == (50, 70) + assert sim['bulk_grn'].shape == (134, 4) + assert sim['cellwsie_grn'].shape == (2133, 4) + + def test_dyngen_with_rna_velocity(self): + sim = 
scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, + num_targets=10, num_hks=10, + compute_rna_velocity=True, verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression', 'rna_velocity'} + assert sim['cell_info'].shape == (50, 7) + assert sim['expression'].shape == (50, 70) + assert sim['rna_velocity'].shape == (50, 70) + class TestSlingshot(unittest.TestCase): @classmethod def setUpClass(self): From 76d0d1589fc83bef25f50b17be07ad3efa981fc4 Mon Sep 17 00:00:00 2001 From: Aarthi Venkat Date: Thu, 18 Feb 2021 18:02:54 -0500 Subject: [PATCH 32/44] Handling fewer cells than specified with force_num_cells flag --- scprep/run/dyngen.py | 24 +++++++++++++++++++++++- test/test_run.py | 43 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 57 insertions(+), 10 deletions(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index f64249c9..c4eb00b2 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -147,7 +147,8 @@ def get_backbones(): def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25, simulation_census_interval=10, compute_cellwise_grn=False, - compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True): + compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True, + force_num_cells=False): """Simulate dataset with cellular backbone. The backbone determines the overall dynamic process during a simulation. @@ -201,6 +202,9 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks Fixes seed for simulation generator. verbose: boolean, optional (default: True) Data generation verbosity. + force_num_cells: boolean, optional (default: False) + Dyngen occassionally produces fewer cells than specified. + Set this flag to TRUE to rerun Dyngen until correct cell count is reached. 
Returns ------- @@ -254,6 +258,24 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks verbose=verbose, rpy_verbose=verbose, **kwargs) + if force_num_cells: + if random_state is None: + random_state = -1 + while(pd.DataFrame(rdata['cell_info']).shape[0] != num_cells): + random_state += 1 + rdata = _DyngenSimulate(backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + random_state=random_state) + data = {} data['cell_info'] = pd.DataFrame(rdata['cell_info']) data['expression'] = pd.DataFrame(rdata['expression']) diff --git a/test/test_run.py b/test/test_run.py index 7f3f3a85..cc753bba 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,7 +4,7 @@ # python 3.5 pass else: - from tools import utils, matrix, data + #from tools import utils, matrix, data import numpy as np import pandas as pd import rpy2.robjects as ro @@ -19,6 +19,7 @@ import rpy2.rinterface_lib.callbacks import rpy2.rinterface_lib.embedded import mock + import re builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror @@ -237,6 +238,7 @@ def test_splatter_warning(self): assert ( rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) + class TestDyngen(unittest.TestCase): @classmethod @@ -255,20 +257,37 @@ def test_dyngen_default(self): sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, num_targets=10, num_hks=10,verbose=False) + assert set(sim.keys()) == {'cell_info', 'expression'} + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['cell_info'].shape[1] == 7 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 + + def 
test_dyngen_force_cell_counts(self): + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, + num_targets=10, num_hks=10,verbose=False, + force_num_cells=True) + assert set(sim.keys()) == {'cell_info', 'expression'} assert sim['cell_info'].shape == (50, 7) assert sim['expression'].shape == (50, 70) - + def test_dyngen_with_grn(self): sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, num_targets=10, num_hks=10, compute_cellwise_grn=True, verbose=False) assert set(sim.keys()) == {'cell_info', 'expression', 'bulk_grn', 'cellwise_grn'} - assert sim['cell_info'].shape == (50, 7) - assert sim['expression'].shape == (50, 70) - assert sim['bulk_grn'].shape == (134, 4) - assert sim['cellwsie_grn'].shape == (2133, 4) + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['cell_info'].shape[1] == 7 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 + assert sim['bulk_grn'].shape[1] == 4 + assert sim['cellwise_grn'].shape[1] == 4 def test_dyngen_with_rna_velocity(self): sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, @@ -276,9 +295,15 @@ def test_dyngen_with_rna_velocity(self): compute_rna_velocity=True, verbose=False) assert set(sim.keys()) == {'cell_info', 'expression', 'rna_velocity'} - assert sim['cell_info'].shape == (50, 7) - assert sim['expression'].shape == (50, 70) - assert sim['rna_velocity'].shape == (50, 70) + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['cell_info'].shape[1] == 7 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 + assert sim['rna_velocity'].shape[0] > 0 + assert sim['rna_velocity'].shape[0] <= 50 + assert sim['rna_velocity'].shape[1] == 70 class TestSlingshot(unittest.TestCase): @classmethod From 
6051b2bbb5a7e81e63ef7d5ea9bc122185dfd000 Mon Sep 17 00:00:00 2001 From: Aarthi Venkat Date: Thu, 18 Feb 2021 18:04:43 -0500 Subject: [PATCH 33/44] Import fixes --- test/test_run.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_run.py b/test/test_run.py index cc753bba..2249a708 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,7 +4,7 @@ # python 3.5 pass else: - #from tools import utils, matrix, data + from tools import utils, matrix, data import numpy as np import pandas as pd import rpy2.robjects as ro @@ -19,7 +19,6 @@ import rpy2.rinterface_lib.callbacks import rpy2.rinterface_lib.embedded import mock - import re builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror From 8822fe787baaf8e6776b806069dba64cfdb0881a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 19 Feb 2021 14:59:28 +0000 Subject: [PATCH 34/44] pre-commit --- scprep/run/dyngen.py | 185 ++++++++++++++++++++++----------------- scprep/run/r_function.py | 54 +++++++----- test/test_run.py | 134 +++++++++++++++++----------- 3 files changed, 217 insertions(+), 156 deletions(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index c4eb00b2..34e22e0b 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -1,6 +1,7 @@ -import pandas as pd from . 
import r_function +import pandas as pd + _get_backbones = r_function.RFunction( setup=""" library(dyngen) @@ -13,7 +14,7 @@ _DyngenSimulate = r_function.RFunction( args=""" backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50, - num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, + num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE """, setup=""" @@ -27,11 +28,11 @@ if (!is.na(random_state)) { set.seed(random_state) } - + backbones <- list('bifurcating'=backbone_bifurcating(), 'bifurcating_converging'=backbone_bifurcating_converging(), 'bifurcating_cycle'=backbone_bifurcating_cycle(), - 'bifurcating_loop'=backbone_bifurcating_loop(), + 'bifurcating_loop'=backbone_bifurcating_loop(), 'binary_tree'=backbone_binary_tree(), 'branching'=backbone_branching(), 'consecutive_bifurcating'=backbone_consecutive_bifurcating(), @@ -43,7 +44,7 @@ 'linear_simple'=backbone_linear_simple(), 'trifurcating'=backbone_trifurcating() ) - + backbone <- backbones[[backbone_name]] # silent default behavior of dyngen if (num_tfs < nrow(backbone$module_info)) { @@ -52,7 +53,7 @@ "Dyngen uses backbone default.\n") } num_tfs <- nrow(backbone$module_info) - } + } if (verbose) { cat('Run Parameters:') cat('\n\tBackbone:', backbone_name) @@ -61,7 +62,7 @@ cat('\n\tNumber of Targets:', num_targets) cat('\n\tNumber of HKs:', num_hks, '\n') } - + init <- initialise_model( backbone=backbone, num_cells=num_cells, @@ -81,7 +82,7 @@ out <- generate_dataset(init) data <- list(cell_info = as.data.frame(out$dataset$cell_info), expression = as.data.frame(as.matrix(out$dataset$expression))) - + if (compute_cellwise_grn) { data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network) data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc) @@ -93,14 +94,19 @@ data """, ) - - + + def install( - lib=None, dependencies=None, update=False, - 
repos="http://cran.us.r-project.org", build_vignettes=False, - force=False, verbose=True): + lib=None, + dependencies=None, + update=False, + repos="http://cran.us.r-project.org", + build_vignettes=False, + force=False, + verbose=True, +): """Install Dyngen Github repository. - + Parameters ---------- lib: string @@ -113,9 +119,9 @@ def install( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. For non-interactive sessions "ask" is equivalent to "always". TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): @@ -127,34 +133,47 @@ def install( verbose: boolean, optional (default: True) Install script verbosity. """ - - r_function.install_github(repo="dynverse/dyngen", - update=update, - lib=lib, - dependencies=dependencies, - repos=repos, - verbose=verbose) - + + r_function.install_github( + repo="dynverse/dyngen", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose, + ) + + def get_backbones(): """Output full list of cell trajectory backbones. 
- + Returns ------- backbones: array of backbone names """ - return(_get_backbones()) + return _get_backbones() -def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25, - simulation_census_interval=10, compute_cellwise_grn=False, - compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True, - force_num_cells=False): +def DyngenSimulate( + backbone, + num_cells=500, + num_tfs=100, + num_targets=50, + num_hks=25, + simulation_census_interval=10, + compute_cellwise_grn=False, + compute_rna_velocity=False, + n_jobs=7, + random_state=None, + verbose=True, + force_num_cells=False, +): """Simulate dataset with cellular backbone. - + The backbone determines the overall dynamic process during a simulation. It consists of a set of gene modules, which regulate each other such that expression of certain genes change over time in a specific manner. - + DyngenSimulate is a Python wrapper for the R package Dyngen. Default values obtained from Github vignettes. For more details, read about Dyngen on Github_. @@ -169,21 +188,21 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks num_cells: int, optional (default: 500) Number of cells. num_tfs: int, optional (default: 100) - Number of transcription factors. + Number of transcription factors. The TFs are the main drivers of the molecular changes in the simulation. A TF can only be regulated by other TFs or itself. - + NOTE: If num_tfs input is less than nrow(backbone$module_info), Dyngen will default to nrow(backbone$module_info). This quantity varies between backbones and with each run (without seed). - It is generally less than 75. + It is generally less than 75. It is recommended to input num_tfs >= 100 to stabilize the output. num_targets: int, optional (default: 50) - Number of target genes. + Number of target genes. Target genes are regulated by a TF or another target gene, - but are always downstream of at least one TF. 
+ but are always downstream of at least one TF. num_hks: int, optional (default: 25) - Number of housekeeping genees. + Number of housekeeping genees. Housekeeping genes are completely separate from any TFs or target genes. simulation_census_interval: int, optional (default: 10) Stores the abundance levels only after a specific interval has passed. @@ -203,9 +222,9 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks verbose: boolean, optional (default: True) Data generation verbosity. force_num_cells: boolean, optional (default: False) - Dyngen occassionally produces fewer cells than specified. + Dyngen occassionally produces fewer cells than specified. Set this flag to TRUE to rerun Dyngen until correct cell count is reached. - + Returns ------- Dictionary data of pd.DataFrames: @@ -213,23 +232,23 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks Columns: cell_id, step_ix, simulation_i, sim_time, num_molecules, mult, lib_size sim_time is the simulated timepoint for a given cell. - + data['expression']: pd.DataFrame, shape (n_cells, n_genes) Log-transformed counts with dropout. - + If compute_cellwise_grn is True, data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4) Columns: regulator, target, strength, effect. Strength is positive and unbounded. Effect is either +1 (for activation) or -1 (for inhibition). - + data['cellwise_grn']: pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4) - Columns: cell_id, regulator, target, strength. - The output does not include all edges per cell. + Columns: cell_id, regulator, target, strength. + The output does not include all edges per cell. The regulatory effect lies between [−1, 1], where -1 is complete inhibition of target by TF, +1 is maximal activation of target by TF, and 0 is inactivity of the regulatory interaction between R and T. 
- + If compute_rna_velocity is True, data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes) Propensity ratios for each cell. @@ -241,48 +260,52 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks >>> backbones = scprep.run.dyngen.get_backbones() >>> data = scprep.run.DyngenSimulate(backbone=backbones[0]) """ - + kwargs = {} if random_state is not None: kwargs["random_state"] = random_state - - rdata = _DyngenSimulate(backbone_name=backbone, - num_cells=num_cells, - num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - rpy_verbose=verbose, - **kwargs) + + rdata = _DyngenSimulate( + backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + **kwargs, + ) if force_num_cells: if random_state is None: random_state = -1 - while(pd.DataFrame(rdata['cell_info']).shape[0] != num_cells): + while pd.DataFrame(rdata["cell_info"]).shape[0] != num_cells: random_state += 1 - rdata = _DyngenSimulate(backbone_name=backbone, - num_cells=num_cells, - num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - rpy_verbose=verbose, - random_state=random_state) + rdata = _DyngenSimulate( + backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + 
compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + random_state=random_state, + ) data = {} - data['cell_info'] = pd.DataFrame(rdata['cell_info']) - data['expression'] = pd.DataFrame(rdata['expression']) + data["cell_info"] = pd.DataFrame(rdata["cell_info"]) + data["expression"] = pd.DataFrame(rdata["expression"]) if compute_cellwise_grn: - data['cellwise_grn'] = pd.DataFrame(rdata['cellwise_grn']) - data['bulk_grn'] = pd.DataFrame(rdata['bulk_grn']) + data["cellwise_grn"] = pd.DataFrame(rdata["cellwise_grn"]) + data["bulk_grn"] = pd.DataFrame(rdata["bulk_grn"]) if compute_rna_velocity: - data['rna_velocity'] = pd.DataFrame(rdata['rna_velocity']) - - return(data) + data["rna_velocity"] = pd.DataFrame(rdata["rna_velocity"]) + + return data diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 7eef89f3..8989b51c 100644 --- a/scprep/run/r_function.py +++ b/scprep/run/r_function.py @@ -149,6 +149,7 @@ def __call__(self, *args, rpy_cleanup=None, rpy_verbose=None, **kwargs): """, ) + def install_bioconductor( package=None, site_repository=None, update=False, version=None, verbose=True ): @@ -177,11 +178,11 @@ def install_bioconductor( if version is not None: kwargs["version"] = version _install_bioconductor(**kwargs) - - + + _install_github = RFunction( args="""repo=character(), lib=.libPaths()[1], dependencies=NA, - update=FALSE, repos='http://cran.us.r-project.org', + update=FALSE, repos='http://cran.us.r-project.org', build_vignettes=FALSE, force=FALSE, verbose=TRUE""", body=""" quiet <- !verbose @@ -192,20 +193,27 @@ def install_bioconductor( upgrade=update, repos=repos, build_vignettes=build_vignettes, force=force, quiet=quiet) - + # prepend path to libPaths if new library if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths())) - + if (verbose) cat('.libPaths():', .libPaths()) - """ + """, ) - + + def install_github( - repo, lib=None, dependencies=None, update=False, - 
repos="http://cran.us.r-project.org", build_vignettes=False, - force=False, verbose=True): + repo, + lib=None, + dependencies=None, + update=False, + repos="http://cran.us.r-project.org", + build_vignettes=False, + force=False, + verbose=True, +): """Install a Github repository. - + Parameters ---------- repo: string @@ -220,9 +228,9 @@ def install_github( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. For non-interactive sessions "ask" is equivalent to "always". TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. 
repos: string, optional (default: "http://cran.us.r-project.org"): @@ -239,11 +247,13 @@ def install_github( kwargs["lib"] = lib if dependencies is not None: kwargs["dependencies"] = dependencies - - _install_github(repo=repo, - update=update, - repos=repos, - build_vignettes=build_vignettes, - force=force, - verbose=verbose, - **kwargs) + + _install_github( + repo=repo, + update=update, + repos=repos, + build_vignettes=build_vignettes, + force=force, + verbose=verbose, + **kwargs, + ) diff --git a/test/test_run.py b/test/test_run.py index 67fd3a77..eeae60b5 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -240,72 +240,100 @@ def test_splatter_warning(self): rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) - class TestDyngen(unittest.TestCase): @classmethod def setUpClass(self): scprep.run.dyngen.install(verbose=False) - def test_dyngen_backbone_not_in_list(self): + def test_dyngen_backbone_not_in_list(self): utils.assert_raises_message( - rpy2.rinterface_lib.embedded.RRuntimeError, - "Error in (function (backbone_name = character(), num_cells = 500, num_tfs = 100, :", - scprep.run.DyngenSimulate, - backbone="not_a_backbone", - ) + rpy2.rinterface_lib.embedded.RRuntimeError, + "Error in (function (backbone_name = character(), num_cells = 500, num_tfs = 100, :", + scprep.run.DyngenSimulate, + backbone="not_a_backbone", + ) def test_dyngen_default(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, - num_targets=10, num_hks=10,verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['cell_info'].shape[1] == 7 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert sim['expression'].shape[1] == 70 + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + ) + + assert 
set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["cell_info"].shape[1] == 7 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 def test_dyngen_force_cell_counts(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, - num_targets=10, num_hks=10,verbose=False, - force_num_cells=True) - - assert set(sim.keys()) == {'cell_info', 'expression'} - assert sim['cell_info'].shape == (50, 7) - assert sim['expression'].shape == (50, 70) + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + force_num_cells=True, + ) + + assert set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape == (50, 7) + assert sim["expression"].shape == (50, 70) def test_dyngen_with_grn(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, - num_targets=10, num_hks=10, - compute_cellwise_grn=True, verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression', 'bulk_grn', 'cellwise_grn'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['cell_info'].shape[1] == 7 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert sim['expression'].shape[1] == 70 - assert sim['bulk_grn'].shape[1] == 4 - assert sim['cellwise_grn'].shape[1] == 4 - + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_cellwise_grn=True, + verbose=False, + ) + + assert set(sim.keys()) == { + "cell_info", + "expression", + "bulk_grn", + "cellwise_grn", + } + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["cell_info"].shape[1] == 7 + assert sim["expression"].shape[0] > 0 + assert 
sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["bulk_grn"].shape[1] == 4 + assert sim["cellwise_grn"].shape[1] == 4 + def test_dyngen_with_rna_velocity(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, num_tfs=50, - num_targets=10, num_hks=10, - compute_rna_velocity=True, verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression', 'rna_velocity'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['cell_info'].shape[1] == 7 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert sim['expression'].shape[1] == 70 - assert sim['rna_velocity'].shape[0] > 0 - assert sim['rna_velocity'].shape[0] <= 50 - assert sim['rna_velocity'].shape[1] == 70 - + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_rna_velocity=True, + verbose=False, + ) + + assert set(sim.keys()) == {"cell_info", "expression", "rna_velocity"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["cell_info"].shape[1] == 7 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["rna_velocity"].shape[0] > 0 + assert sim["rna_velocity"].shape[0] <= 50 + assert sim["rna_velocity"].shape[1] == 70 + class TestSlingshot(unittest.TestCase): @classmethod def setUpClass(self): From 168cff9d2f54afb6a4e1c71f154891379ad47354 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 19 Feb 2021 10:40:00 -0500 Subject: [PATCH 35/44] Install dyngen deps from renv for caching --- .github/workflows/run_tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index a1841592..9280e307 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -116,6 +116,8 @@ jobs: 
renv::restore() renv::install("bioc::splatter") renv::install("bioc::slingshot") + renv::install("github::dynverse/dyngen") + renv::install("github::dynverse/dynwrap") shell: Rscript {0} - name: Install package & dependencies From 602c836a8ce121fd31117eb88c5a219b41bd0353 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 22 Feb 2021 10:16:26 -0500 Subject: [PATCH 36/44] change default empty drevi behaviour to np.zeros --- scprep/stats.py | 2 +- test/test_stats.py | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/scprep/stats.py b/scprep/stats.py index 5a5a1df8..7f4f4faa 100644 --- a/scprep/stats.py +++ b/scprep/stats.py @@ -209,7 +209,7 @@ def knnDREMI( ) # constant input: mutual information is numerically zero if return_drevi: - return 0, None + return 0, np.zeros((n_bins, n_bins), dtype=float) else: return 0 diff --git a/test/test_stats.py b/test/test_stats.py index ada62db3..58455caf 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -162,13 +162,19 @@ def test_knnDREMI(): Y = scprep.stats.knnDREMI(X[:, 0], X[:, 1]) assert isinstance(Y, float) np.testing.assert_allclose(Y, 0.16238906) + n_bins = 20 Y2, drevi = scprep.stats.knnDREMI( - X[:, 0], X[:, 1], plot=True, filename="test.png", return_drevi=True + X[:, 0], + X[:, 1], + plot=True, + filename="test.png", + return_drevi=True, + n_bins=n_bins, ) assert os.path.isfile("test.png") os.remove("test.png") assert Y2 == Y - assert drevi.shape == (20, 20) + assert drevi.shape == (n_bins, n_bins) matrix.test_all_matrix_types( X, utils.assert_transform_equals, @@ -178,9 +184,13 @@ def test_knnDREMI(): ) with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - assert scprep.stats.knnDREMI( - X[:, 0], np.repeat(X[0, 1], X.shape[0]), return_drevi=True - ) == (0, None) + n_bins = 10 + dremi, drevi = scprep.stats.knnDREMI( + X[:, 0], np.repeat(X[0, 1], X.shape[0]), n_bins=n_bins, return_drevi=True + ) + assert dremi == 0 + assert 
np.all(drevi == 0) + assert drevi.shape == (n_bins, n_bins) utils.assert_raises_message( ValueError, "Expected k as an integer. Got ", From 8348f11605ba4bfd1b4449df8040169fad43c14c Mon Sep 17 00:00:00 2001 From: Aarthi Venkat Date: Tue, 2 Mar 2021 23:16:32 -0500 Subject: [PATCH 37/44] force_num_cells fix, additional tests, formatting --- scprep/run/dyngen.py | 208 ++++++++++++++++++--------------------- scprep/run/r_function.py | 60 +++++------ test/test_run.py | 197 +++++++++++++++++++----------------- 3 files changed, 224 insertions(+), 241 deletions(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index 34e22e0b..c49314c5 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -1,6 +1,5 @@ -from . import r_function - import pandas as pd +from . import r_function _get_backbones = r_function.RFunction( setup=""" @@ -14,25 +13,21 @@ _DyngenSimulate = r_function.RFunction( args=""" backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50, - num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, + num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE """, setup=""" library(dyngen) """, body=""" - if (!(backbone_name %in% names(list_backbones()))) { - stop("Input not in list of dyngen backbones. 
- Choose name from get_backbones().") - } if (!is.na(random_state)) { set.seed(random_state) } - + backbones <- list('bifurcating'=backbone_bifurcating(), 'bifurcating_converging'=backbone_bifurcating_converging(), 'bifurcating_cycle'=backbone_bifurcating_cycle(), - 'bifurcating_loop'=backbone_bifurcating_loop(), + 'bifurcating_loop'=backbone_bifurcating_loop(), 'binary_tree'=backbone_binary_tree(), 'branching'=backbone_branching(), 'consecutive_bifurcating'=backbone_consecutive_bifurcating(), @@ -44,7 +39,7 @@ 'linear_simple'=backbone_linear_simple(), 'trifurcating'=backbone_trifurcating() ) - + backbone <- backbones[[backbone_name]] # silent default behavior of dyngen if (num_tfs < nrow(backbone$module_info)) { @@ -53,7 +48,7 @@ "Dyngen uses backbone default.\n") } num_tfs <- nrow(backbone$module_info) - } + } if (verbose) { cat('Run Parameters:') cat('\n\tBackbone:', backbone_name) @@ -62,7 +57,7 @@ cat('\n\tNumber of Targets:', num_targets) cat('\n\tNumber of HKs:', num_hks, '\n') } - + init <- initialise_model( backbone=backbone, num_cells=num_cells, @@ -82,7 +77,7 @@ out <- generate_dataset(init) data <- list(cell_info = as.data.frame(out$dataset$cell_info), expression = as.data.frame(as.matrix(out$dataset$expression))) - + if (compute_cellwise_grn) { data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network) data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc) @@ -94,19 +89,13 @@ data """, ) - - + def install( - lib=None, - dependencies=None, - update=False, - repos="http://cran.us.r-project.org", - build_vignettes=False, - force=False, - verbose=True, -): + lib=None, dependencies=None, update=False, + repos="http://cran.us.r-project.org", build_vignettes=False, + force=False, verbose=True): """Install Dyngen Github repository. - + Parameters ---------- lib: string @@ -119,11 +108,11 @@ def install( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". 
update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. For non-interactive sessions "ask" is equivalent to "always". - TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. + TRUE and FALSE also accepted, correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): R package repository. build_vignettes: boolean, optional (default: False) @@ -133,47 +122,40 @@ def install( verbose: boolean, optional (default: True) Install script verbosity. """ - - r_function.install_github( - repo="dynverse/dyngen", - update=update, - lib=lib, - dependencies=dependencies, - repos=repos, - verbose=verbose, - ) - - + r_function.install_github(repo="dynverse/dynwrap", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose) + + r_function.install_github(repo="dynverse/dyngen", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose) + def get_backbones(): """Output full list of cell trajectory backbones. 
- + Returns ------- backbones: array of backbone names """ - return _get_backbones() + return(_get_backbones()) -def DyngenSimulate( - backbone, - num_cells=500, - num_tfs=100, - num_targets=50, - num_hks=25, - simulation_census_interval=10, - compute_cellwise_grn=False, - compute_rna_velocity=False, - n_jobs=7, - random_state=None, - verbose=True, - force_num_cells=False, -): +def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25, + simulation_census_interval=10, compute_cellwise_grn=False, + compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True, + force_num_cells=False): """Simulate dataset with cellular backbone. - + The backbone determines the overall dynamic process during a simulation. It consists of a set of gene modules, which regulate each other such that expression of certain genes change over time in a specific manner. - + DyngenSimulate is a Python wrapper for the R package Dyngen. Default values obtained from Github vignettes. For more details, read about Dyngen on Github_. @@ -188,21 +170,21 @@ def DyngenSimulate( num_cells: int, optional (default: 500) Number of cells. num_tfs: int, optional (default: 100) - Number of transcription factors. + Number of transcription factors. The TFs are the main drivers of the molecular changes in the simulation. A TF can only be regulated by other TFs or itself. - + NOTE: If num_tfs input is less than nrow(backbone$module_info), Dyngen will default to nrow(backbone$module_info). This quantity varies between backbones and with each run (without seed). - It is generally less than 75. + It is generally less than 75. It is recommended to input num_tfs >= 100 to stabilize the output. num_targets: int, optional (default: 50) - Number of target genes. + Number of target genes. Target genes are regulated by a TF or another target gene, - but are always downstream of at least one TF. + but are always downstream of at least one TF. 
num_hks: int, optional (default: 25) - Number of housekeeping genees. + Number of housekeeping genees. Housekeeping genes are completely separate from any TFs or target genes. simulation_census_interval: int, optional (default: 10) Stores the abundance levels only after a specific interval has passed. @@ -222,33 +204,32 @@ def DyngenSimulate( verbose: boolean, optional (default: True) Data generation verbosity. force_num_cells: boolean, optional (default: False) - Dyngen occassionally produces fewer cells than specified. - Set this flag to TRUE to rerun Dyngen until correct cell count is reached. - + Dyngen occassionally produces fewer cells than specified. + Set this flag to True to rerun Dyngen until correct cell count is reached. + Returns ------- Dictionary data of pd.DataFrames: - data['cell_info']: pd.DataFrame, shape (n_cells, 7) - Columns: cell_id, step_ix, simulation_i, sim_time, num_molecules, mult, - lib_size + data['cell_info']: pd.DataFrame, shape (n_cells, 4) + Columns: cell_id, step_ix, simulation_i, sim_time sim_time is the simulated timepoint for a given cell. - + data['expression']: pd.DataFrame, shape (n_cells, n_genes) Log-transformed counts with dropout. - + If compute_cellwise_grn is True, data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4) Columns: regulator, target, strength, effect. Strength is positive and unbounded. Effect is either +1 (for activation) or -1 (for inhibition). - + data['cellwise_grn']: pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4) - Columns: cell_id, regulator, target, strength. - The output does not include all edges per cell. + Columns: cell_id, regulator, target, strength. + The output does not include all edges per cell. The regulatory effect lies between [−1, 1], where -1 is complete inhibition of target by TF, +1 is maximal activation of target by TF, and 0 is inactivity of the regulatory interaction between R and T. 
- + If compute_rna_velocity is True, data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes) Propensity ratios for each cell. @@ -261,51 +242,52 @@ def DyngenSimulate( >>> data = scprep.run.DyngenSimulate(backbone=backbones[0]) """ + if backbone not in get_backbones(): + raise ValueError(('Input not in default backbone list. ' + 'Choose backbone from get_backbones()')) + kwargs = {} if random_state is not None: kwargs["random_state"] = random_state - - rdata = _DyngenSimulate( - backbone_name=backbone, - num_cells=num_cells, - num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - rpy_verbose=verbose, - **kwargs, - ) + + rdata = _DyngenSimulate(backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + **kwargs) + if force_num_cells: if random_state is None: random_state = -1 - while pd.DataFrame(rdata["cell_info"]).shape[0] != num_cells: - random_state += 1 - rdata = _DyngenSimulate( - backbone_name=backbone, - num_cells=num_cells, - num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - rpy_verbose=verbose, - random_state=random_state, - ) - + + if pd.DataFrame(rdata['cell_info']).shape[0] != num_cells: + random_state +=1 + rdata = DyngenSimulate(backbone=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + 
compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + random_state=random_state) + data = {} - data["cell_info"] = pd.DataFrame(rdata["cell_info"]) - data["expression"] = pd.DataFrame(rdata["expression"]) + data['cell_info'] = pd.DataFrame(rdata['cell_info']) + data['expression'] = pd.DataFrame(rdata['expression']) if compute_cellwise_grn: - data["cellwise_grn"] = pd.DataFrame(rdata["cellwise_grn"]) - data["bulk_grn"] = pd.DataFrame(rdata["bulk_grn"]) + data['cellwise_grn'] = pd.DataFrame(rdata['cellwise_grn']) + data['bulk_grn'] = pd.DataFrame(rdata['bulk_grn']) if compute_rna_velocity: - data["rna_velocity"] = pd.DataFrame(rdata["rna_velocity"]) + data['rna_velocity'] = pd.DataFrame(rdata['rna_velocity']) - return data + return(data) diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 8989b51c..1a799ce4 100644 --- a/scprep/run/r_function.py +++ b/scprep/run/r_function.py @@ -1,6 +1,6 @@ +from . import conversion from .. import utils from .._lazyload import rpy2 -from . 
import conversion def _console_warning(s, log_fn): @@ -149,7 +149,6 @@ def __call__(self, *args, rpy_cleanup=None, rpy_verbose=None, **kwargs): """, ) - def install_bioconductor( package=None, site_repository=None, update=False, version=None, verbose=True ): @@ -178,42 +177,35 @@ def install_bioconductor( if version is not None: kwargs["version"] = version _install_bioconductor(**kwargs) - - + + _install_github = RFunction( args="""repo=character(), lib=.libPaths()[1], dependencies=NA, - update=FALSE, repos='http://cran.us.r-project.org', + update=FALSE, repos='http://cran.rstudio.com', build_vignettes=FALSE, force=FALSE, verbose=TRUE""", body=""" quiet <- !verbose - if (!require('remotes')) install.packages('remotes') + if (!require('remotes', quietly=TRUE)) install.packages('remotes') remotes::install_github(repo=repo, lib=lib, dependencies=dependencies, upgrade=update, repos=repos, build_vignettes=build_vignettes, force=force, quiet=quiet) - + # prepend path to libPaths if new library if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths())) - + if (verbose) cat('.libPaths():', .libPaths()) - """, + """ ) - - + def install_github( - repo, - lib=None, - dependencies=None, - update=False, - repos="http://cran.us.r-project.org", - build_vignettes=False, - force=False, - verbose=True, -): + repo, lib=None, dependencies=None, update=False, + repos="http://cran.us.r-project.org", build_vignettes=False, + force=False, verbose=True): """Install a Github repository. - + Parameters ---------- repo: string @@ -228,11 +220,11 @@ def install_github( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE environment variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + One of "default", "ask", "always", or "never". 
"default" + Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. For non-interactive sessions "ask" is equivalent to "always". - TRUE and FALSE are also accepted and correspond to "always" and "never" respectively. + TRUE and FALSE also accepted, correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): R package repository. build_vignettes: boolean, optional (default: False) @@ -247,13 +239,11 @@ def install_github( kwargs["lib"] = lib if dependencies is not None: kwargs["dependencies"] = dependencies - - _install_github( - repo=repo, - update=update, - repos=repos, - build_vignettes=build_vignettes, - force=force, - verbose=verbose, - **kwargs, - ) + + _install_github(repo=repo, + update=update, + repos=repos, + build_vignettes=build_vignettes, + force=force, + verbose=verbose, + **kwargs) diff --git a/test/test_run.py b/test/test_run.py index eeae60b5..d81cc54b 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,23 +4,21 @@ # python 3.5 pass else: - from tools import data - from tools import utils - - import anndata - import mock + from tools import utils, matrix, data import numpy as np import pandas as pd - import rpy2.rinterface_lib.callbacks - import rpy2.rinterface_lib.embedded import rpy2.robjects as ro - import scipy.sparse import scprep - import scprep.run - import scprep.run.conversion import scprep.run.r_function - import sklearn.cluster + import scprep.run.conversion + import scprep.run import unittest + import anndata + import sklearn.cluster + import scipy.sparse + import rpy2.rinterface_lib.callbacks + import rpy2.rinterface_lib.embedded + import mock builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror @@ -41,6 +39,47 @@ def test_install_bioc(): site_repository="https://bioconductor.org/packages/3.1/bioc", verbose=False, ) + + def test_install_github_lib(): + 
scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + packages <- installed.packages() + 'dyngen' %in% packages + """) + + assert fun() + + def test_install_github_dependencies_None(): + scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo")] + all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + """) + + assert fun() + + def test_install_github_dependencies_True(): + scprep.run.dyngen.install(verbose=False, dependencies=True) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", + "Suggests")] + all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + """) + + assert fun() class TestSplatter(unittest.TestCase): @classmethod @@ -240,100 +279,72 @@ def test_splatter_warning(self): rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) + class TestDyngen(unittest.TestCase): @classmethod def setUpClass(self): scprep.run.dyngen.install(verbose=False) - def test_dyngen_backbone_not_in_list(self): + def test_dyngen_backbone_not_in_list(self): utils.assert_raises_message( - rpy2.rinterface_lib.embedded.RRuntimeError, - "Error in (function (backbone_name = character(), num_cells = 500, num_tfs = 100, :", - scprep.run.DyngenSimulate, - backbone="not_a_backbone", - ) + rpy2.rinterface_lib.embedded.RRuntimeError, + ('Error in (function (backbone_name = character(), ' + 'num_cells = 500, num_tfs = 100, :'), + scprep.run.DyngenSimulate, + backbone="not_a_backbone", + ) def test_dyngen_default(self): - sim = scprep.run.DyngenSimulate( - backbone="bifurcating", - num_cells=50, - num_tfs=50, - num_targets=10, - num_hks=10, - 
verbose=False, - ) - - assert set(sim.keys()) == {"cell_info", "expression"} - assert sim["cell_info"].shape[0] > 0 - assert sim["cell_info"].shape[0] <= 50 - assert sim["cell_info"].shape[1] == 7 - assert sim["expression"].shape[0] > 0 - assert sim["expression"].shape[0] <= 50 - assert sim["expression"].shape[1] == 70 + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, + num_tfs=50, num_targets=10, num_hks=10, + verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression'} + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 def test_dyngen_force_cell_counts(self): - sim = scprep.run.DyngenSimulate( - backbone="bifurcating", - num_cells=50, - num_tfs=50, - num_targets=10, - num_hks=10, - verbose=False, - force_num_cells=True, - ) - - assert set(sim.keys()) == {"cell_info", "expression"} - assert sim["cell_info"].shape == (50, 7) - assert sim["expression"].shape == (50, 70) + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, + num_tfs=50, num_targets=10, num_hks=10, + verbose=False, force_num_cells=True) + + assert set(sim.keys()) == {'cell_info', 'expression'} + assert sim['cell_info'].shape[0] == 50 + assert sim['expression'].shape == (50, 70) def test_dyngen_with_grn(self): - sim = scprep.run.DyngenSimulate( - backbone="bifurcating", - num_cells=50, - num_tfs=50, - num_targets=10, - num_hks=10, - compute_cellwise_grn=True, - verbose=False, - ) - - assert set(sim.keys()) == { - "cell_info", - "expression", - "bulk_grn", - "cellwise_grn", - } - assert sim["cell_info"].shape[0] > 0 - assert sim["cell_info"].shape[0] <= 50 - assert sim["cell_info"].shape[1] == 7 - assert sim["expression"].shape[0] > 0 - assert sim["expression"].shape[0] <= 50 - assert sim["expression"].shape[1] == 70 - assert sim["bulk_grn"].shape[1] == 4 - assert sim["cellwise_grn"].shape[1] == 4 - + 
sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, + num_tfs=50, num_targets=10, num_hks=10, + compute_cellwise_grn=True, verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression', + 'bulk_grn', 'cellwise_grn'} + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 + assert sim['bulk_grn'].shape[0] > 0 + assert sim['cellwise_grn'].shape[0] > 0 + def test_dyngen_with_rna_velocity(self): - sim = scprep.run.DyngenSimulate( - backbone="bifurcating", - num_cells=50, - num_tfs=50, - num_targets=10, - num_hks=10, - compute_rna_velocity=True, - verbose=False, - ) - - assert set(sim.keys()) == {"cell_info", "expression", "rna_velocity"} - assert sim["cell_info"].shape[0] > 0 - assert sim["cell_info"].shape[0] <= 50 - assert sim["cell_info"].shape[1] == 7 - assert sim["expression"].shape[0] > 0 - assert sim["expression"].shape[0] <= 50 - assert sim["expression"].shape[1] == 70 - assert sim["rna_velocity"].shape[0] > 0 - assert sim["rna_velocity"].shape[0] <= 50 - assert sim["rna_velocity"].shape[1] == 70 - + sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, + num_tfs=50, num_targets=10, num_hks=10, + compute_rna_velocity=True, verbose=False) + + assert set(sim.keys()) == {'cell_info', 'expression', 'rna_velocity'} + assert sim['cell_info'].shape[0] > 0 + assert sim['cell_info'].shape[0] <= 50 + assert sim['expression'].shape[0] > 0 + assert sim['expression'].shape[0] <= 50 + assert sim['expression'].shape[1] == 70 + assert sim['rna_velocity'].shape[0] > 0 + assert sim['rna_velocity'].shape[0] <= 50 + assert sim['rna_velocity'].shape[1] == 70 + class TestSlingshot(unittest.TestCase): @classmethod def setUpClass(self): @@ -511,7 +522,7 @@ def test_conversion_dataframe(): assert np.all(x["x"] == np.array([1, 2, 3])) assert np.all(x["y"] == np.array(["a", "b", "c"])) - def 
test_conversion_sce(): + def test_conversion_spmatrix(): scprep.run.install_bioconductor("SingleCellExperiment") ro.r("library(SingleCellExperiment)") ro.r("X <- matrix(1:6, nrow=2, ncol=3)") From 83cd195b0a16ed86bd81912962d2dce180e3f9a9 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 3 Mar 2021 04:17:40 +0000 Subject: [PATCH 38/44] pre-commit --- scprep/run/dyngen.py | 208 ++++++++++++++++++++++----------------- scprep/run/r_function.py | 56 ++++++----- test/test_run.py | 182 ++++++++++++++++++++-------------- 3 files changed, 260 insertions(+), 186 deletions(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index c49314c5..e36b947f 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -1,6 +1,7 @@ -import pandas as pd from . import r_function +import pandas as pd + _get_backbones = r_function.RFunction( setup=""" library(dyngen) @@ -13,7 +14,7 @@ _DyngenSimulate = r_function.RFunction( args=""" backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50, - num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, + num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE """, setup=""" @@ -23,11 +24,11 @@ if (!is.na(random_state)) { set.seed(random_state) } - + backbones <- list('bifurcating'=backbone_bifurcating(), 'bifurcating_converging'=backbone_bifurcating_converging(), 'bifurcating_cycle'=backbone_bifurcating_cycle(), - 'bifurcating_loop'=backbone_bifurcating_loop(), + 'bifurcating_loop'=backbone_bifurcating_loop(), 'binary_tree'=backbone_binary_tree(), 'branching'=backbone_branching(), 'consecutive_bifurcating'=backbone_consecutive_bifurcating(), @@ -39,7 +40,7 @@ 'linear_simple'=backbone_linear_simple(), 'trifurcating'=backbone_trifurcating() ) - + backbone <- backbones[[backbone_name]] # silent default behavior of dyngen if (num_tfs < nrow(backbone$module_info)) 
{ @@ -48,7 +49,7 @@ "Dyngen uses backbone default.\n") } num_tfs <- nrow(backbone$module_info) - } + } if (verbose) { cat('Run Parameters:') cat('\n\tBackbone:', backbone_name) @@ -57,7 +58,7 @@ cat('\n\tNumber of Targets:', num_targets) cat('\n\tNumber of HKs:', num_hks, '\n') } - + init <- initialise_model( backbone=backbone, num_cells=num_cells, @@ -77,7 +78,7 @@ out <- generate_dataset(init) data <- list(cell_info = as.data.frame(out$dataset$cell_info), expression = as.data.frame(as.matrix(out$dataset$expression))) - + if (compute_cellwise_grn) { data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network) data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc) @@ -89,13 +90,19 @@ data """, ) - + + def install( - lib=None, dependencies=None, update=False, - repos="http://cran.us.r-project.org", build_vignettes=False, - force=False, verbose=True): + lib=None, + dependencies=None, + update=False, + repos="http://cran.us.r-project.org", + build_vignettes=False, + force=False, + verbose=True, +): """Install Dyngen Github repository. - + Parameters ---------- lib: string @@ -108,9 +115,9 @@ def install( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" + One of "default", "ask", "always", or "never". "default" Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + "ask" prompts the user for which out of date packages to upgrade. For non-interactive sessions "ask" is equivalent to "always". TRUE and FALSE also accepted, correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): @@ -122,40 +129,55 @@ def install( verbose: boolean, optional (default: True) Install script verbosity. 
""" - r_function.install_github(repo="dynverse/dynwrap", - update=update, - lib=lib, - dependencies=dependencies, - repos=repos, - verbose=verbose) - - r_function.install_github(repo="dynverse/dyngen", - update=update, - lib=lib, - dependencies=dependencies, - repos=repos, - verbose=verbose) - + r_function.install_github( + repo="dynverse/dynwrap", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose, + ) + + r_function.install_github( + repo="dynverse/dyngen", + update=update, + lib=lib, + dependencies=dependencies, + repos=repos, + verbose=verbose, + ) + + def get_backbones(): """Output full list of cell trajectory backbones. - + Returns ------- backbones: array of backbone names """ - return(_get_backbones()) + return _get_backbones() -def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks=25, - simulation_census_interval=10, compute_cellwise_grn=False, - compute_rna_velocity=False, n_jobs=7, random_state=None, verbose=True, - force_num_cells=False): +def DyngenSimulate( + backbone, + num_cells=500, + num_tfs=100, + num_targets=50, + num_hks=25, + simulation_census_interval=10, + compute_cellwise_grn=False, + compute_rna_velocity=False, + n_jobs=7, + random_state=None, + verbose=True, + force_num_cells=False, +): """Simulate dataset with cellular backbone. - + The backbone determines the overall dynamic process during a simulation. It consists of a set of gene modules, which regulate each other such that expression of certain genes change over time in a specific manner. - + DyngenSimulate is a Python wrapper for the R package Dyngen. Default values obtained from Github vignettes. For more details, read about Dyngen on Github_. @@ -170,21 +192,21 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks num_cells: int, optional (default: 500) Number of cells. num_tfs: int, optional (default: 100) - Number of transcription factors. + Number of transcription factors. 
The TFs are the main drivers of the molecular changes in the simulation. A TF can only be regulated by other TFs or itself. - + NOTE: If num_tfs input is less than nrow(backbone$module_info), Dyngen will default to nrow(backbone$module_info). This quantity varies between backbones and with each run (without seed). - It is generally less than 75. + It is generally less than 75. It is recommended to input num_tfs >= 100 to stabilize the output. num_targets: int, optional (default: 50) - Number of target genes. + Number of target genes. Target genes are regulated by a TF or another target gene, - but are always downstream of at least one TF. + but are always downstream of at least one TF. num_hks: int, optional (default: 25) - Number of housekeeping genees. + Number of housekeeping genees. Housekeeping genes are completely separate from any TFs or target genes. simulation_census_interval: int, optional (default: 10) Stores the abundance levels only after a specific interval has passed. @@ -204,32 +226,32 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks verbose: boolean, optional (default: True) Data generation verbosity. force_num_cells: boolean, optional (default: False) - Dyngen occassionally produces fewer cells than specified. + Dyngen occassionally produces fewer cells than specified. Set this flag to True to rerun Dyngen until correct cell count is reached. - + Returns ------- Dictionary data of pd.DataFrames: data['cell_info']: pd.DataFrame, shape (n_cells, 4) Columns: cell_id, step_ix, simulation_i, sim_time sim_time is the simulated timepoint for a given cell. - + data['expression']: pd.DataFrame, shape (n_cells, n_genes) Log-transformed counts with dropout. - + If compute_cellwise_grn is True, data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4) Columns: regulator, target, strength, effect. Strength is positive and unbounded. Effect is either +1 (for activation) or -1 (for inhibition). 
- + data['cellwise_grn']: pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4) - Columns: cell_id, regulator, target, strength. - The output does not include all edges per cell. + Columns: cell_id, regulator, target, strength. + The output does not include all edges per cell. The regulatory effect lies between [−1, 1], where -1 is complete inhibition of target by TF, +1 is maximal activation of target by TF, and 0 is inactivity of the regulatory interaction between R and T. - + If compute_rna_velocity is True, data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes) Propensity ratios for each cell. @@ -243,51 +265,59 @@ def DyngenSimulate(backbone, num_cells=500, num_tfs=100, num_targets=50, num_hks """ if backbone not in get_backbones(): - raise ValueError(('Input not in default backbone list. ' - 'Choose backbone from get_backbones()')) - + raise ValueError( + ( + "Input not in default backbone list. " + "Choose backbone from get_backbones()" + ) + ) + kwargs = {} if random_state is not None: kwargs["random_state"] = random_state - - rdata = _DyngenSimulate(backbone_name=backbone, - num_cells=num_cells, - num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - rpy_verbose=verbose, - **kwargs) - + + rdata = _DyngenSimulate( + backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + **kwargs, + ) + if force_num_cells: if random_state is None: random_state = -1 - - if pd.DataFrame(rdata['cell_info']).shape[0] != num_cells: - random_state +=1 - rdata = DyngenSimulate(backbone=backbone, - num_cells=num_cells, - 
num_tfs=num_tfs, - num_targets=num_targets, - num_hks=num_hks, - simulation_census_interval=simulation_census_interval, - compute_cellwise_grn=compute_cellwise_grn, - compute_rna_velocity=compute_rna_velocity, - n_jobs=n_jobs, - verbose=verbose, - random_state=random_state) - + + if pd.DataFrame(rdata["cell_info"]).shape[0] != num_cells: + random_state += 1 + rdata = DyngenSimulate( + backbone=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + random_state=random_state, + ) + data = {} - data['cell_info'] = pd.DataFrame(rdata['cell_info']) - data['expression'] = pd.DataFrame(rdata['expression']) + data["cell_info"] = pd.DataFrame(rdata["cell_info"]) + data["expression"] = pd.DataFrame(rdata["expression"]) if compute_cellwise_grn: - data['cellwise_grn'] = pd.DataFrame(rdata['cellwise_grn']) - data['bulk_grn'] = pd.DataFrame(rdata['bulk_grn']) + data["cellwise_grn"] = pd.DataFrame(rdata["cellwise_grn"]) + data["bulk_grn"] = pd.DataFrame(rdata["bulk_grn"]) if compute_rna_velocity: - data['rna_velocity'] = pd.DataFrame(rdata['rna_velocity']) + data["rna_velocity"] = pd.DataFrame(rdata["rna_velocity"]) - return(data) + return data diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 1a799ce4..58efa510 100644 --- a/scprep/run/r_function.py +++ b/scprep/run/r_function.py @@ -1,6 +1,6 @@ -from . import conversion from .. import utils from .._lazyload import rpy2 +from . 
import conversion def _console_warning(s, log_fn): @@ -149,6 +149,7 @@ def __call__(self, *args, rpy_cleanup=None, rpy_verbose=None, **kwargs): """, ) + def install_bioconductor( package=None, site_repository=None, update=False, version=None, verbose=True ): @@ -177,11 +178,11 @@ def install_bioconductor( if version is not None: kwargs["version"] = version _install_bioconductor(**kwargs) - - + + _install_github = RFunction( args="""repo=character(), lib=.libPaths()[1], dependencies=NA, - update=FALSE, repos='http://cran.rstudio.com', + update=FALSE, repos='http://cran.rstudio.com', build_vignettes=FALSE, force=FALSE, verbose=TRUE""", body=""" quiet <- !verbose @@ -192,20 +193,27 @@ def install_bioconductor( upgrade=update, repos=repos, build_vignettes=build_vignettes, force=force, quiet=quiet) - + # prepend path to libPaths if new library if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths())) - + if (verbose) cat('.libPaths():', .libPaths()) - """ + """, ) - + + def install_github( - repo, lib=None, dependencies=None, update=False, - repos="http://cran.us.r-project.org", build_vignettes=False, - force=False, verbose=True): + repo, + lib=None, + dependencies=None, + update=False, + repos="http://cran.us.r-project.org", + build_vignettes=False, + force=False, + verbose=True, +): """Install a Github repository. - + Parameters ---------- repo: string @@ -220,9 +228,9 @@ def install_github( When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. 
For non-interactive sessions "ask" is equivalent to "always". TRUE and FALSE also accepted, correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): @@ -239,11 +247,13 @@ def install_github( kwargs["lib"] = lib if dependencies is not None: kwargs["dependencies"] = dependencies - - _install_github(repo=repo, - update=update, - repos=repos, - build_vignettes=build_vignettes, - force=force, - verbose=verbose, - **kwargs) + + _install_github( + repo=repo, + update=update, + repos=repos, + build_vignettes=build_vignettes, + force=force, + verbose=verbose, + **kwargs, + ) diff --git a/test/test_run.py b/test/test_run.py index d81cc54b..1a265317 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,21 +4,24 @@ # python 3.5 pass else: - from tools import utils, matrix, data + from tools import data + from tools import matrix + from tools import utils + + import anndata + import mock import numpy as np import pandas as pd + import rpy2.rinterface_lib.callbacks + import rpy2.rinterface_lib.embedded import rpy2.robjects as ro + import scipy.sparse import scprep - import scprep.run.r_function - import scprep.run.conversion import scprep.run - import unittest - import anndata + import scprep.run.conversion + import scprep.run.r_function import sklearn.cluster - import scipy.sparse - import rpy2.rinterface_lib.callbacks - import rpy2.rinterface_lib.embedded - import mock + import unittest builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror @@ -39,46 +42,49 @@ def test_install_bioc(): site_repository="https://bioconductor.org/packages/3.1/bioc", verbose=False, ) - + def test_install_github_lib(): scprep.run.dyngen.install(verbose=False) fun = scprep.run.RFunction( body=""" packages <- installed.packages() 'dyngen' %in% packages - """) - + """ + ) + assert fun() - + def test_install_github_dependencies_None(): scprep.run.dyngen.install(verbose=False) fun = scprep.run.RFunction( - body=""" + body=""" 
if (!require("pacman", quietly=TRUE)) { - install.packages("pacman", + install.packages("pacman", repos='http://cran.rstudio.com') } deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo")] all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) - """) + """ + ) assert fun() - + def test_install_github_dependencies_True(): scprep.run.dyngen.install(verbose=False, dependencies=True) fun = scprep.run.RFunction( - body=""" + body=""" if (!require("pacman", quietly=TRUE)) { - install.packages("pacman", + install.packages("pacman", repos='http://cran.rstudio.com') } deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", "Suggests")] all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) - """) - + """ + ) + assert fun() class TestSplatter(unittest.TestCase): @@ -279,72 +285,100 @@ def test_splatter_warning(self): rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) - class TestDyngen(unittest.TestCase): @classmethod def setUpClass(self): scprep.run.dyngen.install(verbose=False) - def test_dyngen_backbone_not_in_list(self): + def test_dyngen_backbone_not_in_list(self): utils.assert_raises_message( - rpy2.rinterface_lib.embedded.RRuntimeError, - ('Error in (function (backbone_name = character(), ' - 'num_cells = 500, num_tfs = 100, :'), - scprep.run.DyngenSimulate, - backbone="not_a_backbone", - ) + rpy2.rinterface_lib.embedded.RRuntimeError, + ( + "Error in (function (backbone_name = character(), " + "num_cells = 500, num_tfs = 100, :" + ), + scprep.run.DyngenSimulate, + backbone="not_a_backbone", + ) def test_dyngen_default(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, - num_tfs=50, num_targets=10, num_hks=10, - verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert 
sim['expression'].shape[1] == 70 + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + ) + + assert set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 def test_dyngen_force_cell_counts(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, - num_tfs=50, num_targets=10, num_hks=10, - verbose=False, force_num_cells=True) - - assert set(sim.keys()) == {'cell_info', 'expression'} - assert sim['cell_info'].shape[0] == 50 - assert sim['expression'].shape == (50, 70) + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + force_num_cells=True, + ) + + assert set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape[0] == 50 + assert sim["expression"].shape == (50, 70) def test_dyngen_with_grn(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, - num_tfs=50, num_targets=10, num_hks=10, - compute_cellwise_grn=True, verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression', - 'bulk_grn', 'cellwise_grn'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert sim['expression'].shape[1] == 70 - assert sim['bulk_grn'].shape[0] > 0 - assert sim['cellwise_grn'].shape[0] > 0 - + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_cellwise_grn=True, + verbose=False, + ) + + assert set(sim.keys()) == { + "cell_info", + "expression", + "bulk_grn", + "cellwise_grn", + } + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] 
<= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["bulk_grn"].shape[0] > 0 + assert sim["cellwise_grn"].shape[0] > 0 + def test_dyngen_with_rna_velocity(self): - sim = scprep.run.DyngenSimulate(backbone="bifurcating", num_cells=50, - num_tfs=50, num_targets=10, num_hks=10, - compute_rna_velocity=True, verbose=False) - - assert set(sim.keys()) == {'cell_info', 'expression', 'rna_velocity'} - assert sim['cell_info'].shape[0] > 0 - assert sim['cell_info'].shape[0] <= 50 - assert sim['expression'].shape[0] > 0 - assert sim['expression'].shape[0] <= 50 - assert sim['expression'].shape[1] == 70 - assert sim['rna_velocity'].shape[0] > 0 - assert sim['rna_velocity'].shape[0] <= 50 - assert sim['rna_velocity'].shape[1] == 70 - + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_rna_velocity=True, + verbose=False, + ) + + assert set(sim.keys()) == {"cell_info", "expression", "rna_velocity"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["rna_velocity"].shape[0] > 0 + assert sim["rna_velocity"].shape[0] <= 50 + assert sim["rna_velocity"].shape[1] == 70 + class TestSlingshot(unittest.TestCase): @classmethod def setUpClass(self): From e9413e04cc223f1d52342d51e89997a8b867e513 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 3 Mar 2021 14:30:46 -0500 Subject: [PATCH 39/44] bump version --- scprep/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scprep/version.py b/scprep/version.py index 2bed39b4..6849410a 100644 --- a/scprep/version.py +++ b/scprep/version.py @@ -1 +1 @@ -__version__ = "1.0.14a0" +__version__ = "1.1.0" From 2d4a1562c43fd9eaa6a215b9c62c9117f12dc7f3 Mon Sep 17 00:00:00 2001 From: Scott 
Gigante Date: Wed, 3 Mar 2021 15:59:05 -0500 Subject: [PATCH 40/44] more verbose --- test/test_run.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/test/test_run.py b/test/test_run.py index 1a265317..31098604 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -81,11 +81,15 @@ def test_install_github_dependencies_True(): deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", "Suggests")] - all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + deps <- unname(unlist(deps)) + installed <- installed.packages()[, "Package"]) + success <- all(deps %in% installed) + list(success=success, deps=deps, installed=installed) """ ) - assert fun() + result = fun() + assert result["success"], result class TestSplatter(unittest.TestCase): @classmethod @@ -292,13 +296,12 @@ def setUpClass(self): def test_dyngen_backbone_not_in_list(self): utils.assert_raises_message( - rpy2.rinterface_lib.embedded.RRuntimeError, - ( - "Error in (function (backbone_name = character(), " - "num_cells = 500, num_tfs = 100, :" - ), + ValueError, + "Input not in default backbone list. 
" + "Choose backbone from get_backbones()", scprep.run.DyngenSimulate, backbone="not_a_backbone", + verbose=False, ) def test_dyngen_default(self): From ca3a63eb3cae4bbede9be5886eda78dd91bc3357 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 3 Mar 2021 15:59:13 -0500 Subject: [PATCH 41/44] force num_cells recursively --- scprep/run/dyngen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index e36b947f..766b75ee 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -263,7 +263,6 @@ def DyngenSimulate( >>> backbones = scprep.run.dyngen.get_backbones() >>> data = scprep.run.DyngenSimulate(backbone=backbones[0]) """ - if backbone not in get_backbones(): raise ValueError( ( @@ -309,6 +308,7 @@ def DyngenSimulate( n_jobs=n_jobs, verbose=verbose, random_state=random_state, + force_num_cells=force_num_cells, ) data = {} From b56f653135458da51c8d92d8cb4add16e705dbb7 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Wed, 3 Mar 2021 16:58:10 -0500 Subject: [PATCH 42/44] fix typo --- test/test_run.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_run.py b/test/test_run.py index 31098604..e6f19e01 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -82,7 +82,7 @@ def test_install_github_dependencies_True(): deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", "Suggests")] deps <- unname(unlist(deps)) - installed <- installed.packages()[, "Package"]) + installed <- installed.packages()[, "Package"] success <- all(deps %in% installed) list(success=success, deps=deps, installed=installed) """ From 3caf8b5fad1654c3f19435b1c2418d30ffcb50ad Mon Sep 17 00:00:00 2001 From: Aarthi Venkat Date: Fri, 19 Mar 2021 13:02:01 -0400 Subject: [PATCH 43/44] Update dyngen installation, github installation tests --- scprep/run/__init__.py | 1 + scprep/run/dyngen.py | 49 ++++++++++++++------------------ test/test_run.py | 64 +++++++++++++++++++++++++++++++++++++----- 3 
files changed, 79 insertions(+), 35 deletions(-) diff --git a/scprep/run/__init__.py b/scprep/run/__init__.py index d654d429..355e8930 100644 --- a/scprep/run/__init__.py +++ b/scprep/run/__init__.py @@ -1,5 +1,6 @@ from .dyngen import DyngenSimulate from .r_function import install_bioconductor +from .r_function import install_github from .r_function import RFunction from .slingshot import Slingshot from .splatter import SplatSimulate diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index 766b75ee..9b821f83 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -2,6 +2,17 @@ import pandas as pd +_install_dyngen = r_function.RFunction( + args="""lib=.libPaths()[1], dependencies=NA, + repos='http://cran.rstudio.com', verbose=TRUE""", + body=""" + install.packages(c("dynwrap", "dyngen"), + lib=lib, + repos=repos, + dependencies=dependencies) + """ +) + _get_backbones = r_function.RFunction( setup=""" library(dyngen) @@ -95,13 +106,10 @@ def install( lib=None, dependencies=None, - update=False, repos="http://cran.us.r-project.org", - build_vignettes=False, - force=False, verbose=True, ): - """Install Dyngen Github repository. + """Install Dyngen from CRAN. Parameters ---------- @@ -114,37 +122,22 @@ def install( When False, installs no dependencies. When None/NA, installs all packages specified under "Depends", "Imports" and "LinkingTo". - update: string or boolean, optional (default: False) - One of "default", "ask", "always", or "never". "default" - Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. - "ask" prompts the user for which out of date packages to upgrade. - For non-interactive sessions "ask" is equivalent to "always". - TRUE and FALSE also accepted, correspond to "always" and "never" respectively. repos: string, optional (default: "http://cran.us.r-project.org"): R package repository. - build_vignettes: boolean, optional (default: False) - Builds Github vignettes. 
- force: boolean, optional (default: False) - Forces installation even if remote state has not changed since previous install. verbose: boolean, optional (default: True) Install script verbosity. """ - r_function.install_github( - repo="dynverse/dynwrap", - update=update, - lib=lib, - dependencies=dependencies, - repos=repos, - verbose=verbose, - ) - - r_function.install_github( - repo="dynverse/dyngen", - update=update, - lib=lib, - dependencies=dependencies, + + kwargs = {} + if lib is not None: + kwargs["lib"] = lib + if dependencies is not None: + kwargs["dependencies"] = dependencies + + _install_dyngen( repos=repos, verbose=verbose, + **kwargs, ) diff --git a/test/test_run.py b/test/test_run.py index e6f19e01..f7a180d8 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -44,18 +44,18 @@ def test_install_bioc(): ) def test_install_github_lib(): - scprep.run.dyngen.install(verbose=False) + scprep.run.install_github("twitter/AnomalyDetection", verbose=False) fun = scprep.run.RFunction( body=""" packages <- installed.packages() - 'dyngen' %in% packages + 'AnomalyDetection' %in% packages """ ) assert fun() def test_install_github_dependencies_None(): - scprep.run.dyngen.install(verbose=False) + scprep.run.install_github("twitter/AnomalyDetection", verbose=False) fun = scprep.run.RFunction( body=""" if (!require("pacman", quietly=TRUE)) { @@ -63,7 +63,8 @@ def test_install_github_dependencies_None(): repos='http://cran.rstudio.com') } - deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo")] + deps <- pacman::p_depends(AnomalyDetection, local=TRUE)[c("Depends", + "Imports","LinkingTo")] all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) """ ) @@ -71,7 +72,8 @@ def test_install_github_dependencies_None(): assert fun() def test_install_github_dependencies_True(): - scprep.run.dyngen.install(verbose=False, dependencies=True) + scprep.run.install_github("twitter/AnomalyDetection", verbose=False, + dependencies=True) fun = 
scprep.run.RFunction( body=""" if (!require("pacman", quietly=TRUE)) { @@ -79,8 +81,8 @@ def test_install_github_dependencies_True(): repos='http://cran.rstudio.com') } - deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", - "Suggests")] + deps <- pacman::p_depends(AnomalyDetection, local=TRUE)[c("Depends", + "Imports","LinkingTo","Suggests")] deps <- unname(unlist(deps)) installed <- installed.packages()[, "Package"] success <- all(deps %in% installed) @@ -293,6 +295,54 @@ class TestDyngen(unittest.TestCase): @classmethod def setUpClass(self): scprep.run.dyngen.install(verbose=False) + + def test_install_dyngen_lib(self): + scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + packages <- installed.packages() + 'dyngen' %in% packages + """ + ) + + assert fun() + + def test_install_dyngen_dependencies_None(self): + scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo")] + all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + """ + ) + + assert fun() + + def test_install_dyngen_dependencies_True(self): + scprep.run.dyngen.install(verbose=False, dependencies=True) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", + "Suggests")] + deps <- unname(unlist(deps)) + installed <- installed.packages()[, "Package"] + success <- all(deps %in% installed) + list(success=success, deps=deps, installed=installed) + """ + ) + + result = fun() + assert result["success"], result def test_dyngen_backbone_not_in_list(self): utils.assert_raises_message( From 442a201c1484822baf9e5ef16501afbc38ae102a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" 
<41898282+github-actions[bot]@users.noreply.github.com> Date: Fri, 19 Mar 2021 17:02:56 +0000 Subject: [PATCH 44/44] pre-commit --- scprep/run/dyngen.py | 8 ++++---- test/test_run.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py index 9b821f83..5ce52f0a 100644 --- a/scprep/run/dyngen.py +++ b/scprep/run/dyngen.py @@ -6,11 +6,11 @@ args="""lib=.libPaths()[1], dependencies=NA, repos='http://cran.rstudio.com', verbose=TRUE""", body=""" - install.packages(c("dynwrap", "dyngen"), + install.packages(c("dynwrap", "dyngen"), lib=lib, repos=repos, dependencies=dependencies) - """ + """, ) _get_backbones = r_function.RFunction( @@ -127,13 +127,13 @@ def install( verbose: boolean, optional (default: True) Install script verbosity. """ - + kwargs = {} if lib is not None: kwargs["lib"] = lib if dependencies is not None: kwargs["dependencies"] = dependencies - + _install_dyngen( repos=repos, verbose=verbose, diff --git a/test/test_run.py b/test/test_run.py index f7a180d8..68e5da94 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -72,8 +72,9 @@ def test_install_github_dependencies_None(): assert fun() def test_install_github_dependencies_True(): - scprep.run.install_github("twitter/AnomalyDetection", verbose=False, - dependencies=True) + scprep.run.install_github( + "twitter/AnomalyDetection", verbose=False, dependencies=True + ) fun = scprep.run.RFunction( body=""" if (!require("pacman", quietly=TRUE)) { @@ -295,7 +296,7 @@ class TestDyngen(unittest.TestCase): @classmethod def setUpClass(self): scprep.run.dyngen.install(verbose=False) - + def test_install_dyngen_lib(self): scprep.run.dyngen.install(verbose=False) fun = scprep.run.RFunction(