diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 00000000..efe204a4 --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,44 @@ +name: pre-commit +on: + push: + branches-ignore: + - 'master' + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - name: Cancel Previous Runs + uses: styfle/cancel-workflow-action@0.6.0 + with: + access_token: ${{ github.token }} + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v2 + with: + python-version: "3.7" + architecture: "x64" + + - uses: actions/cache@v2 + with: + path: ~/.cache/pre-commit + key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }}- + + - uses: pre-commit/action@v2.0.0 + continue-on-error: true + + - name: Commit files + run: | + if [[ `git status --porcelain --untracked-files=no` ]]; then + git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "github-actions[bot]" + git commit -m "pre-commit" -a + fi + + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ${{ github.ref }} diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index be00719e..9280e307 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -36,8 +36,7 @@ jobs: run: | python -m pip install --upgrade pip pip install -U wheel setuptools - pip install -U black flake8 - + pip install -U black flake8 hacking - name: Lint with Black run: | black . 
--check --diff @@ -56,9 +55,10 @@ jobs: fail-fast: false matrix: config: - - {name: 'current', os: ubuntu-latest, python: '3.8', r: 'release' } - - {name: 'prev', os: ubuntu-latest, python: '3.7', r: 'release' } - - {name: 'old', os: ubuntu-latest, python: '3.6', r: 'release' } + - {name: '3.9', os: ubuntu-latest, python: '3.9', r: 'release' } + - {name: '3.8', os: ubuntu-latest, python: '3.8', r: 'release' } + - {name: '3.7', os: ubuntu-latest, python: '3.7', r: 'release' } + - {name: '3.6', os: ubuntu-latest, python: '3.6', r: 'release' } steps: - name: Cancel Previous Runs @@ -74,7 +74,7 @@ jobs: if: runner.os == 'Linux' run: | sudo apt-get update -qq - sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg + sudo apt-get install -y pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf5-dev - name: Set up Python uses: actions/setup-python@v2 @@ -116,8 +116,17 @@ jobs: renv::restore() renv::install("bioc::splatter") renv::install("bioc::slingshot") + renv::install("github::dynverse/dyngen") + renv::install("github::dynverse/dynwrap") shell: Rscript {0} + - name: Install package & dependencies + run: | + python -m pip install --upgrade pip + pip install -U wheel setuptools + pip install -U .[test] + python -c "import scprep" + - name: Run tests run: nose2 -vvv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..a5339641 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,26 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.3.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: \.(ai|gz)$ + - repo: https://github.com/timothycrosley/isort + rev: 5.6.4 + hooks: + - id: isort + - repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black + args: ['--target-version=py36'] + - repo: https://github.com/pre-commit/mirrors-autopep8 + rev: 
v1.5.4 + hooks: + - id: autopep8 + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + additional_dependencies: ['hacking'] diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 731c0b8d..00000000 --- a/.travis.yml +++ /dev/null @@ -1,59 +0,0 @@ -language: python -python: - - '3.5' - - '3.6' - - 3.7-dev -sudo: required -dist: xenial -addons: - apt: - packages: - - libhdf5-dev - - ffmpeg - - pandoc - - gfortran - - libblas-dev - - liblapack-dev - - libglu1-mesa-dev - - freeglut3-dev - - mesa-common-dev - - libgsl-dev -cache: - - pip - - apt - - directories: - - $HOME/R/Library -install: - - pip install -U . -before_script: - - >- - sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys - E298A3A825C0D65DFD57CBB651716619E084DAB9 - - >- - echo "deb http://cran.rstudio.com/bin/linux/ubuntu xenial-cran35/" | sudo - tee -a /etc/apt/sources.list - - sudo apt-get update -qq - - sudo apt-get install r-base-core=3.6\* -y - - export R_LIBS_USER="$HOME/R/Library" - - 'echo ".libPaths(c(''$R_LIBS_USER'', .libPaths()))" >> $HOME/.Rprofile' - - Rscript travis_setup.R -script: - - python -c "import scprep" - - 'pip install -U .[test]' - - 'if [ "$TRAVIS_PYTHON_VERSION" != "3.5" ]; then black . 
--check --diff --target-version py35; fi' - - python setup.py test - - 'pip install -U .[doc]' - - python setup.py build_sphinx -after_success: - - coveralls -deploy: - provider: pypi - user: scottgigante - password: '${PYPI_PASSWORD}' - distributions: sdist bdist_wheel - skip_existing: true - skip_cleanup: true - 'on': - tags: true - branch: master - diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 10ef1ee7..24387d42 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -73,4 +73,4 @@ available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.ht [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq \ No newline at end of file +https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a6b9b910..b7a45186 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -25,7 +25,7 @@ Code Style and Testing `scprep` is maintained at close to 100% code coverage. Contributors are encouraged to write tests for their code, but if you do not know how to do so, please do not feel discouraged from contributing code! Others can always help you test your contribution. -Code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage). To automatically reformat your code when you run `git commit`, you can run `./autoblack.sh` in the root directory of this project to add a hook to your `git` repository. +Code style is dictated by [`black`](https://pypi.org/project/black/#installation-and-usage) and [`flake8`](https://flake8.pycqa.org/en/latest/) with [`hacking`](https://github.com/openstack/hacking). Code is automatically reformatted by [`pre-commit`](https://pre-commit.com/) when you push to GitHub. Code of Conduct --------------- diff --git a/README.rst b/README.rst index 03e5cca5..427c7a40 100644 --- a/README.rst +++ b/README.rst @@ -25,6 +25,12 @@ .. 
image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black :alt: Code style: black +.. image:: https://img.shields.io/badge/style%20guide-openstack-eb1a32.svg + :target: https://docs.openstack.org/hacking/latest/user/hacking.html#styleguide + :alt: Style Guide: OpenStack +.. image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white + :target: https://github.com/pre-commit/pre-commit + :alt: pre-commit `scprep` provides an all-in-one framework for loading, preprocessing, and plotting matrices in Python, with a focus on single-cell genomics. @@ -64,7 +70,7 @@ You can use `scprep` with your single cell data as follows:: # Filter by mitochondrial expression to remove dead cells mt_genes = scprep.select.get_gene_set(data, starts_with="MT") scprep.plot.plot_gene_set_expression(data, genes=mt_genes, percentile=90) - data = scprep.filter.filter_gene_set_expression(data, genes=mt_genes, + data = scprep.filter.filter_gene_set_expression(data, genes=mt_genes, percentile=90) # Library size normalize data = scprep.normalize.library_size_normalize(data) diff --git a/autoblack.sh b/autoblack.sh deleted file mode 100644 index 2ac8fd82..00000000 --- a/autoblack.sh +++ /dev/null @@ -1,14 +0,0 @@ -cat <> .git/hooks/pre-commit -#!/bin/sh - -set -e - -files=\$(git diff --staged --name-only --diff-filter=d -- "*.py") - -for file in \$files; do - black -t py35 -q \$file - git add \$file -done -EOF -chmod +x .git/hooks/pre-commit - diff --git a/data/test_data/gene_symbols.csv b/data/test_data/gene_symbols.csv index 08d4a237..a6f11fc4 100644 --- a/data/test_data/gene_symbols.csv +++ b/data/test_data/gene_symbols.csv @@ -1 +1 @@ 
-Arl8b,Cdc16,Lrrc8b,0610009B22Rik,Apoe,Asap1,Gstm5,Mok,Rps27l,Stap2,Prpf40a,Pam16,Rnf220,Tmem9b,Rdx,Nupr1l,4930455B14Rik,Sdf2l1,4921517D22Rik,Psmd8,Dynlrb2,Smco4,Gm14285,Ctdp1,Hnrnpu,Ldhb,Cep63,Unc50,Chd5,Ift22,Ankrd13a,Atp5o,Cdhr4,Atp1b3,Gm553,Phtf1,Fam177a,Dgkh,Atoh8,1700019G24Rik,Sycp1,1110008F13Rik,Pgam2,Ift74,Akr1cl,Adam30,Psmd12,mt-Cytb,Ccdc33,Gsto2,Gm16208,C1ql1,Loxhd1,Dhfr,Fgf13,D130052B06Rik,Pcdhb3,Krt32,4833407H14Rik,Abcc5,Vmn1r118,Ccdc54,Megf6,Gm13872,Erp44,Rgs13,Slc2a3,Gm11116,Gm16279,Trmt1,Gm7697,Gm11579,Zdhhc20,4930444P10Rik,RP23-449M8.6,Gm12631,Stoml3,Metrn,Tmco5,Fabp12,Gm3486,Hnf1aos2,Tmem200b,Olfr91,Gm19273,Hmga1-rs1,Prl3d3,Crygf,D030040B21Rik,Serpinb9e,Fam126b,Gm26873,Gm42435,Dmxl2,Cep164,Kansl2,Mgat1,Thrsp,Gm20821,Olfr203 \ No newline at end of file +Arl8b,Cdc16,Lrrc8b,0610009B22Rik,Apoe,Asap1,Gstm5,Mok,Rps27l,Stap2,Prpf40a,Pam16,Rnf220,Tmem9b,Rdx,Nupr1l,4930455B14Rik,Sdf2l1,4921517D22Rik,Psmd8,Dynlrb2,Smco4,Gm14285,Ctdp1,Hnrnpu,Ldhb,Cep63,Unc50,Chd5,Ift22,Ankrd13a,Atp5o,Cdhr4,Atp1b3,Gm553,Phtf1,Fam177a,Dgkh,Atoh8,1700019G24Rik,Sycp1,1110008F13Rik,Pgam2,Ift74,Akr1cl,Adam30,Psmd12,mt-Cytb,Ccdc33,Gsto2,Gm16208,C1ql1,Loxhd1,Dhfr,Fgf13,D130052B06Rik,Pcdhb3,Krt32,4833407H14Rik,Abcc5,Vmn1r118,Ccdc54,Megf6,Gm13872,Erp44,Rgs13,Slc2a3,Gm11116,Gm16279,Trmt1,Gm7697,Gm11579,Zdhhc20,4930444P10Rik,RP23-449M8.6,Gm12631,Stoml3,Metrn,Tmco5,Fabp12,Gm3486,Hnf1aos2,Tmem200b,Olfr91,Gm19273,Hmga1-rs1,Prl3d3,Crygf,D030040B21Rik,Serpinb9e,Fam126b,Gm26873,Gm42435,Dmxl2,Cep164,Kansl2,Mgat1,Thrsp,Gm20821,Olfr203 diff --git a/doc/Makefile b/doc/Makefile index 10f1a2ba..3f433e07 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -18,4 +18,4 @@ help: # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/source/conf.py b/doc/source/conf.py index 88a4b158..a537b55f 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -13,14 +13,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import glob + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os -import sys -import glob import shutil +import sys root_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")) sys.path.insert(0, root_dir) @@ -211,4 +212,4 @@ \texttt{\strut{}{{ docname }}}\\[-0.5\baselineskip] \noindent\rule{\textwidth}{0.4pt}} \vspace{-2\baselineskip} -""" +""" # noqa diff --git a/doc/source/examples/index.rst b/doc/source/examples/index.rst index 88b1e810..9ad89301 100644 --- a/doc/source/examples/index.rst +++ b/doc/source/examples/index.rst @@ -4,4 +4,4 @@ Examples .. toctree:: scatter - jitter \ No newline at end of file + jitter diff --git a/requirements.txt b/requirements.txt index 31d927e5..5e580362 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ scipy>=0.18.1 scikit-learn>=0.19.1 pandas>=0.25 decorator>=4.3.0 +packaging diff --git a/scprep/__init__.py b/scprep/__init__.py index f5674ba3..9b4dd775 100644 --- a/scprep/__init__.py +++ b/scprep/__init__.py @@ -1,20 +1,17 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - +from . 
import _patch from .version import __version__ + +import scprep.filter import scprep.io import scprep.io.hdf5 -import scprep.select -import scprep.filter -import scprep.normalize -import scprep.transform import scprep.measure +import scprep.normalize import scprep.plot -import scprep.sanitize -import scprep.stats import scprep.reduce import scprep.run - -from . import _patch +import scprep.sanitize +import scprep.select +import scprep.stats +import scprep.transform _patch.patch_fill_value() diff --git a/scprep/_lazyload.py b/scprep/_lazyload.py index 1ecb13f2..7089cd54 100644 --- a/scprep/_lazyload.py +++ b/scprep/_lazyload.py @@ -1,10 +1,13 @@ import importlib import sys -# Key: -# { module : [{submodule1:[subsubmodule1, subsubmodule2]}, submodule2] } -# each module loads submodules on initialization but is only imported -# and loads methods/classes when these are accessed +"""Key: + +{ module : [{submodule1:[subsubmodule1, subsubmodule2]}, submodule2] } + +Each module loads submodules on initialization but is only imported +and loads methods/classes when these are accessed. +""" _importspec = { "matplotlib": [ "colors", @@ -31,7 +34,20 @@ class AliasModule(object): + """Wrapper around Python module to allow lazy loading.""" + def __init__(self, name, members=None): + """Initialize a module without loading it. + + Parameters + ---------- + name : str + Module name + members : list[str, dict] + List of submodules to be loaded as AliasModules. If a dict, the submodule + is loaded with subsubmodules corresponding to the dictionary values; + if a string, the submodule has no subsubmodules. 
+ """ # easy access to AliasModule members to avoid recursionerror super_setattr = super().__setattr__ if members is None: @@ -56,6 +72,7 @@ def __init__(self, name, members=None): @property def __loaded_module__(self): + """Load the module, or retrieve it if already loaded.""" # easy access to AliasModule members to avoid recursionerror super_getattr = super().__getattribute__ name = super_getattr("__module_name__") @@ -67,6 +84,7 @@ def __loaded_module__(self): return sys.modules[name] def __getattribute__(self, attr): + """Access AliasModule members.""" # easy access to AliasModule members to avoid recursionerror super_getattr = super().__getattribute__ if attr in super_getattr("__submodules__"): @@ -85,8 +103,10 @@ def __getattribute__(self, attr): return getattr(super_getattr("__loaded_module__"), attr) def __setattr__(self, name, value): - # allows monkey-patching - # easy access to AliasModule members to avoid recursionerror + """Allow monkey-patching. + + Gives easy access to AliasModule members to avoid recursionerror. + """ super_getattr = super().__getattribute__ return setattr(super_getattr("__loaded_module__"), name, value) diff --git a/scprep/filter.py b/scprep/filter.py index 02b78ff3..37837a18 100644 --- a/scprep/filter.py +++ b/scprep/filter.py @@ -1,17 +1,15 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from . import measure +from . import select +from . import utils +from scipy import sparse +import numbers import numpy as np import pandas as pd -from scipy import sparse - import warnings -import numbers - -from . import utils, measure, select -def remove_empty_genes(data, *extra_data): +def remove_empty_genes(data, *extra_data): # noqa warnings.warn( "`scprep.filter.remove_empty_genes` is deprecated. 
" "Use `scprep.filter.filter_empty_genes` instead.", @@ -20,7 +18,7 @@ def remove_empty_genes(data, *extra_data): return filter_empty_genes(data, *extra_data) -def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): +def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): # noqa warnings.warn( "`scprep.filter.remove_rare_genes` is deprecated. " "Use `scprep.filter.filter_rare_genes` instead.", @@ -29,7 +27,7 @@ def remove_rare_genes(data, *extra_data, cutoff=0, min_cells=5): return filter_rare_genes(data, *extra_data, cutoff=cutoff, min_cells=min_cells) -def remove_empty_cells(data, *extra_data, sample_labels=None): +def remove_empty_cells(data, *extra_data, sample_labels=None): # noqa warnings.warn( "`scprep.filter.remove_empty_cells` is deprecated. " "Use `scprep.filter.filter_empty_cells` instead.", @@ -38,7 +36,7 @@ def remove_empty_cells(data, *extra_data, sample_labels=None): return filter_empty_cells(data, *extra_data, sample_labels=sample_labels) -def remove_duplicates(data, *extra_data, sample_labels=None): +def remove_duplicates(data, *extra_data, sample_labels=None): # noqa warnings.warn( "`scprep.filter.remove_duplicates` is deprecated. " "Use `scprep.filter.filter_duplicates` instead.", @@ -140,7 +138,7 @@ def filter_values( keep_cells="above", return_values=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove all cells with `values` above or below a certain threshold. @@ -209,7 +207,7 @@ def filter_library_size( keep_cells=None, return_library_size=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove all cells with library size above or below a certain threshold. @@ -276,7 +274,7 @@ def filter_gene_set_expression( keep_cells=None, return_expression=False, sample_labels=None, - filter_per_sample=None + filter_per_sample=None, ): """Remove cells with total expression of a gene set above or below a threshold. 
diff --git a/scprep/io/__init__.py b/scprep/io/__init__.py index d34bd92c..58a1656d 100644 --- a/scprep/io/__init__.py +++ b/scprep/io/__init__.py @@ -1,9 +1,10 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - -from .csv import load_csv, load_tsv -from .tenx import load_10X, load_10X_zip, load_10X_HDF5 +from . import download +from . import hdf5 +from .csv import load_csv +from .csv import load_tsv from .fcs import load_fcs -from .mtx import load_mtx, save_mtx - -from . import download, hdf5 +from .mtx import load_mtx +from .mtx import save_mtx +from .tenx import load_10X +from .tenx import load_10X_HDF5 +from .tenx import load_10X_zip diff --git a/scprep/io/csv.py b/scprep/io/csv.py index 0677a48f..4d082eb7 100644 --- a/scprep/io/csv.py +++ b/scprep/io/csv.py @@ -1,11 +1,8 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from .. import utils +from .utils import _matrix_to_data_frame import pandas as pd -from .utils import _matrix_to_data_frame -from .. import utils - def _read_csv_sparse(filename, chunksize=10000, fill_value=0.0, **kwargs): """Read a csv file into a pd.DataFrame[pd.SparseArray].""" @@ -24,7 +21,7 @@ def load_csv( cell_names=True, sparse=False, chunksize=10000, - **kwargs + **kwargs, ): r"""Load a csv file. @@ -111,7 +108,7 @@ def load_tsv( gene_names=True, cell_names=True, sparse=False, - **kwargs + **kwargs, ): r"""Load a tsv file. diff --git a/scprep/io/download.py b/scprep/io/download.py index f399d1b5..1c6eed87 100644 --- a/scprep/io/download.py +++ b/scprep/io/download.py @@ -1,10 +1,10 @@ -import zipfile -import tempfile +from .. import utils +from .._lazyload import requests + import os +import tempfile import urllib.request - -from .._lazyload import requests -from .. 
import utils +import zipfile _CHUNK_SIZE = 32768 _GOOGLE_DRIVE_URL = "https://docs.google.com/uc?export=download" diff --git a/scprep/io/fcs.py b/scprep/io/fcs.py index e8d29a8a..bbfac0f5 100644 --- a/scprep/io/fcs.py +++ b/scprep/io/fcs.py @@ -1,17 +1,14 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from .. import utils +from .._lazyload import fcsparser +from .utils import _matrix_to_data_frame +from io import BytesIO -import pandas as pd import numpy as np -import struct -from io import BytesIO +import pandas as pd import string +import struct import warnings -from .utils import _matrix_to_data_frame -from .._lazyload import fcsparser -from .. import utils - def _channel_names_from_meta(meta, channel_numbers, naming="N"): try: @@ -262,7 +259,7 @@ def load_fcs( channel_naming="$PnS", reformat_meta=True, override=False, - **kwargs + **kwargs, ): """Load a fcs file. diff --git a/scprep/io/hdf5.py b/scprep/io/hdf5.py index 0abc9291..1dca5dae 100644 --- a/scprep/io/hdf5.py +++ b/scprep/io/hdf5.py @@ -1,10 +1,7 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 -from decorator import decorator - -from .._lazyload import tables -from .._lazyload import h5py from .. import utils +from .._lazyload import h5py +from .._lazyload import tables +from decorator import decorator try: ModuleNotFoundError @@ -15,6 +12,7 @@ @decorator def with_HDF5(fun, *args, **kwargs): + """Ensure that HDF5 is available to run the decorated function.""" if not (utils._try_import("tables") or utils._try_import("h5py")): raise ModuleNotFoundError( "Found neither tables nor h5py. " diff --git a/scprep/io/mtx.py b/scprep/io/mtx.py index 282c1170..3993a7bf 100644 --- a/scprep/io/mtx.py +++ b/scprep/io/mtx.py @@ -1,13 +1,10 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - -import scipy.io as sio +from .. 
import utils +from .utils import _matrix_to_data_frame from scipy import sparse -import pandas as pd -import os -from .utils import _matrix_to_data_frame -from .. import utils +import os +import pandas as pd +import scipy.io as sio def load_mtx(mtx_file, cell_axis="row", gene_names=None, cell_names=None, sparse=None): diff --git a/scprep/io/tenx.py b/scprep/io/tenx.py index a066fb7c..7eb96855 100644 --- a/scprep/io/tenx.py +++ b/scprep/io/tenx.py @@ -1,19 +1,16 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from . import hdf5 +from .utils import _matrix_to_data_frame +import numpy as np +import os import pandas as pd import scipy.io as sio import scipy.sparse as sp -import warnings -import numpy as np -import os -import zipfile +import shutil import tempfile import urllib -import shutil - -from .utils import _matrix_to_data_frame -from . import hdf5 +import warnings +import zipfile def _combine_gene_id(symbols, ids): diff --git a/scprep/io/utils.py b/scprep/io/utils.py index e68b77db..9ca4ea2c 100644 --- a/scprep/io/utils.py +++ b/scprep/io/utils.py @@ -1,12 +1,10 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from .. import sanitize +from .. import utils +import numpy as np import pandas as pd import scipy.sparse as sp import warnings -import numpy as np - -from .. import utils, sanitize def _parse_header(header, n_expected, header_type="gene_names"): diff --git a/scprep/measure.py b/scprep/measure.py index ecf7fef4..894770fc 100644 --- a/scprep/measure.py +++ b/scprep/measure.py @@ -1,9 +1,10 @@ +from . import select +from . import utils +from scipy import sparse + import numpy as np import pandas as pd import scipy.signal -from scipy import sparse - -from . import utils, select def library_size(data): diff --git a/scprep/normalize.py b/scprep/normalize.py index 1cd91562..ce39a40c 100644 --- a/scprep/normalize.py +++ b/scprep/normalize.py @@ -1,13 +1,12 @@ -# author: Daniel Burkhardt -# (C) 2018 Krishnaswamy Lab GPLv2 - +from . 
import measure +from . import utils +from scipy import sparse from sklearn.preprocessing import normalize + +import numbers import numpy as np -from scipy import sparse import pandas as pd -import numbers import warnings -from . import measure, utils def _get_scaled_libsize(data, rescale=10000, return_library_size=False): diff --git a/scprep/plot/__init__.py b/scprep/plot/__init__.py index 582a8a0d..b6ddf9ab 100644 --- a/scprep/plot/__init__.py +++ b/scprep/plot/__init__.py @@ -1,7 +1,13 @@ -from .scatter import scatter, scatter2d, scatter3d, rotate_scatter3d -from .histogram import histogram, plot_library_size, plot_gene_set_expression +from . import colors +from . import tools +from .histogram import histogram +from .histogram import plot_gene_set_expression +from .histogram import plot_library_size +from .jitter import jitter from .marker import marker_plot +from .scatter import rotate_scatter3d +from .scatter import scatter +from .scatter import scatter2d +from .scatter import scatter3d from .scree import scree_plot -from .jitter import jitter from .variable_genes import plot_gene_variability -from . import tools, colors diff --git a/scprep/plot/colors.py b/scprep/plot/colors.py index 07093fb1..095dbcdb 100644 --- a/scprep/plot/colors.py +++ b/scprep/plot/colors.py @@ -1,7 +1,7 @@ -import numpy as np +from .._lazyload import matplotlib as mpl from . import tools -from .._lazyload import matplotlib as mpl +import numpy as np plt = mpl.pyplot diff --git a/scprep/plot/histogram.py b/scprep/plot/histogram.py index c6a21b42..f6761e9c 100644 --- a/scprep/plot/histogram.py +++ b/scprep/plot/histogram.py @@ -1,11 +1,14 @@ -import numpy as np -import numbers - +from .. import measure +from .. import utils +from .tools import label_axis +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize from scipy import sparse -from .. 
import measure, utils -from .utils import _get_figure, show, temp_fontsize, parse_fontsize -from .tools import label_axis +import numbers +import numpy as np _EPS = np.finfo("float").eps @@ -77,7 +80,7 @@ def histogram( alpha=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot a histogram. @@ -204,7 +207,7 @@ def plot_library_size( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the library size histogram. @@ -292,7 +295,7 @@ def plot_gene_set_expression( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the histogram of the expression of a gene set. diff --git a/scprep/plot/jitter.py b/scprep/plot/jitter.py index 49c9d177..8159bb3b 100644 --- a/scprep/plot/jitter.py +++ b/scprep/plot/jitter.py @@ -1,11 +1,16 @@ -import numpy as np -import pandas as pd - from .. import utils -from .utils import _get_figure, show, temp_fontsize, parse_fontsize, _with_default -from .tools import label_axis, generate_colorbar, generate_legend - from .scatter import _ScatterParams +from .tools import generate_colorbar +from .tools import generate_legend +from .tools import label_axis +from .utils import _get_figure +from .utils import _with_default +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize + +import numpy as np +import pandas as pd class _JitterParams(_ScatterParams): @@ -60,7 +65,7 @@ def jitter( vmax=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a jitter plot. diff --git a/scprep/plot/marker.py b/scprep/plot/marker.py index 72c183ae..0ccbf6ad 100644 --- a/scprep/plot/marker.py +++ b/scprep/plot/marker.py @@ -1,10 +1,16 @@ -import numpy as np -import pandas as pd +from .. import select +from .. import stats +from .. 
import utils +from .tools import label_axis +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import shift_ticklabels +from .utils import show +from .utils import temp_fontsize from scipy.cluster import hierarchy -from .. import utils, stats, select -from .utils import _get_figure, show, temp_fontsize, parse_fontsize, shift_ticklabels -from .tools import label_axis +import numpy as np +import pandas as pd def _make_scatter_arrays( diff --git a/scprep/plot/scatter.py b/scprep/plot/scatter.py index 8da64547..e61c0a04 100644 --- a/scprep/plot/scatter.py +++ b/scprep/plot/scatter.py @@ -1,29 +1,25 @@ -import numpy as np +from .. import select +from .. import utils +from .._lazyload import matplotlib as mpl +from . import colors +from .tools import create_colormap +from .tools import create_normalize +from .tools import generate_colorbar +from .tools import generate_legend +from .tools import label_axis +from .utils import _get_figure +from .utils import _in_ipynb +from .utils import _is_color_array +from .utils import _with_default +from .utils import parse_fontsize +from .utils import show +from .utils import temp_fontsize + import numbers +import numpy as np import pandas as pd import warnings -from .. import utils, select -from .utils import ( - _get_figure, - _is_color_array, - show, - _in_ipynb, - parse_fontsize, - temp_fontsize, - _with_default, -) -from .tools import ( - create_colormap, - create_normalize, - label_axis, - generate_colorbar, - generate_legend, -) -from . import colors - -from .._lazyload import matplotlib as mpl - plt = mpl.pyplot @@ -605,7 +601,7 @@ def scatter( azim=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a scatter plot. @@ -857,7 +853,7 @@ def scatter2d( legend_ncol=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a 2D scatter plot. 
@@ -1050,7 +1046,7 @@ def scatter3d( azim=None, filename=None, dpi=None, - **plot_kwargs + **plot_kwargs, ): """Create a 3D scatter plot. @@ -1233,7 +1229,7 @@ def rotate_scatter3d( elev=None, ipython_html="jshtml", dpi=None, - **kwargs + **kwargs, ): """Create a rotating 3D scatter plot. diff --git a/scprep/plot/scree.py b/scprep/plot/scree.py index c49bd15d..12c72358 100644 --- a/scprep/plot/scree.py +++ b/scprep/plot/scree.py @@ -1,10 +1,11 @@ -import numpy as np - from .. import utils from .._lazyload import matplotlib as mpl - -from .utils import _get_figure, show, temp_fontsize from .tools import label_axis +from .utils import _get_figure +from .utils import show +from .utils import temp_fontsize + +import numpy as np @utils._with_pkg(pkg="matplotlib", min_version=3) @@ -18,7 +19,7 @@ def scree_plot( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the explained variance of each principal component. diff --git a/scprep/plot/tools.py b/scprep/plot/tools.py index e8324fda..8d82ed98 100644 --- a/scprep/plot/tools.py +++ b/scprep/plot/tools.py @@ -1,10 +1,11 @@ -import numpy as np -import warnings - from .. import utils -from .utils import _get_figure, parse_fontsize, temp_fontsize - from .._lazyload import matplotlib as mpl +from .utils import _get_figure +from .utils import parse_fontsize +from .utils import temp_fontsize + +import numpy as np +import warnings plt = mpl.pyplot @@ -98,7 +99,7 @@ def generate_legend( title_fontsize=None, max_rows=10, ncol=None, - **kwargs + **kwargs, ): """Generate a legend on an axis. @@ -179,7 +180,7 @@ def generate_colorbar( n_ticks="auto", labelpad=10, mappable=None, - **kwargs + **kwargs, ): """Generate a colorbar on an axis. diff --git a/scprep/plot/utils.py b/scprep/plot/utils.py index 4ab07191..8f9986a9 100644 --- a/scprep/plot/utils.py +++ b/scprep/plot/utils.py @@ -1,11 +1,10 @@ -import numpy as np -import platform - from .. 
import utils - from .._lazyload import matplotlib as mpl from .._lazyload import mpl_toolkits +import numpy as np +import platform + plt = mpl.pyplot @@ -146,16 +145,21 @@ def parse_fontsize(size=None, default=None): class temp_fontsize(object): + """Context manager to temporarily change matplotlib font size.""" + def __init__(self, size=None): + """Initialize the context manager.""" if size is None: size = plt.rcParams["font.size"] self.size = size def __enter__(self): + """Temporarily set the font size.""" self.old_size = plt.rcParams["font.size"] plt.rcParams["font.size"] = self.size def __exit__(self, type, value, traceback): + """Change the font size back to default.""" plt.rcParams["font.size"] = self.old_size diff --git a/scprep/plot/variable_genes.py b/scprep/plot/variable_genes.py index 583b3985..82ff8692 100644 --- a/scprep/plot/variable_genes.py +++ b/scprep/plot/variable_genes.py @@ -1,5 +1,6 @@ +from .. import measure +from .. import utils from .scatter import scatter -from .. import utils, measure @utils._with_pkg(pkg="matplotlib", min_version=3) @@ -17,7 +18,7 @@ def plot_gene_variability( fontsize=None, filename=None, dpi=None, - **kwargs + **kwargs, ): """Plot the histogram of gene variability. diff --git a/scprep/reduce.py b/scprep/reduce.py index a29d5407..2b3b0a6c 100644 --- a/scprep/reduce.py +++ b/scprep/reduce.py @@ -1,12 +1,13 @@ -from sklearn import decomposition, random_projection -import sklearn.base -import pandas as pd +from . import utils from scipy import sparse +from sklearn import decomposition +from sklearn import random_projection + import numpy as np +import pandas as pd +import sklearn.base import warnings -from . 
import utils - class InvertibleRandomProjection(random_projection.GaussianRandomProjection): """Gaussian random projection with an inverse transform using the pseudoinverse.""" diff --git a/scprep/run/__init__.py b/scprep/run/__init__.py index 0a07c96e..355e8930 100644 --- a/scprep/run/__init__.py +++ b/scprep/run/__init__.py @@ -1,3 +1,6 @@ -from .r_function import RFunction, install_bioconductor -from .splatter import SplatSimulate +from .dyngen import DyngenSimulate +from .r_function import install_bioconductor +from .r_function import install_github +from .r_function import RFunction from .slingshot import Slingshot +from .splatter import SplatSimulate diff --git a/scprep/run/conversion.py b/scprep/run/conversion.py index ae042423..1024df86 100644 --- a/scprep/run/conversion.py +++ b/scprep/run/conversion.py @@ -1,9 +1,10 @@ +from .. import utils +from .._lazyload import anndata2ri +from .._lazyload import rpy2 + import numpy as np import warnings -from .. import utils -from .._lazyload import rpy2, anndata2ri - def _rpylist2py(robject): if not isinstance(robject, rpy2.robjects.vectors.ListVector): @@ -46,15 +47,12 @@ def _pysce2rpy(pyobject): def _is_r_object(obj): - return ( - "rpy2.robjects" in str(type(obj)) - or "rpy2.rinterface" in str(type(obj)) - or obj is rpy2.rinterface.NULL - ) + return "rpy2.robjects" in str(type(obj)) or "rpy2.rinterface" in str(type(obj)) def _is_builtin(obj): - return isinstance(obj, (int, str, float)) + """Check if an object need not be converted.""" + return isinstance(obj, (float, int, str, bool)) @utils._with_pkg(pkg="rpy2", min_version="3.0") diff --git a/scprep/run/dyngen.py b/scprep/run/dyngen.py new file mode 100644 index 00000000..5ce52f0a --- /dev/null +++ b/scprep/run/dyngen.py @@ -0,0 +1,316 @@ +from . 
import r_function + +import pandas as pd + +_install_dyngen = r_function.RFunction( + args="""lib=.libPaths()[1], dependencies=NA, + repos='http://cran.rstudio.com', verbose=TRUE""", + body=""" + install.packages(c("dynwrap", "dyngen"), + lib=lib, + repos=repos, + dependencies=dependencies) + """, +) + +_get_backbones = r_function.RFunction( + setup=""" + library(dyngen) + """, + body=""" + names(list_backbones()) + """, +) + +_DyngenSimulate = r_function.RFunction( + args=""" + backbone_name=character(), num_cells=500, num_tfs=100, num_targets=50, + num_hks=25,simulation_census_interval=10, compute_cellwise_grn=FALSE, + compute_rna_velocity=FALSE, n_jobs=7, random_state=NA, verbose=TRUE + """, + setup=""" + library(dyngen) + """, + body=""" + if (!is.na(random_state)) { + set.seed(random_state) + } + + backbones <- list('bifurcating'=backbone_bifurcating(), + 'bifurcating_converging'=backbone_bifurcating_converging(), + 'bifurcating_cycle'=backbone_bifurcating_cycle(), + 'bifurcating_loop'=backbone_bifurcating_loop(), + 'binary_tree'=backbone_binary_tree(), + 'branching'=backbone_branching(), + 'consecutive_bifurcating'=backbone_consecutive_bifurcating(), + 'converging'=backbone_converging(), + 'cycle'=backbone_cycle(), + 'cycle_simple'=backbone_cycle_simple(), + 'disconnected'=backbone_disconnected(), + 'linear'=backbone_linear(), + 'linear_simple'=backbone_linear_simple(), + 'trifurcating'=backbone_trifurcating() + ) + + backbone <- backbones[[backbone_name]] + # silent default behavior of dyngen + if (num_tfs < nrow(backbone$module_info)) { + if (verbose) { + cat("If input num_tfs is less than backbone default,", + "Dyngen uses backbone default.\n") + } + num_tfs <- nrow(backbone$module_info) + } + if (verbose) { + cat('Run Parameters:') + cat('\n\tBackbone:', backbone_name) + cat('\n\tNumber of Cells:', num_cells) + cat('\n\tNumber of TFs:', num_tfs) + cat('\n\tNumber of Targets:', num_targets) + cat('\n\tNumber of HKs:', num_hks, '\n') + } + + init <- 
initialise_model( + backbone=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_params=simulation_default( + census_interval=as.double(simulation_census_interval), + kinetics_noise_function = kinetics_noise_simple(mean=1, sd=0.005), + ssa_algorithm = ssa_etl(tau=300/3600), + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity), + num_cores = n_jobs, + download_cache_dir=NULL, + verbose=verbose + ) + out <- generate_dataset(init) + data <- list(cell_info = as.data.frame(out$dataset$cell_info), + expression = as.data.frame(as.matrix(out$dataset$expression))) + + if (compute_cellwise_grn) { + data[['bulk_grn']] <- as.data.frame(out$dataset$regulatory_network) + data[['cellwise_grn']] <- as.data.frame(out$dataset$regulatory_network_sc) + } + if (compute_rna_velocity) { + data[['rna_velocity']] <- as.data.frame(as.matrix(out$dataset$rna_velocity)) + } + + data + """, +) + + +def install( + lib=None, + dependencies=None, + repos="http://cran.us.r-project.org", + verbose=True, +): + """Install Dyngen from CRAN. + + Parameters + ---------- + lib: string + Directory to install the package. + If missing, defaults to the first element of .libPaths(). + dependencies: boolean, optional (default: None/NA) + When True, installs all packages specified under "Depends", "Imports", + "LinkingTo" and "Suggests". + When False, installs no dependencies. + When None/NA, installs all packages specified under "Depends", "Imports" + and "LinkingTo". + repos: string, optional (default: "http://cran.us.r-project.org"): + R package repository. + verbose: boolean, optional (default: True) + Install script verbosity. + """ + + kwargs = {} + if lib is not None: + kwargs["lib"] = lib + if dependencies is not None: + kwargs["dependencies"] = dependencies + + _install_dyngen( + repos=repos, + verbose=verbose, + **kwargs, + ) + + +def get_backbones(): + """Output full list of cell trajectory backbones. 
+ + Returns + ------- + backbones: array of backbone names + """ + return _get_backbones() + + +def DyngenSimulate( + backbone, + num_cells=500, + num_tfs=100, + num_targets=50, + num_hks=25, + simulation_census_interval=10, + compute_cellwise_grn=False, + compute_rna_velocity=False, + n_jobs=7, + random_state=None, + verbose=True, + force_num_cells=False, +): + """Simulate dataset with cellular backbone. + + The backbone determines the overall dynamic process during a simulation. + It consists of a set of gene modules, which regulate each other such that + expression of certain genes changes over time in a specific manner. + + DyngenSimulate is a Python wrapper for the R package Dyngen. + Default values obtained from Github vignettes. + For more details, read about Dyngen on Github_. + + .. _Github: https://github.com/dynverse/dyngen + + Parameters + ---------- + backbone: string + Backbone name from dyngen list of backbones. + Get list with get_backbones(). + num_cells: int, optional (default: 500) + Number of cells. + num_tfs: int, optional (default: 100) + Number of transcription factors. + The TFs are the main drivers of the molecular changes in the simulation. + A TF can only be regulated by other TFs or itself. + + NOTE: If num_tfs input is less than nrow(backbone$module_info), + Dyngen will default to nrow(backbone$module_info). + This quantity varies between backbones and with each run (without seed). + It is generally less than 75. + It is recommended to input num_tfs >= 100 to stabilize the output. + num_targets: int, optional (default: 50) + Number of target genes. + Target genes are regulated by a TF or another target gene, + but are always downstream of at least one TF. + num_hks: int, optional (default: 25) + Number of housekeeping genes. + Housekeeping genes are completely separate from any TFs or target genes. + simulation_census_interval: int, optional (default: 10) + Stores the abundance levels only after a specific interval has passed.
+ The lower the interval, the higher detail of simulation trajectory retained, + though many timepoints will contain similar information. + compute_cellwise_grn: boolean, optional (default: False) + If True, computes the ground truth cellwise gene regulatory networks. + Also outputs ground truth bulk (entire dataset) regulatory network. + NOTE: Increases compute time significantly. + compute_rna_velocity: boolean, optional (default: False) + If True, computes the ground truth propensity ratios after simulation. + NOTE: Increases compute time significantly. + n_jobs: int, optional (default: 7) + Number of cores to use. + random_state: int, optional (default: None) + Fixes seed for simulation generator. + verbose: boolean, optional (default: True) + Data generation verbosity. + force_num_cells: boolean, optional (default: False) + Dyngen occasionally produces fewer cells than specified. + Set this flag to True to rerun Dyngen until correct cell count is reached. + + Returns + ------- + Dictionary data of pd.DataFrames: + data['cell_info']: pd.DataFrame, shape (n_cells, 4) + Columns: cell_id, step_ix, simulation_i, sim_time + sim_time is the simulated timepoint for a given cell. + + data['expression']: pd.DataFrame, shape (n_cells, n_genes) + Log-transformed counts with dropout. + + If compute_cellwise_grn is True, + data['bulk_grn']: pd.DataFrame, shape (n_tf_target_interactions, 4) + Columns: regulator, target, strength, effect. + Strength is positive and unbounded. + Effect is either +1 (for activation) or -1 (for inhibition). + + data['cellwise_grn']: pd.DataFrame, shape (n_tf_target_interactions_per_cell, 4) + Columns: cell_id, regulator, target, strength. + The output does not include all edges per cell. + The regulatory effect lies between [-1, 1], where -1 is complete inhibition + of target by TF, +1 is maximal activation of target by TF, + and 0 is inactivity of the regulatory interaction between R and T.
+ + If compute_rna_velocity is True, + data['rna_velocity']: pd.DataFrame, shape (n_cells, n_genes) + Propensity ratios for each cell. + + Example + -------- + >>> import scprep + >>> scprep.run.dyngen.install() + >>> backbones = scprep.run.dyngen.get_backbones() + >>> data = scprep.run.DyngenSimulate(backbone=backbones[0]) + """ + if backbone not in get_backbones(): + raise ValueError( + ( + "Input not in default backbone list. " + "Choose backbone from get_backbones()" + ) + ) + + kwargs = {} + if random_state is not None: + kwargs["random_state"] = random_state + + rdata = _DyngenSimulate( + backbone_name=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + rpy_verbose=verbose, + **kwargs, + ) + + if force_num_cells: + if random_state is None: + random_state = -1 + + if pd.DataFrame(rdata["cell_info"]).shape[0] != num_cells: + random_state += 1 + rdata = DyngenSimulate( + backbone=backbone, + num_cells=num_cells, + num_tfs=num_tfs, + num_targets=num_targets, + num_hks=num_hks, + simulation_census_interval=simulation_census_interval, + compute_cellwise_grn=compute_cellwise_grn, + compute_rna_velocity=compute_rna_velocity, + n_jobs=n_jobs, + verbose=verbose, + random_state=random_state, + force_num_cells=force_num_cells, + ) + + data = {} + data["cell_info"] = pd.DataFrame(rdata["cell_info"]) + data["expression"] = pd.DataFrame(rdata["expression"]) + if compute_cellwise_grn: + data["cellwise_grn"] = pd.DataFrame(rdata["cellwise_grn"]) + data["bulk_grn"] = pd.DataFrame(rdata["bulk_grn"]) + if compute_rna_velocity: + data["rna_velocity"] = pd.DataFrame(rdata["rna_velocity"]) + + return data diff --git a/scprep/run/r_function.py b/scprep/run/r_function.py index 37d66c72..58efa510 100644 --- a/scprep/run/r_function.py +++ 
b/scprep/run/r_function.py @@ -1,6 +1,6 @@ -from . import conversion from .. import utils from .._lazyload import rpy2 +from . import conversion def _console_warning(s, log_fn): @@ -178,3 +178,82 @@ def install_bioconductor( if version is not None: kwargs["version"] = version _install_bioconductor(**kwargs) + + +_install_github = RFunction( + args="""repo=character(), lib=.libPaths()[1], dependencies=NA, + update=FALSE, repos='http://cran.rstudio.com', + build_vignettes=FALSE, force=FALSE, verbose=TRUE""", + body=""" + quiet <- !verbose + + if (!require('remotes', quietly=TRUE)) install.packages('remotes') + remotes::install_github(repo=repo, + lib=lib, dependencies=dependencies, + upgrade=update, repos=repos, + build_vignettes=build_vignettes, + force=force, quiet=quiet) + + # prepend path to libPaths if new library + if (lib != .libPaths()[1]) .libPaths(c(lib, .libPaths())) + + if (verbose) cat('.libPaths():', .libPaths()) + """, +) + + +def install_github( + repo, + lib=None, + dependencies=None, + update=False, + repos="http://cran.us.r-project.org", + build_vignettes=False, + force=False, + verbose=True, +): + """Install a Github repository. + + Parameters + ---------- + repo: string + Github repository name to install. + lib: string + Directory to install the package. + If missing, defaults to the first element of .libPaths(). + dependencies: boolean, optional (default: None/NA) + When True, installs all packages specified under "Depends", "Imports", + "LinkingTo" and "Suggests". + When False, installs no dependencies. + When None/NA, installs all packages specified under "Depends", "Imports" + and "LinkingTo". + update: string or boolean, optional (default: False) + One of "default", "ask", "always", or "never". "default" + Respects R_REMOTES_UPGRADE variable if set, falls back to "ask" if unset. + "ask" prompts the user for which out of date packages to upgrade. + For non-interactive sessions "ask" is equivalent to "always". 
+ TRUE and FALSE also accepted, correspond to "always" and "never" respectively. + repos: string, optional (default: "http://cran.us.r-project.org"): + R package repository. + build_vignettes: boolean, optional (default: False) + Builds Github vignettes. + force: boolean, optional (default: False) + Forces installation even if remote state has not changed since previous install. + verbose: boolean, optional (default: True) + Install script verbosity. + """ + kwargs = {} + if lib is not None: + kwargs["lib"] = lib + if dependencies is not None: + kwargs["dependencies"] = dependencies + + _install_github( + repo=repo, + update=update, + repos=repos, + build_vignettes=build_vignettes, + force=force, + verbose=verbose, + **kwargs, + ) diff --git a/scprep/run/slingshot.py b/scprep/run/slingshot.py index 67ec5377..2ee8247c 100644 --- a/scprep/run/slingshot.py +++ b/scprep/run/slingshot.py @@ -1,10 +1,10 @@ +from .. import utils +from . import r_function + import numpy as np import pandas as pd import warnings -from . import r_function -from .. import utils - def install(site_repository=None, update=False, version=None, verbose=True): """Install the required R packages to run Slingshot. diff --git a/scprep/run/splatter.py b/scprep/run/splatter.py index 41a16bf9..e3137e5c 100644 --- a/scprep/run/splatter.py +++ b/scprep/run/splatter.py @@ -1,9 +1,9 @@ -import numpy as np +from . import r_function + import numbers +import numpy as np import warnings -from . import r_function - def _sum_to_one(x): x = x / np.sum(x) # fix numerical error diff --git a/scprep/sanitize.py b/scprep/sanitize.py index 0140d4bc..c34c03ab 100644 --- a/scprep/sanitize.py +++ b/scprep/sanitize.py @@ -1,10 +1,8 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from . import utils import numpy as np import pandas as pd import warnings -from . 
import utils def check_numeric(data, dtype="float", copy=None, suppress_errors=False): diff --git a/scprep/select.py b/scprep/select.py index 6645c35b..fe79fb6b 100644 --- a/scprep/select.py +++ b/scprep/select.py @@ -1,12 +1,12 @@ +from . import utils +from scipy import sparse + +import numbers import numpy as np import pandas as pd -import numbers -from scipy import sparse -import warnings import re import sys - -from . import utils +import warnings if int(sys.version.split(".")[1]) < 7: _re_pattern = type(re.compile("")) @@ -312,7 +312,7 @@ def select_cols( starts_with=None, ends_with=None, exact_word=None, - regex=None + regex=None, ): """Select columns from a data matrix. @@ -470,7 +470,7 @@ def select_rows( starts_with=None, ends_with=None, exact_word=None, - regex=None + regex=None, ): """Select rows from a data matrix. diff --git a/scprep/stats.py b/scprep/stats.py index f441fd79..7f4f4faa 100644 --- a/scprep/stats.py +++ b/scprep/stats.py @@ -1,17 +1,18 @@ -# author: Daniel Burkhardt -# (C) 2018 Krishnaswamy Lab GPLv2 +from . import plot +from . import select +from . import utils +from ._lazyload import matplotlib +from scipy import sparse +from scipy import stats +from sklearn import metrics +from sklearn import neighbors +import joblib import numbers import numpy as np import pandas as pd -from scipy import stats, sparse -from sklearn import neighbors, metrics -import joblib -from . import plot, utils, select import warnings -from ._lazyload import matplotlib - plt = matplotlib.pyplot @@ -208,7 +209,7 @@ def knnDREMI( ) # constant input: mutual information is numerically zero if return_drevi: - return 0, None + return 0, np.zeros((n_bins, n_bins), dtype=float) else: return 0 @@ -407,7 +408,7 @@ def _preprocess_test_matrices(X, Y): def mean_difference(X, Y): - """Calculate the mean difference in genes between two datasets + """Calculate the mean difference in genes between two datasets. 
In the case where the data has been log normalized, this is equivalent to fold change. diff --git a/scprep/transform.py b/scprep/transform.py index 51b075e5..0a307ce4 100644 --- a/scprep/transform.py +++ b/scprep/transform.py @@ -1,10 +1,8 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 +from . import utils +from scipy import sparse import numpy as np -from scipy import sparse import warnings -from . import utils def sqrt(data): @@ -108,7 +106,7 @@ def arcsinh(data, cofactor=5): return utils.matrix_transform(data, np.arcsinh) -def sqrt_transform(*args, **kwargs): +def sqrt_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.sqrt_transform is deprecated. Please use " "scprep.transform.sqrt in future.", @@ -117,7 +115,7 @@ def sqrt_transform(*args, **kwargs): return sqrt(*args, **kwargs) -def log_transform(*args, **kwargs): +def log_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.log_transform is deprecated. Please use " "scprep.transform.log in future.", @@ -126,7 +124,7 @@ def log_transform(*args, **kwargs): return log(*args, **kwargs) -def arcsinh_transform(*args, **kwargs): +def arcsinh_transform(*args, **kwargs): # noqa warnings.warn( "scprep.transform.arcsinh_transform is deprecated. 
Please " "use scprep.transform.arcsinh in future.", diff --git a/scprep/utils.py b/scprep/utils.py index 754263ac..23a174b4 100644 --- a/scprep/utils.py +++ b/scprep/utils.py @@ -1,13 +1,12 @@ -import numpy as np -import pandas as pd +from decorator import decorator +from scipy import sparse -import numbers -import warnings import importlib +import numbers +import numpy as np +import pandas as pd import re - -from scipy import sparse -from decorator import decorator +import warnings try: ModuleNotFoundError diff --git a/scprep/version.py b/scprep/version.py index d6e6e024..6849410a 100644 --- a/scprep/version.py +++ b/scprep/version.py @@ -1,4 +1 @@ -# author: Scott Gigante -# (C) 2018 Krishnaswamy Lab GPLv2 - -__version__ = "1.0.13" +__version__ = "1.1.0" diff --git a/setup.cfg b/setup.cfg index 1c0a87e1..43bfdd83 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,7 +7,7 @@ warning-is-error = 0 [flake8] ignore = # top-level module docstring - D100, D104, W503 + D100, D104, W503, # space before : conflicts with black E203 per-file-ignores = @@ -20,5 +20,9 @@ exclude = build, dist, test, - doc, - Snakefile + doc + +[isort] +profile = black +force_single_line = true +force_alphabetical_sort = true diff --git a/setup.py b/setup.py index fd09f748..7bf49aa9 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ +from setuptools import find_packages +from setuptools import setup + import os -import sys -from setuptools import setup, find_packages install_requires = [ "numpy>=1.12.0", @@ -11,12 +12,7 @@ "packaging", ] -optional_requires = [ - "fcsparser", - "tables", - "h5py", - "anndata", -] +optional_requires = ["fcsparser", "tables", "h5py", "anndata", "anndata2ri>=1.0.6"] test_requires = [ "nose", @@ -28,6 +24,9 @@ "packaging", "mock", "h5py", + "matplotlib>=3.0", + "rpy2>=3.0", + "black", ] doc_requires = [ @@ -35,16 +34,9 @@ "sphinxcontrib-napoleon", "ipykernel", "nbsphinx", + "autodocsumm", ] -if sys.version_info[:2] < (3, 6): - test_requires += ["matplotlib>=3.0,<3.1", 
"rpy2>=3.0,<3.1"] - doc_requires += ["autodocsumm!=0.2.0"] -else: - test_requires += ["matplotlib>=3.0", "rpy2>=3.0", "black"] - optional_requires += ["anndata2ri>=1.0.6"] - doc_requires += ["autodocsumm"] - version_py = os.path.join(os.path.dirname(__file__), "scprep", "version.py") version = open(version_py).read().strip().split("=")[-1].replace('"', "").strip() @@ -57,9 +49,9 @@ author="Scott Gigante, Daniel Burkhardt and Jay Stanley, Yale University", author_email="krishnaswamylab@gmail.com", packages=find_packages(), - license="GNU General Public License Version 2", + license="GNU General Public License Version 3", install_requires=install_requires, - python_requires=">=3.5", + python_requires=">=3.6", extras_require={ "test": test_requires + optional_requires, "doc": doc_requires, @@ -86,8 +78,10 @@ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", "Topic :: Scientific/Engineering :: Bio-Informatics", ], ) diff --git a/test/_test_lazyload.py b/test/_test_lazyload.py index 5a24993b..decfbe07 100644 --- a/test/_test_lazyload.py +++ b/test/_test_lazyload.py @@ -1,6 +1,6 @@ -import numpy -import scipy -import pandas +import numpy # noqa +import pandas # noqa +import scipy # noqa import sys @@ -14,11 +14,11 @@ def test_lazyload(): for module in scprep._lazyload._importspec.keys(): if module == "anndata2ri" and sys.version_info[:2] < (3, 6): continue - assert module not in scprep_loaded, module - if module in postloaded_modules: - assert getattr(scprep._lazyload, module).__class__ is type(scprep), module + if module in preloaded_modules: + assert getattr(scprep._lazyload, module).__class__ is type(scprep) else: assert ( getattr(scprep._lazyload, module).__class__ is 
scprep._lazyload.AliasModule - ), module + ) + assert module not in scprep_loaded, module diff --git a/test/test_filter.py b/test/test_filter.py index 56d1f114..a251e778 100644 --- a/test/test_filter.py +++ b/test/test_filter.py @@ -1,10 +1,12 @@ -from tools import utils, matrix, data -import scprep -import pandas as pd -import numpy as np - -from scipy import sparse from functools import partial +from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + +import numpy as np +import pandas as pd +import scprep import unittest diff --git a/test/test_hdf5.py b/test/test_hdf5.py index cee47972..fc2b7796 100644 --- a/test/test_hdf5.py +++ b/test/test_hdf5.py @@ -1,9 +1,11 @@ -from tools import data, utils -import os +from tools import data +from tools import utils + +import h5py import mock +import os import scprep import sys -import h5py import tables diff --git a/test/test_io.py b/test/test_io.py index fdd9c34b..7e63a285 100644 --- a/test/test_io.py +++ b/test/test_io.py @@ -1,24 +1,22 @@ -import pandas as pd -import numpy as np -import fcsparser +from nose.tools import assert_raises +from parameterized import parameterized +from scipy import sparse +from tools import data +from tools import utils -import os -import sys import copy -import shutil -import zipfile -import urllib -import unittest +import fcsparser import mock - +import numpy as np +import os +import pandas as pd import scprep import scprep.io.utils - -from tools import data, utils - -from scipy import sparse -from parameterized import parameterized -from nose.tools import assert_raises +import shutil +import sys +import unittest +import urllib +import zipfile class TestMatrixToDataFrame(unittest.TestCase): @@ -200,7 +198,8 @@ def test_10X(): ) utils.assert_raises_message( FileNotFoundError, - "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and 'barcodes.tsv(.gz)' must be present " + "'matrix.mtx(.gz)', '[genes/features].tsv(.gz)', and " + "'barcodes.tsv(.gz)' 
must be present " "in {}".format(data.data_dir), scprep.io.load_10X, data.data_dir, @@ -238,7 +237,10 @@ def test_10X_zip_error(): def test_10X_zip_url(): X = data.load_10X() - filename = "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip" + filename = ( + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X.zip" + ) X_zip = scprep.io.load_10X_zip(filename) assert scprep.utils.is_sparse_dataframe(X_zip) assert np.sum(np.sum(X != X_zip)) == 0 @@ -725,7 +727,8 @@ def test_download_google_drive_large(): def test_download_url(): X = data.load_10X() scprep.io.download.download_url( - "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X/matrix.mtx.gz", + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X/matrix.mtx.gz", "url_test.mtx.gz", ) Y = scprep.io.load_mtx("url_test.mtx.gz").T @@ -736,7 +739,8 @@ def test_download_url(): def test_download_zip(): X = data.load_10X() scprep.io.download.download_and_extract_zip( - "https://github.com/KrishnaswamyLab/scprep/raw/master/data/test_data/test_10X.zip", + "https://github.com/KrishnaswamyLab/scprep/raw/master/data/" + "test_data/test_10X.zip", "zip_test", ) Y = scprep.io.load_10X("zip_test/test_10X") diff --git a/test/test_lazyload.py b/test/test_lazyload.py index e4cfab81..bb092572 100644 --- a/test/test_lazyload.py +++ b/test/test_lazyload.py @@ -1,9 +1,10 @@ -import subprocess +from tools import data + import mock import os import scprep +import subprocess import sys -from tools import data def test_lazyload(): diff --git a/test/test_measure.py b/test/test_measure.py index 5d0bf526..7f2102cf 100644 --- a/test/test_measure.py +++ b/test/test_measure.py @@ -1,10 +1,9 @@ -from tools import utils, matrix, data -import scprep -import pandas as pd -import numpy as np +from tools import data +from tools import matrix +from tools import utils -from scipy import sparse -from functools import partial +import numpy 
as np +import scprep import unittest diff --git a/test/test_normalize.py b/test/test_normalize.py index 42feda73..a0567782 100644 --- a/test/test_normalize.py +++ b/test/test_normalize.py @@ -1,9 +1,10 @@ -from tools import utils, matrix, data -import numpy as np from sklearn.preprocessing import normalize +from tools import data +from tools import matrix +from tools import utils +import numpy as np import scprep -from functools import partial import unittest diff --git a/test/test_patch.py b/test/test_patch.py index f6a5b628..67ca4c81 100644 --- a/test/test_patch.py +++ b/test/test_patch.py @@ -1,7 +1,8 @@ -import scprep +from pandas.core.internals.blocks import ExtensionBlock + import numpy as np import pandas as pd -from pandas.core.internals.blocks import ExtensionBlock +import scprep # noqa def test_pandas_series_rmatmul(): diff --git a/test/test_plot.py b/test/test_plot.py index c7358b9d..e354d0c1 100644 --- a/test/test_plot.py +++ b/test/test_plot.py @@ -1,23 +1,19 @@ +from packaging.version import Version +from scprep.plot.histogram import _symlog_bins +from scprep.plot.jitter import _JitterParams +from scprep.plot.scatter import _ScatterParams +from tools import data +from tools import utils + +import matplotlib import matplotlib.pyplot as plt import numpy as np +import os import pandas as pd - import scprep -import matplotlib - -import os import sys -import numbers import unittest -from packaging.version import Version - -from scprep.plot.scatter import _ScatterParams -from scprep.plot.jitter import _JitterParams -from scprep.plot.histogram import _symlog_bins - -from tools import data, utils - def try_remove(filename): try: @@ -1465,6 +1461,7 @@ def test_generate_colorbar_n_ticks(self): def test_generate_colorbar_vmin_vmax_none(self): cb = scprep.plot.tools.generate_colorbar("inferno") + assert len(cb.get_ticks()) == 0 utils.assert_warns_message( UserWarning, "Cannot set `n_ticks` without setting `vmin` and `vmax`.", diff --git a/test/test_reduce.py 
b/test/test_reduce.py index 51cc3054..723cf5f3 100644 --- a/test/test_reduce.py +++ b/test/test_reduce.py @@ -1,11 +1,13 @@ -from tools import utils, matrix, data -import scprep +from functools import partial from scipy import sparse -import numpy as np -import pandas as pd from sklearn import decomposition +from tools import data +from tools import matrix +from tools import utils -from functools import partial +import numpy as np +import pandas as pd +import scprep import unittest diff --git a/test/test_run.py b/test/test_run.py index 2dfd8c94..68e5da94 100644 --- a/test/test_run.py +++ b/test/test_run.py @@ -4,21 +4,24 @@ # python 3.5 pass else: - from tools import utils, matrix, data + from tools import data + from tools import matrix + from tools import utils + + import anndata + import mock import numpy as np import pandas as pd + import rpy2.rinterface_lib.callbacks + import rpy2.rinterface_lib.embedded import rpy2.robjects as ro + import scipy.sparse import scprep - import scprep.run.r_function - import scprep.run.conversion import scprep.run - import unittest - import anndata + import scprep.run.conversion + import scprep.run.r_function import sklearn.cluster - import scipy.sparse - import rpy2.rinterface_lib.callbacks - import rpy2.rinterface_lib.embedded - import mock + import unittest builtin_warning = rpy2.rinterface_lib.callbacks.consolewrite_warnerror @@ -40,6 +43,57 @@ def test_install_bioc(): verbose=False, ) + def test_install_github_lib(): + scprep.run.install_github("twitter/AnomalyDetection", verbose=False) + fun = scprep.run.RFunction( + body=""" + packages <- installed.packages() + 'AnomalyDetection' %in% packages + """ + ) + + assert fun() + + def test_install_github_dependencies_None(): + scprep.run.install_github("twitter/AnomalyDetection", verbose=False) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- 
pacman::p_depends(AnomalyDetection, local=TRUE)[c("Depends", + "Imports","LinkingTo")] + all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + """ + ) + + assert fun() + + def test_install_github_dependencies_True(): + scprep.run.install_github( + "twitter/AnomalyDetection", verbose=False, dependencies=True + ) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(AnomalyDetection, local=TRUE)[c("Depends", + "Imports","LinkingTo","Suggests")] + deps <- unname(unlist(deps)) + installed <- installed.packages()[, "Package"] + success <- all(deps %in% installed) + list(success=success, deps=deps, installed=installed) + """ + ) + + result = fun() + assert result["success"], result + class TestSplatter(unittest.TestCase): @classmethod def setUpClass(self): @@ -238,6 +292,147 @@ def test_splatter_warning(self): rpy2.rinterface_lib.callbacks.consolewrite_warnerror is builtin_warning ) + class TestDyngen(unittest.TestCase): + @classmethod + def setUpClass(self): + scprep.run.dyngen.install(verbose=False) + + def test_install_dyngen_lib(self): + scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + packages <- installed.packages() + 'dyngen' %in% packages + """ + ) + + assert fun() + + def test_install_dyngen_dependencies_None(self): + scprep.run.dyngen.install(verbose=False) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo")] + all(unname(unlist(deps)) %in% installed.packages()[, "Package"]) + """ + ) + + assert fun() + + def test_install_dyngen_dependencies_True(self): + scprep.run.dyngen.install(verbose=False, dependencies=True) + fun = scprep.run.RFunction( + body=""" + if (!require("pacman", quietly=TRUE)) { + 
install.packages("pacman", + repos='http://cran.rstudio.com') + } + + deps <- pacman::p_depends(dyngen)[c("Depends","Imports","LinkingTo", + "Suggests")] + deps <- unname(unlist(deps)) + installed <- installed.packages()[, "Package"] + success <- all(deps %in% installed) + list(success=success, deps=deps, installed=installed) + """ + ) + + result = fun() + assert result["success"], result + + def test_dyngen_backbone_not_in_list(self): + utils.assert_raises_message( + ValueError, + "Input not in default backbone list. " + "Choose backbone from get_backbones()", + scprep.run.DyngenSimulate, + backbone="not_a_backbone", + verbose=False, + ) + + def test_dyngen_default(self): + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + ) + + assert set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + + def test_dyngen_force_cell_counts(self): + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + verbose=False, + force_num_cells=True, + ) + + assert set(sim.keys()) == {"cell_info", "expression"} + assert sim["cell_info"].shape[0] == 50 + assert sim["expression"].shape == (50, 70) + + def test_dyngen_with_grn(self): + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_cellwise_grn=True, + verbose=False, + ) + + assert set(sim.keys()) == { + "cell_info", + "expression", + "bulk_grn", + "cellwise_grn", + } + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["bulk_grn"].shape[0] > 0 + 
assert sim["cellwise_grn"].shape[0] > 0 + + def test_dyngen_with_rna_velocity(self): + sim = scprep.run.DyngenSimulate( + backbone="bifurcating", + num_cells=50, + num_tfs=50, + num_targets=10, + num_hks=10, + compute_rna_velocity=True, + verbose=False, + ) + + assert set(sim.keys()) == {"cell_info", "expression", "rna_velocity"} + assert sim["cell_info"].shape[0] > 0 + assert sim["cell_info"].shape[0] <= 50 + assert sim["expression"].shape[0] > 0 + assert sim["expression"].shape[0] <= 50 + assert sim["expression"].shape[1] == 70 + assert sim["rna_velocity"].shape[0] > 0 + assert sim["rna_velocity"].shape[0] <= 50 + assert sim["rna_velocity"].shape[1] == 70 + class TestSlingshot(unittest.TestCase): @classmethod def setUpClass(self): diff --git a/test/test_sanitize.py b/test/test_sanitize.py index 491c154d..6626f5d3 100644 --- a/test/test_sanitize.py +++ b/test/test_sanitize.py @@ -1,7 +1,10 @@ -from tools import utils, matrix, data -import scprep +from tools import data +from tools import matrix +from tools import utils + import numpy as np import pandas as pd +import scprep import warnings @@ -66,7 +69,8 @@ def test_check_index(): scprep.sanitize.check_index(X) with utils.assert_warns_message( RuntimeWarning, - "Renamed 2 copies of index GATGAGGCATTTCAGG-1 to (GATGAGGCATTTCAGG-1, GATGAGGCATTTCAGG-1.1)", + "Renamed 2 copies of index GATGAGGCATTTCAGG-1 to " + "(GATGAGGCATTTCAGG-1, GATGAGGCATTTCAGG-1.1)", ): scprep.sanitize.check_index(X.iloc[[0, 0]]) with warnings.catch_warnings(): @@ -84,7 +88,8 @@ def test_check_index(): assert Y.loc["GATGAGGCATTTCAGG-1"].shape[0] == 2 with utils.assert_warns_message( RuntimeWarning, - "Renamed 3 copies of index GTCATTTCATCTCGCT-1 to (GTCATTTCATCTCGCT-1, GTCATTTCATCTCGCT-1.1, GTCATTTCATCTCGCT-1.2)", + "Renamed 3 copies of index GTCATTTCATCTCGCT-1 to " + "(GTCATTTCATCTCGCT-1, GTCATTTCATCTCGCT-1.1, GTCATTTCATCTCGCT-1.2)", ): scprep.sanitize.check_index(X.iloc[[1, 1, 1]]) with warnings.catch_warnings(): diff --git 
a/test/test_select.py b/test/test_select.py index e0d44b99..9690eeb7 100644 --- a/test/test_select.py +++ b/test/test_select.py @@ -1,10 +1,12 @@ -from tools import data, matrix, utils -import scprep +from scipy import sparse +from tools import data +from tools import matrix +from tools import utils import numpy as np import pandas as pd +import scprep import unittest -from scipy import sparse class Test10X(unittest.TestCase): diff --git a/test/test_stats.py b/test/test_stats.py index 28ed21ac..58455caf 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -1,13 +1,14 @@ -from tools import utils, matrix, data -import numpy as np +from functools import partial +from parameterized import parameterized from scipy import stats +from tools import data +from tools import matrix +from tools import utils -from sklearn.metrics import mutual_info_score +import numpy as np +import os import scprep -from functools import partial import warnings -import os -from parameterized import parameterized def _test_fun_2d(X, fun, **kwargs): @@ -161,13 +162,19 @@ def test_knnDREMI(): Y = scprep.stats.knnDREMI(X[:, 0], X[:, 1]) assert isinstance(Y, float) np.testing.assert_allclose(Y, 0.16238906) + n_bins = 20 Y2, drevi = scprep.stats.knnDREMI( - X[:, 0], X[:, 1], plot=True, filename="test.png", return_drevi=True + X[:, 0], + X[:, 1], + plot=True, + filename="test.png", + return_drevi=True, + n_bins=n_bins, ) assert os.path.isfile("test.png") os.remove("test.png") assert Y2 == Y - assert drevi.shape == (20, 20) + assert drevi.shape == (n_bins, n_bins) matrix.test_all_matrix_types( X, utils.assert_transform_equals, @@ -177,9 +184,13 @@ def test_knnDREMI(): ) with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - assert scprep.stats.knnDREMI( - X[:, 0], np.repeat(X[0, 1], X.shape[0]), return_drevi=True - ) == (0, None) + n_bins = 10 + dremi, drevi = scprep.stats.knnDREMI( + X[:, 0], np.repeat(X[0, 1], X.shape[0]), n_bins=n_bins, return_drevi=True + ) 
+ assert dremi == 0 + assert np.all(drevi == 0) + assert drevi.shape == (n_bins, n_bins) utils.assert_raises_message( ValueError, "Expected k as an integer. Got ", diff --git a/test/test_transform.py b/test/test_transform.py index a561fe0f..78b944a4 100644 --- a/test/test_transform.py +++ b/test/test_transform.py @@ -1,8 +1,9 @@ -from tools import utils, matrix, data +from tools import data +from tools import matrix +from tools import utils + import numpy as np import scprep -from scipy import sparse -import pandas as pd import warnings diff --git a/test/test_utils.py b/test/test_utils.py index 40013a38..6cc47257 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -1,9 +1,12 @@ -from tools import data, matrix, utils -import scprep +from parameterized import parameterized from scipy import sparse +from tools import data +from tools import matrix +from tools import utils + import numpy as np import pandas as pd -from parameterized import parameterized +import scprep def test_with_pkg(): @@ -154,10 +157,13 @@ def test_combine_batches(): ) assert np.all(sample_labels.index == Y2.index) assert sample_labels.name == "sample_labels" - transform = lambda X: scprep.utils.combine_batches( - [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], - batch_labels=[0, 1], - )[0] + + def transform(X): + return scprep.utils.combine_batches( + [X, scprep.select.select_rows(X, idx=np.arange(X.shape[0] // 2))], + batch_labels=[0, 1], + )[0] + matrix.test_matrix_types( X, utils.assert_transform_equals, diff --git a/test/tools/data.py b/test/tools/data.py index 7df2b55d..f4156ae9 100644 --- a/test/tools/data.py +++ b/test/tools/data.py @@ -1,6 +1,6 @@ -import scprep -import os import numpy as np +import os +import scprep def _os_agnostic_fullpath_join(path): diff --git a/test/tools/matrix.py b/test/tools/matrix.py index 8bd0f2bd..bfc59e38 100644 --- a/test/tools/matrix.py +++ b/test/tools/matrix.py @@ -1,10 +1,10 @@ +from packaging import version +from scipy import 
sparse +from scprep.utils import is_SparseDataFrame + import numpy as np import pandas as pd import warnings -from scipy import sparse -from functools import partial -from scprep.utils import is_SparseDataFrame -from packaging import version def _ignore_pandas_sparse_warning(): @@ -109,13 +109,15 @@ def _typename(X): def test_matrix_types(X, test_fun, matrix_types, *args, **kwargs): - """Test a function across a range of matrix types + """Test a function across a range of matrix types. Parameters ---------- X : matrix input test_fun : Function(X, *args, **kwargs) for testing - matrix_types : List of functions (typically class constructors) converting X to desired matrix formats + matrix_types : list + List of functions (typically class constructors) converting X + to desired matrix formats *args : positional arguments for test_fun **kwargs : keyword arguments for test_fun """ diff --git a/test/tools/utils.py b/test/tools/utils.py index afd32548..17d5ea8b 100644 --- a/test/tools/utils.py +++ b/test/tools/utils.py @@ -1,32 +1,43 @@ -import numpy as np +from . import matrix +from nose.tools import assert_raises +from nose.tools import assert_raises_regex +from nose.tools import assert_warns_regex from scipy import sparse +from scprep.utils import is_SparseDataFrame +from scprep.utils import toarray + +import numpy as np import pandas as pd -from nose.tools import assert_raises -from scprep.utils import toarray, is_SparseDataFrame -from . import matrix -from nose.tools import assert_raises_regex, assert_warns_regex import re def assert_warns_message(expected_warning, expected_message, *args, **kwargs): + """Assert that the correct warning message is raised. + + Handles regex better than the default. 
+ """ expected_regex = re.escape(expected_message) return assert_warns_regex(expected_warning, expected_regex, *args, **kwargs) -def assert_raises_message(expected_warning, expected_message, *args, **kwargs): +def assert_raises_message(expected_error, expected_message, *args, **kwargs): + """Assert that the correct error message is raised. + + Handles regex better than the default. + """ expected_regex = re.escape(expected_message) - return assert_raises_regex(expected_warning, expected_regex, *args, **kwargs) + return assert_raises_regex(expected_error, expected_regex, *args, **kwargs) def assert_all_equal(X, Y): - """Assert all values of two matrices are the same""" + """Assert all values of two matrices are the same.""" X = toarray(X) Y = toarray(Y) np.testing.assert_array_equal(X, Y) def assert_all_close(X, Y, rtol=1e-05, atol=1e-08): - """Assert all values of two matrices are similar + """Assert all values of two matrices are similar. Parameters ---------- @@ -39,7 +50,7 @@ def assert_all_close(X, Y, rtol=1e-05, atol=1e-08): def assert_transform_equals(X, Y, transform, check=assert_all_equal, **kwargs): - """Check that transform(X, **kwargs) == Y + """Check that transform(X, **kwargs) == Y. Parameters ---------- @@ -59,7 +70,7 @@ def assert_transform_equals(X, Y, transform, check=assert_all_equal, **kwargs): def assert_transform_unchanged(X, transform, check=assert_all_equal, **kwargs): - """Check that transform(X, **kwargs) == X + """Check that transform(X, **kwargs) == X. Parameters ---------- @@ -76,7 +87,10 @@ def assert_transform_unchanged(X, transform, check=assert_all_equal, **kwargs): def assert_transform_equivalent(X, Y, transform, check=assert_all_equal, **kwargs): - """Check the output of transform(X, **kwargs) == Y and transform(X, **kwargs) gives the same kind of matrix as X + """Check the transformation gives the right result and doesn't change the type. 
+ + Ensures that transform(X, **kwargs) == Y and transform(X, **kwargs) + give the same kind of matrix as X. Parameters ---------- @@ -94,11 +108,11 @@ def assert_transform_equivalent(X, Y, transform, check=assert_all_equal, **kwarg Y2 = assert_transform_equals(X, Y, transform, check=check, **kwargs) assert assert_matrix_class_equivalent( X, Y2 - ), "{} produced inconsistent matrix output".format(_typename(X)) + ), "{} produced inconsistent matrix output".format(matrix._typename(X)) def assert_transform_raises(X, transform, exception=ValueError, **kwargs): - """Check that transform(X) raises exception + """Check that transform(X) raises exception. Parameters ---------- @@ -123,7 +137,7 @@ def _sparse_dataframe_density(X): def assert_matrix_class_equivalent(X, Y): - """Check the format of X and Y are the same + """Check the format of X and Y are the same. We expect: * shape hasn't changed diff --git a/travis_setup.R b/travis_setup.R deleted file mode 100644 index a71289e6..00000000 --- a/travis_setup.R +++ /dev/null @@ -1,5 +0,0 @@ -chooseCRANmirror(ind=1) -if (!require("remotes")) install.packages("remotes", quietly=TRUE) -remotes::update_packages(upgrade="always") -if (!require("BiocManager")) install.packages("BiocManager", quietly=TRUE) -BiocManager::install(update=TRUE, ask=FALSE)