Issue #5 Working conda recipe (#38)

* init conda recipe files. * initial steps for setup.py and running autometa as its own installed application in any directory. changed structure to fit setup.py distutils and moving formatting for conda recipe * removed numpy from setup.py and removed script from build in meta.yaml * Added to numpy and removed pip * Updates to code structure to reflect proper setup for packaging/installation. Updated meta.yaml to reflect dependencies. Added autometa-configure to entrypoints as console script for database/environment configuration prior to binning runs. * reduce disk memory requirements for overall package size reduction * Working conda recipe for linux and osx. Removed unneeded ipynb in docs and unused build scripts. Moved databases under autometa package and updated default.config to reflect this. markers pointer to database updated in markers.py and added recursive directory construction within databases.py. * Updated <default> metagenome.config and removed (unused) WORKSPACE constant in config. * Updated parser descriptions * Updated version to pre-alpha and changed main to __main__. Updated meta.yaml with jinja templating for version, home, license from setup.py * included description in meta.yaml * updated version to 2.0a0 and description in meta.yaml * Added doc url and dev url * updated gitignore and .condarc to reflect database dir change and added erees channel * updated argparse help information. Added COPYRIGHT tags to config/__init__.py. * Added copyright to autometa.py * Updated Dockerfile fixing issue-#3. Note: docker image will need to be updated when tsne is updated. * Added py3 compatible tsne to Dockerfile * updated --log parameter with user-friendly help description
KwanLab · Apr 12, 2020 · 04f25f4 · 04f25f4
1 parent 5994120
commit 04f25f4
Show file tree

Hide file tree

Showing 44 changed files with 728 additions and 95,533 deletions.
diff --git a/.condarc b/.condarc
@@ -0,0 +1,7 @@
+channels:
+  - defaults
+  - bioconda
+  - conda-forge
+  - erees
+show_channels_urls: True
+default_threads: 6
diff --git a/.condarc.yaml b/.condarc.yaml
@@ -0,0 +1,6 @@
+channels:
+  - defaults
+  - bioconda
+  - conda-forge
+show_channels_urls: True
+default_threads: 6
diff --git a/.gitignore b/.gitignore
@@ -135,7 +135,7 @@ dmypy.json
 *.c
 autometa/*.pyc
 autometa/taxonomy/*.pyc
-databases/markers/*.h3*
+autometa/databases/markers/*.h3*
 
 # databases / testing
 tests/data/*

diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM continuumio/anaconda
+FROM continuumio/miniconda3
 MAINTAINER Jason C. Kwan "jason.kwan@wisc.edu"
 
 # Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal,
@@ -19,24 +19,15 @@ MAINTAINER Jason C. Kwan "jason.kwan@wisc.edu"
 # You should have received a copy of the GNU Affero General Public License
 # along with Autometa. If not, see <http://www.gnu.org/licenses/>.
 
-conda install -c bioconda -c conda-forge --yes \
-    biopython \
-    pandas \
-    tqdm \
-    numpy \
-    scikit-learn \
-    scipy \
-    samtools \
-    bedtools \
-    bowtie2 \
-    hmmer \
-    prodigal \
-    diamond \
-    ndcctools \
-    parallel \
-    requests \
-    hdbscan \
-    umap-learn \
-    && conda clean --all --yes
+RUN conda config --prepend channels erees \
+    && conda config --append channels bioconda \
+    && conda config --append channels conda-forge
 
-RUN git clone https://github.com/KwanLab/Autometa
+RUN conda install autometa tsne
+
+RUN echo "testing autometa and tsne import"
+RUN python -c "import autometa"
+RUN python -c "import tsne"
+
+RUN echo "Running Autometa dependencies test"
+RUN autometa --check-dependencies --debug
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,4 @@
+include LICENSE.txt
+include MANIFEST.in
+include README.md
+include setup.py
diff --git a/VERSION b/VERSION
@@ -0,0 +1 @@
+2.0a0
diff --git a/autometa.py b/autometa.py
@@ -21,133 +21,11 @@
 along with Autometa. If not, see <http://www.gnu.org/licenses/>.
 COPYRIGHT
 
-Main script to run Autometa
+Wrapper to run Autometa from base directory without installing.
 """
 
 
-import logging
-import os
-import sys
-
-import multiprocessing as mp
-
-from autometa.config.user import AutometaUser
-
-logger = logging.getLogger('autometa')
-
-
-__version__ = "2.0.0"
-
-def init_logger(fpath=None, level=logging.INFO):
-    """Initialize logger.
-
-    By default will initialize streaming logger with DEBUG level messages.
-    If `fpath` is provided, will write DEBUG level messages to `fpath` and
-    set streaming messages to INFO.
-
-    Parameters
-    ----------
-    fpath : str
-        </path/to/file.log>
-    level : int
-        Overwrite default logging level behavior with provided `level`.
-        This must be a constant from logging levels.
-        See https://docs.python.org/3/library/logging.html#levels for details.
-        i.e. logging.DEBUG, logging.INFO, etc. translates to 0,10, etc...
-
-    Returns
-    -------
-    logging.Logger
-        logging's Logger object to emit messages via methods:
-        'warn','info','debug','error','exception','critical','fatal'
-
-    Raises
-    -------
-    TypeError
-        `level` must be an int
-    ValueError
-        `level` must be one of 0, 10, 20, 30, 40, 50
-    """
-    levels = {
-        logging.NOTSET,
-        logging.DEBUG,
-        logging.INFO,
-        logging.WARNING,
-        logging.ERROR,
-        logging.CRITICAL}
-    if type(level) is not int:
-        raise TypeError(f'{level} must be an int! {type(level)}')
-    if level and level not in levels:
-        raise ValueError(f'{level} not in levels: {levels}!')
-    formatter = logging.Formatter(
-        fmt='[%(asctime)s %(levelname)s] %(name)s: %(message)s',
-        datefmt='%m/%d/%Y %I:%M:%S %p')
-    # Construct file/stream logging handlers
-    streamhandler = logging.StreamHandler()
-    streamhandler.setFormatter(formatter)
-    if fpath:
-        filehandler = logging.FileHandler(fpath)
-        filehandler.setFormatter(formatter)
-        logger.addHandler(filehandler)
-
-    streamhandler.setLevel(level)
-    logger.addHandler(streamhandler)
-    logger.setLevel(logging.DEBUG)
-    return logger
-
-def main(args):
-    # Setup logger
-    # timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime())
-    # log_fpath = args.log if args.log else f'{timestamp}_autometa.log'
-    if args.debug:
-        logger = init_logger(fpath=args.log, level=logging.DEBUG)
-    else:
-        logger = init_logger(fpath=args.log)
-    # Configure AutometaUser
-    # TODO: master from WorkQueue is AutometaUser
-    user = AutometaUser(dryrun=args.check_dependencies, nproc=args.cpus)
-
-    for config in args.config:
-        # TODO: Add directions to master from WorkQueue
-        mgargs = user.prepare_binning_args(config)
-        user.run_binning(mgargs)
-        # user.refine_binning()
-        # user.process_binning()
-    # user.get_pangenomes()
+from autometa.__main__ import entrypoint
 
 if __name__ == '__main__':
-    import argparse
-    import time
-    cpus = mp.cpu_count()
-    parser = argparse.ArgumentParser(description='Main script to run Autometa pipeline.')
-    parser.add_argument('config',
-        help='</path/to/metagenome.config>',
-        nargs='*')
-    parser.add_argument('--cpus',
-        help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)',
-        type=int,
-        default=cpus)
-    parser.add_argument('--debug',
-        help='Stream debugging information to terminal',
-        action='store_true',
-        default=False)
-    parser.add_argument('--log', help='</path/to/autometa.log>', type=str)
-    parser.add_argument('--check-dependencies',
-        help='Check user executables and databases accessible to Autometa and exit.',
-        action='store_true',
-        default=False)
-    args = parser.parse_args()
-    try:
-        main(args)
-    except KeyboardInterrupt:
-        logger.info('User cancelled run. Exiting...')
-    except Exception as err:
-        issue_request = '''
-
-        Please help us fix your problem!
-
-        You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new
-        '''
-        err.issue_request = issue_request
-        logger.exception(err)
-        logger.info(err.issue_request)
+    entrypoint()
diff --git a/autometa/__main__.py b/autometa/__main__.py
@@ -0,0 +1,156 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+COPYRIGHT
+Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal,
+Shaurya Chanana, Izaak Miller, Jason C. Kwan
+
+This file is part of Autometa.
+
+Autometa is free software: you can redistribute it and/or modify
+it under the terms of the GNU Affero General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+Autometa is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Affero General Public License for more details.
+
+You should have received a copy of the GNU Affero General Public License
+along with Autometa. If not, see <http://www.gnu.org/licenses/>.
+COPYRIGHT
+
+Main script to run Autometa
+"""
+
+
+import logging
+import os
+import sys
+
+import multiprocessing as mp
+
+from .config.user import AutometaUser
+
+
+logger = logging.getLogger('autometa')
+
+
+def init_logger(fpath=None, level=logging.INFO):
+    """Initialize logger.
+
+    By default will initialize streaming logger with INFO level messages.
+    If `fpath` is provided, will also write DEBUG level messages to `fpath`.
+    Streaming messages are emitted at `level` (default: logging.INFO).
+
+    Parameters
+    ----------
+    fpath : str
+        </path/to/file.log>
+    level : int
+        Overwrite default logging level behavior with provided `level`.
+        This must be a constant from logging levels.
+        See https://docs.python.org/3/library/logging.html#levels for details.
+        i.e. logging.NOTSET, logging.DEBUG, etc. translates to 0, 10, etc...
+
+    Returns
+    -------
+    logging.Logger
+        logging's Logger object to emit messages via methods:
+        'warn','info','debug','error','exception','critical','fatal'
+
+    Raises
+    -------
+    TypeError
+        `level` must be an int
+    ValueError
+        `level` must be one of 0, 10, 20, 30, 40, 50
+    """
+    levels = {
+        logging.NOTSET,
+        logging.DEBUG,
+        logging.INFO,
+        logging.WARNING,
+        logging.ERROR,
+        logging.CRITICAL}
+    if type(level) is not int:
+        raise TypeError(f'{level} must be an int! {type(level)}')
+    if level and level not in levels:
+        raise ValueError(f'{level} not in levels: {levels}!')
+    formatter = logging.Formatter(
+        fmt='[%(asctime)s %(levelname)s] %(name)s: %(message)s',
+        datefmt='%m/%d/%Y %I:%M:%S %p')
+    # Construct file/stream logging handlers
+    streamhandler = logging.StreamHandler()
+    streamhandler.setFormatter(formatter)
+    if fpath:
+        filehandler = logging.FileHandler(fpath)
+        filehandler.setFormatter(formatter)
+        logger.addHandler(filehandler)
+
+    streamhandler.setLevel(level)
+    logger.addHandler(streamhandler)
+    logger.setLevel(logging.DEBUG)
+    return logger
+
+def main(args):
+    # Setup logger
+    # timestamp = time.strftime("%Y-%m-%d_%H-%M-%S",time.gmtime())
+    # log_fpath = args.log if args.log else f'{timestamp}_autometa.log'
+    if args.debug:
+        logger = init_logger(fpath=args.log, level=logging.DEBUG)
+    else:
+        logger = init_logger(fpath=args.log)
+    # Configure AutometaUser
+    # TODO: master from WorkQueue is AutometaUser
+    user = AutometaUser(nproc=args.cpus)
+    user.configure(dryrun=args.check_dependencies)
+
+    for config in args.config:
+        # TODO: Add directions to master from WorkQueue
+        mgargs = user.prepare_binning_args(config)
+        user.run_binning(mgargs)
+        # user.refine_binning()
+        # user.process_binning()
+    # user.get_pangenomes()
+
+def entrypoint():
+    import argparse
+    import time
+    cpus = mp.cpu_count()
+    parser = argparse.ArgumentParser(description='Main script to run the Autometa pipeline.')
+    parser.add_argument('config',
+        help='Path to your metagenome.config file',
+        nargs='*')
+    parser.add_argument('--cpus',
+        help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)',
+        type=int,
+        default=cpus)
+    parser.add_argument('--debug',
+        help='Stream debugging information to terminal',
+        action='store_const',
+        const=logging.DEBUG)
+    parser.add_argument('--log', help='Path to write a log file (e.g. </path/to/autometa.log>)', type=str)
+    parser.add_argument('--check-dependencies',
+        help='Check user executables and databases accessible to Autometa and exit.',
+        action='store_true')
+    args = parser.parse_args()
+
+    try:
+        main(args)
+    except KeyboardInterrupt:
+        logger.info('User cancelled run. Exiting...')
+    except Exception as err:
+        issue_request = '''
+
+        Please help us fix your problem!
+
+        You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new
+        '''
+        err.issue_request = issue_request
+        logger.exception(err)
+        logger.info(err.issue_request)
+
+if __name__ == '__main__':
+    entrypoint()
diff --git a/autometa/binning/__init__.py b/autometa/binning/__init__.py
diff --git a/autometa/binning/bhsne.py b/autometa/binning/bhsne.py
@@ -50,13 +50,7 @@ def embed(kmers_fpath):
     X = bh_sne(X, d=2)
     return pd.DataFrame(X, columns=['x','y'], index=df.index)
 
-def main(args):
-    df = embed(args.kmers)
-    logger.debug('{} embedded. : df.shape: {}'.format(args.kmers, df.shape))
-    df.to_csv(args.embedded, sep='\t', index=True, header=True)
-    logger.debug('embedded written {}'.format(args.embedded))
-
-if __name__ == '__main__':
+def main():
     import argparse
     import logging as logger
     logger.basicConfig(
@@ -67,4 +61,10 @@ def main(args):
     parser.add_argument('kmers',help='</path/to/kmers.normalized.tsv>')
     parser.add_argument('embedded',help='</path/to/kmers.embedded.tsv>')
     args = parser.parse_args()
-    main(args)
+    df = embed(args.kmers)
+    logger.debug('{} embedded. : df.shape: {}'.format(args.kmers, df.shape))
+    df.to_csv(args.embedded, sep='\t', index=True, header=True)
+    logger.debug('embedded written {}'.format(args.embedded))
+
+if __name__ == '__main__':
+    main()