Skip to content

Commit

Permalink
Issue #5 Working conda recipe (#38)
Browse files Browse the repository at this point in the history
* init conda recipe files.

* initial steps for setup.py and running autometa as its own installed application in any directory. changed structure to fit setup.py distutils and moving formatting for conda recipe

* removed numpy from setup.py and removed script from build in meta.yaml

* Added to numpy and removed pip

* Updates to code structure to reflect proper setup for packaging/installation. Updated meta.yaml to reflect dependencies. Added autometa-configure to entrypoints as console script for database/environment configuration prior to binning runs.

* reduce disk memory requirements for overall package size reduction

* Working conda recipe for linux and osx. Removed unneeded ipynb in docs and unused build scripts. Moved databases under autometa package and updated default.config to reflect this. Updated the markers pointer to the database in markers.py and added recursive directory construction within databases.py.

* Updated <default> metagenome.config and removed (unused) WORKSPACE constant in config.

* Updated parser descriptions

* Updated version to pre-alpha changed main to __main__. Updated meta.yaml with jinja templating for version, home, license from setup.py

* included description in meta.yaml

* updated version to 2.0a0 and description in meta.yaml

* Added doc url and dev url

* updated gitignore and .condarc to reflect database dir change and added erees channel

* updated argparse help information. Added COPYRIGHT tags to config/__init__.py.

* Added copyright to autometa.py

* Updated Dockerfile fixing issue-#3. Note: docker image will need to be updated when tsne is updated.

* Added py3 compatible tsne to Dockerfile

* updated --log parameter with user-friendly help description
  • Loading branch information
evanroyrees committed Apr 12, 2020
1 parent 5994120 commit 04f25f4
Show file tree
Hide file tree
Showing 44 changed files with 728 additions and 95,533 deletions.
7 changes: 7 additions & 0 deletions .condarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Channels conda searches for packages.
channels:
  - defaults
  - bioconda
  - conda-forge
  - erees
# The conda setting is spelled `show_channel_urls` (singular "channel").
show_channel_urls: True
default_threads: 6
6 changes: 6 additions & 0 deletions .condarc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Channels conda searches for packages.
channels:
  - defaults
  - bioconda
  - conda-forge
# The conda setting is spelled `show_channel_urls` (singular "channel").
show_channel_urls: True
default_threads: 6
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ dmypy.json
*.c
autometa/*.pyc
autometa/taxonomy/*.pyc
databases/markers/*.h3*
autometa/databases/markers/*.h3*

# databases / testing
tests/data/*
Expand Down
33 changes: 12 additions & 21 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM continuumio/anaconda
FROM continuumio/miniconda3
MAINTAINER Jason C. Kwan "jason.kwan@wisc.edu"

# Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal,
Expand All @@ -19,24 +19,15 @@ MAINTAINER Jason C. Kwan "jason.kwan@wisc.edu"
# You should have received a copy of the GNU Affero General Public License
# along with Autometa. If not, see <http://www.gnu.org/licenses/>.

conda install -c bioconda -c conda-forge --yes \
biopython \
pandas \
tqdm \
numpy \
scikit-learn \
scipy \
samtools \
bedtools \
bowtie2 \
hmmer \
prodigal \
diamond \
ndcctools \
parallel \
requests \
hdbscan \
umap-learn \
&& conda clean --all --yes
RUN conda config --prepend channels erees \
&& conda config --append channels bioconda \
&& conda config --append channels conda-forge

RUN git clone https://github.com/KwanLab/Autometa
RUN conda install autometa tsne

RUN echo "testing autometa and tsne import"
RUN python -c "import autometa"
RUN python -c "import tsne"

RUN echo "Running Autometa dependencies test"
RUN autometa --check-dependencies --debug
4 changes: 4 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
include LICENSE.txt
include MANIFEST.in
include README.md
include setup.py
1 change: 1 addition & 0 deletions VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2.0a0
128 changes: 3 additions & 125 deletions autometa.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,133 +21,11 @@
along with Autometa. If not, see <http://www.gnu.org/licenses/>.
COPYRIGHT
Main script to run Autometa
Wrapper to run Autometa from base directory without installing.
"""


import logging
import os
import sys

import multiprocessing as mp

from autometa.config.user import AutometaUser

logger = logging.getLogger('autometa')


__version__ = "2.0.0"

def init_logger(fpath=None, level=logging.INFO):
    """Initialize the module-level ``autometa`` logger.

    Messages are streamed to the terminal at `level` (INFO by default).
    If `fpath` is provided, messages of every level (DEBUG and above) are
    also written to `fpath`.

    Calling this function more than once re-uses the handlers that are already
    attached instead of stacking duplicates, so each message is emitted only
    once per destination.

    Parameters
    ----------
    fpath : str, optional
        </path/to/file.log>
    level : int, optional
        Level of the messages streamed to the terminal (default: logging.INFO).
        This must be a constant from logging levels.
        See https://docs.python.org/3/library/logging.html#levels for details.
        i.e. logging.NOTSET, logging.DEBUG, logging.INFO, etc. translates to
        0, 10, 20, etc...

    Returns
    -------
    logging.Logger
        logging's Logger object to emit messages via methods:
        'warn','info','debug','error','exception','critical','fatal'

    Raises
    -------
    TypeError
        `level` must be an int
    ValueError
        `level` must be one of 0, 10, 20, 30, 40, 50
    """
    levels = {
        logging.NOTSET,
        logging.DEBUG,
        logging.INFO,
        logging.WARNING,
        logging.ERROR,
        logging.CRITICAL}
    # bool is a subclass of int but is never a meaningful logging level.
    if isinstance(level, bool) or not isinstance(level, int):
        raise TypeError(f'{level} must be an int! {type(level)}')
    if level not in levels:
        raise ValueError(f'{level} not in levels: {levels}!')
    formatter = logging.Formatter(
        fmt='[%(asctime)s %(levelname)s] %(name)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    if fpath:
        # FileHandler stores the absolute path of its target in `baseFilename`.
        log_fpath = os.path.abspath(fpath)
        already_logging_to_file = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == log_fpath
            for handler in logger.handlers)
        if not already_logging_to_file:
            filehandler = logging.FileHandler(fpath)
            filehandler.setFormatter(formatter)
            logger.addHandler(filehandler)

    # FileHandler subclasses StreamHandler, so compare exact types to find a
    # previously attached terminal handler.
    streamhandler = next(
        (handler for handler in logger.handlers
         if type(handler) is logging.StreamHandler),
        None)
    if streamhandler is None:
        streamhandler = logging.StreamHandler()
        logger.addHandler(streamhandler)
    streamhandler.setFormatter(formatter)
    streamhandler.setLevel(level)
    # The logger itself lets everything through; handlers do the filtering.
    logger.setLevel(logging.DEBUG)
    return logger

def main(args):
    """Set up logging, configure the Autometa user and run binning.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments. The attributes read here are `debug`,
        `log`, `check_dependencies`, `cpus` and `config`.
    """
    # Only override the default stream level when debugging was requested.
    logger_kwargs = {'fpath': args.log}
    if args.debug:
        logger_kwargs['level'] = logging.DEBUG
    init_logger(**logger_kwargs)
    # TODO: master from WorkQueue is AutometaUser
    autometa_user = AutometaUser(dryrun=args.check_dependencies, nproc=args.cpus)

    # One binning run per provided metagenome config.
    for metagenome_config in args.config:
        # TODO: Add directions to master from WorkQueue
        binning_args = autometa_user.prepare_binning_args(metagenome_config)
        autometa_user.run_binning(binning_args)
        # user.refine_binning()
        # user.process_binning()
        # user.get_pangenomes()
from autometa.__main__ import entrypoint

if __name__ == '__main__':
import argparse
import time
cpus = mp.cpu_count()
parser = argparse.ArgumentParser(description='Main script to run Autometa pipeline.')
parser.add_argument('config',
help='</path/to/metagenome.config>',
nargs='*')
parser.add_argument('--cpus',
help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)',
type=int,
default=cpus)
parser.add_argument('--debug',
help='Stream debugging information to terminal',
action='store_true',
default=False)
parser.add_argument('--log', help='</path/to/autometa.log>', type=str)
parser.add_argument('--check-dependencies',
help='Check user executables and databases accessible to Autometa and exit.',
action='store_true',
default=False)
args = parser.parse_args()
try:
main(args)
except KeyboardInterrupt:
logger.info('User cancelled run. Exiting...')
except Exception as err:
issue_request = '''
Please help us fix your problem!
You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new
'''
err.issue_request = issue_request
logger.exception(err)
logger.info(err.issue_request)
entrypoint()
156 changes: 156 additions & 0 deletions autometa/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
COPYRIGHT
Copyright 2020 Ian J. Miller, Evan R. Rees, Kyle Wolf, Siddharth Uppal,
Shaurya Chanana, Izaak Miller, Jason C. Kwan
This file is part of Autometa.
Autometa is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Autometa is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with Autometa. If not, see <http://www.gnu.org/licenses/>.
COPYRIGHT
Main script to run Autometa
"""


import logging
import os
import sys

import multiprocessing as mp

from .config.user import AutometaUser


logger = logging.getLogger('autometa')


def init_logger(fpath=None, level=logging.INFO):
    """Initialize the module-level ``autometa`` logger.

    Messages are streamed to the terminal at `level` (INFO by default).
    If `fpath` is provided, messages of every level (DEBUG and above) are
    also written to `fpath`.

    Calling this function more than once re-uses the handlers that are already
    attached instead of stacking duplicates, so each message is emitted only
    once per destination.

    Parameters
    ----------
    fpath : str, optional
        </path/to/file.log>
    level : int, optional
        Level of the messages streamed to the terminal (default: logging.INFO).
        This must be a constant from logging levels.
        See https://docs.python.org/3/library/logging.html#levels for details.
        i.e. logging.NOTSET, logging.DEBUG, logging.INFO, etc. translates to
        0, 10, 20, etc...

    Returns
    -------
    logging.Logger
        logging's Logger object to emit messages via methods:
        'warn','info','debug','error','exception','critical','fatal'

    Raises
    -------
    TypeError
        `level` must be an int
    ValueError
        `level` must be one of 0, 10, 20, 30, 40, 50
    """
    levels = {
        logging.NOTSET,
        logging.DEBUG,
        logging.INFO,
        logging.WARNING,
        logging.ERROR,
        logging.CRITICAL}
    # bool is a subclass of int but is never a meaningful logging level.
    if isinstance(level, bool) or not isinstance(level, int):
        raise TypeError(f'{level} must be an int! {type(level)}')
    if level not in levels:
        raise ValueError(f'{level} not in levels: {levels}!')
    formatter = logging.Formatter(
        fmt='[%(asctime)s %(levelname)s] %(name)s: %(message)s',
        datefmt='%m/%d/%Y %I:%M:%S %p')
    if fpath:
        # FileHandler stores the absolute path of its target in `baseFilename`.
        log_fpath = os.path.abspath(fpath)
        already_logging_to_file = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == log_fpath
            for handler in logger.handlers)
        if not already_logging_to_file:
            filehandler = logging.FileHandler(fpath)
            filehandler.setFormatter(formatter)
            logger.addHandler(filehandler)

    # FileHandler subclasses StreamHandler, so compare exact types to find a
    # previously attached terminal handler.
    streamhandler = next(
        (handler for handler in logger.handlers
         if type(handler) is logging.StreamHandler),
        None)
    if streamhandler is None:
        streamhandler = logging.StreamHandler()
        logger.addHandler(streamhandler)
    streamhandler.setFormatter(formatter)
    streamhandler.setLevel(level)
    # The logger itself lets everything through; handlers do the filtering.
    logger.setLevel(logging.DEBUG)
    return logger

def main(args):
    """Set up logging, configure the Autometa user and run binning.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command line arguments. The attributes read here are `debug`,
        `log`, `cpus`, `check_dependencies` and `config`.
    """
    # Only override the default stream level when debugging was requested.
    logger_kwargs = {'fpath': args.log}
    if args.debug:
        logger_kwargs['level'] = logging.DEBUG
    init_logger(**logger_kwargs)
    # TODO: master from WorkQueue is AutometaUser
    autometa_user = AutometaUser(nproc=args.cpus)
    autometa_user.configure(dryrun=args.check_dependencies)

    # One binning run per provided metagenome config.
    for metagenome_config in args.config:
        # TODO: Add directions to master from WorkQueue
        binning_args = autometa_user.prepare_binning_args(metagenome_config)
        autometa_user.run_binning(binning_args)
        # user.refine_binning()
        # user.process_binning()
        # user.get_pangenomes()

def entrypoint():
    """Parse the command line arguments and run the Autometa pipeline.

    Arguments are read from ``sys.argv`` and handed to `main`. A keyboard
    interrupt is logged and ends the run quietly. Any other exception is
    logged with its traceback, followed by a pointer to the issue tracker,
    and the process then exits with status 1 so that calling shells and build
    scripts can detect the failure.

    Raises
    ------
    SystemExit
        With exit code 1 when `main` raises an unexpected exception (argparse
        may also exit on invalid arguments or ``--help``).
    """
    import argparse
    cpus = mp.cpu_count()
    parser = argparse.ArgumentParser(description='Main script to run the Autometa pipeline.')
    parser.add_argument('config',
        help='Path to your metagenome.config file',
        nargs='*')
    parser.add_argument('--cpus',
        help=f'Num. cpus to use when updating/constructing databases (default: {cpus} cpus)',
        type=int,
        default=cpus)
    # `args.debug` is None unless the flag is given, then it is logging.DEBUG.
    parser.add_argument('--debug',
        help='Stream debugging information to terminal',
        action='store_const',
        const=logging.DEBUG)
    parser.add_argument('--log', help='Path to write a log file (e.g. </path/to/autometa.log>)', type=str)
    parser.add_argument('--check-dependencies',
        help='Check user executables and databases accessible to Autometa and exit.',
        action='store_true')
    args = parser.parse_args()

    try:
        main(args)
    except KeyboardInterrupt:
        logger.info('User cancelled run. Exiting...')
    except Exception as err:
        issue_request = (
            '\n'
            'Please help us fix your problem!\n'
            'You may file an issue with us at https://github.com/KwanLab/Autometa/issues/new\n'
        )
        logger.exception(err)
        logger.info(issue_request)
        # Report the failure to the caller instead of exiting with status 0.
        sys.exit(1)

if __name__ == '__main__':
    entrypoint()
Empty file added autometa/binning/__init__.py
Empty file.
16 changes: 8 additions & 8 deletions autometa/binning/bhsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,7 @@ def embed(kmers_fpath):
X = bh_sne(X, d=2)
return pd.DataFrame(X, columns=['x','y'], index=df.index)

def main(args):
df = embed(args.kmers)
logger.debug('{} embedded. : df.shape: {}'.format(args.kmers, df.shape))
df.to_csv(args.embedded, sep='\t', index=True, header=True)
logger.debug('embedded written {}'.format(args.embedded))

if __name__ == '__main__':
def main():
import argparse
import logging as logger
logger.basicConfig(
Expand All @@ -67,4 +61,10 @@ def main(args):
parser.add_argument('kmers',help='</path/to/kmers.normalized.tsv>')
parser.add_argument('embedded',help='</path/to/kmers.embedded.tsv>')
args = parser.parse_args()
main(args)
df = embed(args.kmers)
logger.debug('{} embedded. : df.shape: {}'.format(args.kmers, df.shape))
df.to_csv(args.embedded, sep='\t', index=True, header=True)
logger.debug('embedded written {}'.format(args.embedded))

if __name__ == '__main__':
main()
Loading

0 comments on commit 04f25f4

Please sign in to comment.