Skip to content

Commit

Permalink
Merge pull request #13 from Clinical-Genomics/export
Browse files Browse the repository at this point in the history
Automate the export of datasets
  • Loading branch information
adrosenbaum committed May 20, 2019
2 parents fdf8a3d + ec0e181 commit d1e6896
Show file tree
Hide file tree
Showing 12 changed files with 630 additions and 119 deletions.
93 changes: 93 additions & 0 deletions mutacc_auto/cli/export_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import click
import logging
from pathlib import Path

import yaml

from mutacc_auto.recipes.export_recipe import export_dataset
from mutacc_auto.utils.tmp_dir import TemporaryDirectory

# Directory name joined onto the mutacc root directory to locate mutacc's
# temporaries folder (see export_command).
MUTACC_TMP = 'temporaries'
# Key looked up in the parsed mutacc configuration file; its value is used as
# the mutacc root directory.
MUTACC_ROOT_DIR = 'root_dir'

# Module-level logger named after this module.
LOG = logging.getLogger(__name__)

def parse_path(ctx, param, value):
    """Click option callback that normalises a path-like option value.

    Args:
        ctx: click context (unused).
        param: click parameter the callback is attached to (unused).
        value: the raw option value, or a falsy value when not supplied.

    Returns:
        The value unchanged when it is falsy; otherwise a string holding the
        user-expanded, absolute, resolved form of the path.
    """
    if not value:
        return value

    expanded = Path(str(value)).expanduser()
    return str(expanded.absolute().resolve())

@click.command('export')
@click.option('-c', '--mutacc-config',
              type=click.Path(exists=True),
              callback=parse_path,
              help="configuration file used for mutacc")
@click.option('-o', '--vcf-out',
              type=click.Path(exists=False),
              callback=parse_path,
              help="Path to created vcf-file")
@click.option('-b', '--background',
              type=click.Path(exists=True),
              required=True,
              help="yaml file with genomic backgrounds for each sample in trio")
@click.option('-k', '--conda',
              is_flag=True,
              help="Use 'conda activate' to source environment")
@click.option('-e', '--environment',
              type=str,
              help="conda environment used for mutacc")
@click.option('-D', '--dry',
              is_flag=True,
              help="dry run")
@click.option('-V', '--verbose',
              is_flag=True,
              help="verbose")
@click.pass_context
def export_command(ctx, mutacc_config, vcf_out, background, conda, environment, dry, verbose):
    # CLI entry point for 'export': reads the background yaml and the mutacc
    # configuration, prepares a temporary working directory below
    # <root_dir>/temporaries/mutacc_auto and hands everything to
    # export_dataset(). With --verbose the returned sbatch scripts are logged.
    #
    # Documented with comments rather than a docstring on purpose: click uses
    # the docstring as the command's --help text, which would change the CLI
    # output.
    #
    # Raises click.UsageError when the mutacc configuration path or any of the
    # required slurm settings cannot be found.

    # Fall back on the value stored on the click context by the root command.
    mutacc_config = mutacc_config or ctx.obj.get('mutacc_config')
    if not mutacc_config:
        raise click.UsageError(
            "No mutacc configuration given; use --mutacc-config or set "
            "'mutacc_config' in the configuration file"
        )
    mutacc_binary = ctx.obj.get('mutacc_binary')

    # safe_load instead of yaml.load: yaml.load without an explicit Loader can
    # construct arbitrary Python objects and is rejected by recent PyYAML.
    # NOTE(review): files relying on python-specific YAML tags would no longer
    # load - confirm none are in use.
    with open(background, 'r') as background_handle:
        background_datasets = yaml.safe_load(background_handle)

    # Every one of these settings is forwarded to export_dataset, so report a
    # readable error instead of a bare KeyError when any is missing.
    slurm_keys = ('log_directory', 'email', 'time', 'account', 'priority')
    slurm_config = ctx.obj.get('slurm') or {}
    missing_keys = [key for key in slurm_keys if key not in slurm_config]
    if missing_keys:
        raise click.UsageError(
            "Missing slurm configuration value(s): {}".format(', '.join(missing_keys))
        )
    slurm_options = {key: slurm_config[key] for key in slurm_keys}

    with open(Path(mutacc_config)) as yaml_handle:
        mutacc_config_dict = yaml.safe_load(yaml_handle)
    mutacc_tmp = Path(mutacc_config_dict[MUTACC_ROOT_DIR]).joinpath(MUTACC_TMP)

    # exist_ok avoids the check-then-create race of is_dir() followed by mkdir().
    mutacc_auto_tmp_dir = mutacc_tmp.joinpath('mutacc_auto')
    mutacc_auto_tmp_dir.mkdir(parents=True, exist_ok=True)

    with TemporaryDirectory(directory=mutacc_auto_tmp_dir) as tmp_dir:

        LOG.info("All files are placed in {}".format(tmp_dir))
        sbatch_files = export_dataset(mutacc_config=mutacc_config,
                                      background=background_datasets,
                                      mutacc_binary=mutacc_binary,
                                      case_query='{}',
                                      merged_vcf_path=vcf_out,
                                      slurm_options=slurm_options,
                                      tmp_dir=tmp_dir,
                                      environment=environment,
                                      conda=conda,
                                      dry=dry)

        # Read the scripts while the temporary directory is still open.
        if verbose:
            for key, value in sbatch_files.items():
                with open(value) as sbatch_handle:
                    LOG.info("SBATCH SCRIPT FOR {}, {}\n{}".format(key, value, sbatch_handle.read()))
2 changes: 2 additions & 0 deletions mutacc_auto/cli/root.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from mutacc_auto import __version__
from .extract_command import extract_command
from .import_command import import_command
from .export_command import export_command

LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -36,3 +37,4 @@ def cli(ctx, loglevel, config_file):

cli.add_command(extract_command)
cli.add_command(import_command)
cli.add_command(export_command)
4 changes: 2 additions & 2 deletions mutacc_auto/commands/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def check_output(self):

command_stdout = command_stdout.decode('utf-8')

LOG.debug("Executed:".format(str(self)))
LOG.debug("Executed: {}".format(str(self)))

return command_stdout

Expand All @@ -96,6 +96,6 @@ def call(self):

raise

LOG.debug("Executed:".format(str(self)))
LOG.debug("Executed: {}".format(str(self)))

return result
38 changes: 38 additions & 0 deletions mutacc_auto/commands/mutacc_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,41 @@ def __init__(self, config_file, extracted_case_file, mutacc_binary=None):
self.add_subcommand('db')
self.add_subcommand('import')
self.add_argument(str(extracted_case_file))

class MutaccExport(MutaccCommand):
    """Command wrapper that assembles a mutacc 'db export' invocation."""

    def __init__(self, config_file, mutacc_binary=None, case_query=None, variant_query=None,
                 proband=False, member='affected', sample_name=None):
        """Build the command from the given queries and sample settings.

        Args:
            config_file: forwarded to the MutaccCommand constructor.
            mutacc_binary: forwarded to the MutaccCommand constructor.
            case_query: value for the 'case-query' option; skipped when None.
            variant_query: value for the 'variant-query' option; skipped when None.
            proband: when truthy, the value-less 'proband' option is added.
            member: value for the 'member' option (always added).
            sample_name: value for the 'sample-name' option; skipped when None.
        """
        super().__init__(config_file, mutacc_binary=mutacc_binary)

        for subcommand in ('db', 'export'):
            self.add_subcommand(subcommand)

        # Optional valued options, emitted in a fixed order.
        optional_settings = (
            ('variant-query', variant_query),
            ('case-query', case_query),
            ('sample-name', sample_name),
        )
        for option_name, option_value in optional_settings:
            if option_value is not None:
                self.add_option(option_name, option_value)

        if proband:
            self.add_option('proband')

        self.add_option('member', member)
        self.add_option('json-out')

class MutaccSynthesize(MutaccCommand):
    """Command wrapper that assembles a mutacc 'synthesize' invocation."""

    def __init__(self, config_file, fastq1, bam_file, query_file, fastq2=None,
                 mutacc_binary=None):
        """Build the command from the background files and the query file.

        Args:
            config_file: forwarded to the MutaccCommand constructor.
            fastq1: value for the 'background-fastq' option.
            bam_file: value for the 'background-bam' option.
            query_file: value for the 'query' option.
            fastq2: value for the 'background-fastq2' option; skipped when None.
            mutacc_binary: forwarded to the MutaccCommand constructor.
        """
        super().__init__(config_file, mutacc_binary=mutacc_binary)

        self.add_subcommand('synthesize')

        # Collect the valued options first so the emission order is explicit.
        valued_options = [
            ('background-bam', bam_file),
            ('background-fastq', fastq1),
        ]
        if fastq2 is not None:
            valued_options.append(('background-fastq2', fastq2))
        valued_options.append(('query', query_file))

        for option_name, option_value in valued_options:
            self.add_option(option_name, option_value)

        self.add_option('json-out')
37 changes: 37 additions & 0 deletions mutacc_auto/commands/vcf_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from .command import Command as BaseCommand

# Command names handed to the base Command constructor by the wrapper classes
# in this module.
BGZIP_COMMAND = 'bgzip'
TABIX_COMMAND = 'tabix'
# Only the executable name; BcftoolsMergeCommand adds the 'merge' subcommand.
BCFTOOLS_MERGE_COMMAND = 'bcftools'

class BgzipCommand(BaseCommand):
    """Command wrapper for running 'bgzip' on a single file."""

    def __init__(self, vcf_file):
        """Create the command with ``vcf_file`` as its only argument."""
        super().__init__(BGZIP_COMMAND)
        self.add_argument(vcf_file)

class TabixCommand(BaseCommand):
    """Command wrapper for running 'tabix' on a single file."""

    def __init__(self, vcf_file):
        """Create the command with ``vcf_file`` as its only argument."""
        super().__init__(TABIX_COMMAND)
        self.add_argument(vcf_file)

class BcftoolsMergeCommand(BaseCommand):
    """Command wrapper that assembles a 'bcftools merge' invocation."""

    def __init__(self, vcf_files, out_vcf=None):
        """Build the merge command for the given input files.

        Args:
            vcf_files: iterable of input files, each added as an argument in
                iteration order.
            out_vcf: value for the 'output' option; the option is left out
                when this is falsy.
        """
        super().__init__(BCFTOOLS_MERGE_COMMAND)
        self.add_subcommand('merge')

        if out_vcf:
            self.add_option('output', out_vcf)

        # The output type is always 'z', whether or not an output path is set.
        self.add_option('output-type', 'z')

        for input_vcf in vcf_files:
            self.add_argument(input_vcf)
Loading

0 comments on commit d1e6896

Please sign in to comment.