From 505737459ecc790d9f5bdbb3a4847b4a031bb63f Mon Sep 17 00:00:00 2001 From: chapmanb Date: Tue, 1 May 2012 21:22:44 -0400 Subject: [PATCH] Support haploid calling, background VCF input and specific target regions as part of freebayes variant calling --- nextgen/README.md | 20 +++++++++++++++++++ nextgen/bcbio/variation/freebayes.py | 15 +++++++++++++- .../data/automated/run_info-variantcall.yaml | 4 ++-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/nextgen/README.md b/nextgen/README.md index f346748b..515f9cf4 100644 --- a/nextgen/README.md +++ b/nextgen/README.md @@ -328,6 +328,26 @@ depth: [v1]: http://www.broadinstitute.org/gsa/wiki/index.php/GATK_resource_bundle +## Configuration options + +The YAML configuration file provides a number of hooks to customize analysis. +Place these under the `algorithm` keyword. For variant calling: + +- `aligner` Aligner to use: [bwa, bowtie, bowtie2, mosaik, novoalign] +- `trim_reads` Whether to trim off 3' B-only ends from fastq reads [false, true] +- `variantcaller` Variant calling algorithm [gatk, freebayes] +- `quality_format` Quality format of fastq inputs [Illumina, Standard] +- `coverage_interval` Regions covered by sequencing. This influences filtration of called variants. +- `hybrid_target` BED file with target regions for hybrid selection experiments. +- `variant_regions` BED file of regions to call variants in. +- `ploidy` Ploidy of called reads. Defaults to 2 (diploid). + +Global reference files for variant calling and assessment: + +- `train_hapmap`, `train_1000g_omni`, `train_indels` Training files for GATK + variant recalibration. +- `call_background` Background VCF to use for calling. 
+ ## Internals: files generated by this pipeline ### Initial Fastq files (pre-analysis) diff --git a/nextgen/bcbio/variation/freebayes.py b/nextgen/bcbio/variation/freebayes.py index 09691e6e..7895f243 100644 --- a/nextgen/bcbio/variation/freebayes.py +++ b/nextgen/bcbio/variation/freebayes.py @@ -12,6 +12,17 @@ from bcbio.variation import annotation, genotype from bcbio.log import logger +def _freebayes_options_from_config(aconfig): + opts = [] + opts += ["--ploidy", str(aconfig.get("ploidy", 2))] + regions = aconfig.get("variant_regions", None) + if regions: + opts += ["--targets", regions] + background = aconfig.get("call_background", None) + if background: + opts += ["--variant-input", background] + return opts + def run_freebayes(align_bam, ref_file, config, dbsnp=None, region=None, out_file=None): """Detect small polymorphisms with FreeBayes. @@ -23,7 +34,9 @@ def run_freebayes(align_bam, ref_file, config, dbsnp=None, region=None, region=region, fname=os.path.basename(align_bam))) with file_transaction(out_file) as tx_out_file: cl = [config["program"].get("freebayes", "freebayes"), - "-b", align_bam, "-v", tx_out_file, "-f", ref_file] + "-b", align_bam, "-v", tx_out_file, "-f", ref_file, + "--left-align-indels"] + cl += _freebayes_options_from_config(config["algorithm"]) if region: cl.extend(["-r", region]) subprocess.check_call(cl) diff --git a/nextgen/tests/data/automated/run_info-variantcall.yaml b/nextgen/tests/data/automated/run_info-variantcall.yaml index 017acdd3..ad82d422 100644 --- a/nextgen/tests/data/automated/run_info-variantcall.yaml +++ b/nextgen/tests/data/automated/run_info-variantcall.yaml @@ -1,8 +1,8 @@ details: - analysis: SNP calling algorithm: - #aligner: mosaik - #variantcaller: freebayes + aligner: mosaik + variantcaller: freebayes #sv_detection: hydra quality_format: Standard coverage_interval: regional