diff --git a/modules/nf-core/varlociraptor/callvariants/main.nf b/modules/nf-core/varlociraptor/callvariants/main.nf
new file mode 100644
index 00000000000..0b852c4d780
--- /dev/null
+++ b/modules/nf-core/varlociraptor/callvariants/main.nf
@@ -0,0 +1,51 @@
+// Runs `varlociraptor call variants` on preprocessed observation files.
+// With a scenario file the `generic` mode is used; without one it falls back to `tumor-normal`.
+process VARLOCIRAPTOR_CALLVARIANTS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::varlociraptor=8.1.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/varlociraptor:8.1.1--hc349b7f_0':
+        'biocontainers/varlociraptor:8.1.1--hc349b7f_0' }"
+
+    input:
+    tuple val(meta), path(normal_vcf), path(tumor_vcf)
+    path (scenario)
+    val (scenario_sample_name)
+
+    output:
+    tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true
+    tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true
+    tuple val(meta), path("*.bcf")   , emit: bcf   , optional: true
+    tuple val(meta), path("*.vcf")   , emit: vcf   , optional: true
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // prefix is the complete output file name; its extension decides which optional output glob matches
+    def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz"
+
+    // Pair every scenario sample name with its observation file as `name=file` for `--obs`.
+    // Both inputs are normalised to lists first, so a single name/file and lists of names/files are handled alike.
+    // Only used when a scenario is given; the tumor-normal fallback ignores it.
+    def scenario_samples = [[scenario_sample_name].flatten(), [normal_vcf].flatten()].transpose().collect{ sample, obs -> "${sample}=${obs}" }.join(' ')
+
+    //If no scenario is provided, fall back to tumor-normal paired calling
+    def scenario_command = scenario ? "generic --scenario $scenario --obs ${scenario_samples}" : "tumor-normal --tumor ${tumor_vcf} --normal ${normal_vcf}"
+
+    """
+    varlociraptor call variants \\
+        --output ${prefix} \\
+        ${scenario_command} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        varlociraptor: \$(echo \$(varlociraptor --version 2>&1) | sed 's/^.*varlociraptor //; s/:.*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/varlociraptor/callvariants/meta.yml b/modules/nf-core/varlociraptor/callvariants/meta.yml
new file mode 100644
index 00000000000..70ea6d936d9
--- /dev/null
+++ b/modules/nf-core/varlociraptor/callvariants/meta.yml
@@ -0,0 +1,66 @@
+name: "varlociraptor_callvariants"
+description: Call variants for a given scenario specified with the varlociraptor calling grammar, preprocessed by varlociraptor preprocessing
+keywords:
+  - observations
+  - variants
+  - calling
+tools:
+  - "varlociraptor":
+      description: "Flexible, uncertainty-aware variant calling with parameter free filtration via FDR control."
+      homepage: "https://varlociraptor.github.io/docs/estimating/"
+      documentation: "https://varlociraptor.github.io/docs/calling/"
+      tool_dev_url: "https://github.com/varlociraptor/varlociraptor"
+      doi: "10.1186/s13059-020-01993-6"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g.
[ id:'test', single_end:false ] + - normal_vcf: + type: file + description: Sorted VCF/BCF file, or list of files when a scenario with several samples is used + pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}" + - tumor_vcf: + type: file + description: Sorted VCF/BCF file of the tumor sample, only used for tumor-normal calling without a scenario (optional) + pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}" + - scenario: + type: file + description: Yaml file containing scenario information (optional) + pattern: "*.{yml,yaml}" + - scenario_sample_name: + type: string + description: (List of) sample name(s) to be processed as named in the scenario yml (optional) + pattern: "*.{bcf,vcf,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf_gz: + type: file + description: Compressed VCF file containing called variants + pattern: "*.vcf.gz" + - bcf_gz: + type: file + description: Compressed BCF file containing called variants + pattern: "*.bcf.gz" + - vcf: + type: file + description: VCF file containing called variants + pattern: "*.vcf" + - bcf: + type: file + description: BCF file containing called variants + pattern: "*.bcf" + +authors: + - "@FriederikeHanssen" diff --git a/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf b/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf index 581ea13a805..74269fcc653 100644 --- a/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf +++ b/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf @@ -8,8 +8,8 @@ process VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES { input: tuple val(meta), path(bam) - tuple val(meta), path(fasta) - tuple val(meta), path(fai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.alignment-properties.json"), emit: alignment_properties_json diff --git a/modules/nf-core/varlociraptor/preprocess/main.nf b/modules/nf-core/varlociraptor/preprocess/main.nf index 93de96819f8..3f989639349 
100644 --- a/modules/nf-core/varlociraptor/preprocess/main.nf +++ b/modules/nf-core/varlociraptor/preprocess/main.nf @@ -15,6 +15,8 @@ process VARLOCIRAPTOR_PREPROCESS { output: tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true + tuple val(meta), path("*.bcf") , emit: bcf , optional: true + tuple val(meta), path("*.vcf") , emit: vcf , optional: true path "versions.yml" , emit: versions when: @@ -22,7 +24,7 @@ process VARLOCIRAPTOR_PREPROCESS { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz" def alignment_properties_json = alignment_json ? "--alignment-properties ${alignment_json}" : "" """ varlociraptor preprocess variants \\ @@ -31,7 +33,7 @@ process VARLOCIRAPTOR_PREPROCESS { --bam $bam \\ --candidates $candidates \\ ${args} \\ - > ${prefix}.vcf.gz + > ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/varlociraptor/preprocess/meta.yml b/modules/nf-core/varlociraptor/preprocess/meta.yml index 55abac6cc58..f86a82d6858 100644 --- a/modules/nf-core/varlociraptor/preprocess/meta.yml +++ b/modules/nf-core/varlociraptor/preprocess/meta.yml @@ -64,6 +64,14 @@ output: type: file description: Compressed VCF file containing sample observations pattern: "*.vcf.gz" + - bcf_gz: + type: file + description: BCF file containing sample observations + pattern: "*.bcf.gz" + - vcf: + type: file + description: VCF file containing sample observations + pattern: "*.vcf" - bcf: type: file description: BCF file containing sample observations diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml index 2c32a4b4463..4c7c32f8b58 100644 --- a/tests/config/pytest_modules.yml +++ b/tests/config/pytest_modules.yml @@ -3929,6 +3929,10 @@ variantbam: - modules/nf-core/variantbam/** - tests/modules/nf-core/variantbam/** +varlociraptor/callvariants: + - 
modules/nf-core/varlociraptor/callvariants/** + - tests/modules/nf-core/varlociraptor/callvariants/** + varlociraptor/estimatealignmentproperties: - modules/nf-core/varlociraptor/estimatealignmentproperties/** - tests/modules/nf-core/varlociraptor/estimatealignmentproperties/** diff --git a/tests/config/test_data.config b/tests/config/test_data.config index 667ca59a3c8..b45ee031192 100644 --- a/tests/config/test_data.config +++ b/tests/config/test_data.config @@ -469,6 +469,8 @@ params { test_flowcell = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz" test_flowcell_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv" + varlociraptor_scenario = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml" + contig_ploidy_priors_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv" purecn_ex1_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam" @@ -633,7 +635,7 @@ params { yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta" } 'maxquant' { - mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv" + mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv" mq_proteingroups = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt" mq_samplesheet = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv" diff --git a/tests/modules/nf-core/varlociraptor/callvariants/main.nf b/tests/modules/nf-core/varlociraptor/callvariants/main.nf new file mode 100644 index 00000000000..33280ad910c --- /dev/null +++ b/tests/modules/nf-core/varlociraptor/callvariants/main.nf @@ -0,0 +1,136 @@ +#!/usr/bin/env nextflow + +nextflow.enable.dsl = 2 + +include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL } 
from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
+include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR } from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
+include { VARLOCIRAPTOR_PREPROCESS as VARLOCIRAPTOR_PREPROCESS_NORMAL } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
+include { VARLOCIRAPTOR_PREPROCESS as VARLOCIRAPTOR_PREPROCESS_TUMOR } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
+include { VARLOCIRAPTOR_CALLVARIANTS } from '../../../../../modules/nf-core/varlociraptor/callvariants/main.nf'
+
+workflow test_varlociraptor_callvariants_scenario_singlesample { // scenario-based calling with one preprocessed sample
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+
+    input_normal = Channel.of([ // presumably (meta, bam, bai, candidate VCF) as expected by the preprocess module -- confirm against its input block
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai) // join on the meta map appends the alignment-properties JSON
+
+    scenario = Channel.of(file(params.test_data['homo_sapiens']['illumina']['varlociraptor_scenario'], checkIfExists: true))
+
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.map{meta1, vcf -> [meta1, vcf, []]}, scenario, "normal" ) // [] fills the unused tumor_vcf slot; "normal" is the sample name paired with the VCF for --obs
+}
+
+workflow test_varlociraptor_callvariants_scenario_multisample { // scenario-based calling with two preprocessed samples passed as a list
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    bam_tumor = [
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( bam_tumor, fasta, fai)
+
+    input_normal = Channel.of([
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    input_tumor = Channel.of([
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai)
+    VARLOCIRAPTOR_PREPROCESS_TUMOR(input_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json), fasta, fai)
+
+    scenario = Channel.of(file(params.test_data['homo_sapiens']['illumina']['varlociraptor_scenario'], checkIfExists: true))
+
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.concat(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz).collect().map{meta1, vcf1, meta2, vcf2 -> [meta1, [vcf1, vcf2], []]}, scenario, ["normal","normal"] ) // both VCFs form the normal_vcf list, tumor_vcf stays empty; NOTE(review): both are labelled "normal" -- confirm against the sample names in the scenario file
+}
+
+workflow test_varlociraptor_callvariants_tumor_normal { // tumor-normal calling without a scenario
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+    ]
+
+    bam_tumor = [
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( bam_tumor, fasta, fai)
+
+    input_normal = Channel.of([
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+    ]).collect()
+
+    input_tumor = Channel.of([
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+    ]).collect()
+
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai)
+    VARLOCIRAPTOR_PREPROCESS_TUMOR(input_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json), fasta, fai)
+
+
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.combine(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz).collect().map{meta1, vcf1, meta2, vcf2 -> [meta1, vcf1, vcf2]},[], [] ) // empty scenario and sample name select the module's tumor-normal branch
+}
+
diff --git a/tests/modules/nf-core/varlociraptor/callvariants/nextflow.config b/tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
new file mode 100644
index 00000000000..4aad31754bd
--- /dev/null
+++ b/tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
@@ -0,0 +1,8 @@
+process {
+
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } // <outdir>/<first '_'-separated token of the process name, lower-cased>
+
+    withName:VARLOCIRAPTOR_CALLVARIANTS {
+        ext.prefix = { "${meta.id}_called.vcf.gz" } // complete output file name, handed to --output by the module
+    }
+}
diff --git a/tests/modules/nf-core/varlociraptor/callvariants/test.yml b/tests/modules/nf-core/varlociraptor/callvariants/test.yml
new file mode 100644
index 00000000000..1ec83002ee1
--- /dev/null
+++ b/tests/modules/nf-core/varlociraptor/callvariants/test.yml
@@ -0,0 +1,52 @@
+- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_singlesample
+  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_singlesample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
+  tags:
+    -
varlociraptor/callvariants + - varlociraptor + files: + - path: output/varlociraptor/test_normal.alignment-properties.json + contains: + - '"mean": 124.89494470774092,' + - path: output/varlociraptor/test_normal.vcf.gz + md5sum: a091dfd1192e774490e68d72f9db3976 + - path: output/varlociraptor/test_normal_called.vcf.gz + - path: output/varlociraptor/versions.yml + +- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_multisample + command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_multisample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config + tags: + - varlociraptor/callvariants + - varlociraptor + files: + - path: output/varlociraptor/test_normal.alignment-properties.json + contains: + - '"mean": 124.89494470774092,' + - path: output/varlociraptor/test_normal.vcf.gz + md5sum: a091dfd1192e774490e68d72f9db3976 + - path: output/varlociraptor/test_normal_called.vcf.gz + - path: output/varlociraptor/test_tumor.alignment-properties.json + contains: + - '"mean": 126.49488752556238,' + - path: output/varlociraptor/test_tumor.vcf.gz + md5sum: e84afe39f48e9bd197ae9952e15adb92 + - path: output/varlociraptor/versions.yml + +- name: varlociraptor callvariants test_varlociraptor_callvariants_tumor_normal + command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_tumor_normal -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config + tags: + - varlociraptor/callvariants + - varlociraptor + files: + - path: output/varlociraptor/test_normal.alignment-properties.json + contains: + - '"mean": 351.71182796866435,' + - path: output/varlociraptor/test_normal.vcf.gz + md5sum: d5a543484daebc9b32b320ed45921e7e + - path: output/varlociraptor/test_normal_called.vcf.gz + md5sum: c1a586c5b28b32f06a3f2f19c2eefcd3 + - path: 
output/varlociraptor/test_tumor.alignment-properties.json + contains: + - '"mean": 351.2137723747417,' + - path: output/varlociraptor/test_tumor.vcf.gz + md5sum: 338202061231d567ad323f7aac36a87b + - path: output/varlociraptor/versions.yml diff --git a/tests/modules/nf-core/varlociraptor/estimatealignmentproperties/test.yml b/tests/modules/nf-core/varlociraptor/estimatealignmentproperties/test.yml index 48547e257ae..840c64d7dc0 100644 --- a/tests/modules/nf-core/varlociraptor/estimatealignmentproperties/test.yml +++ b/tests/modules/nf-core/varlociraptor/estimatealignmentproperties/test.yml @@ -5,5 +5,6 @@ - varlociraptor/estimatealignmentproperties files: - path: output/varlociraptor/test.alignment-properties.json - md5sum: 412e5a8f56035446ec6ebd2edb51e7a0 + contains: + - '"mean": 205.109756097561,' - path: output/varlociraptor/versions.yml