add varlociraptor call subcommand (nf-core#3529)

* add call subcommand * at calls runs now [skip ci] * add test files * linting * fix tests * add contains check * Update tests/modules/nf-core/varlociraptor/callvariants/main.nf Co-authored-by: Maxime U Garcia <maxime.garcia@scilifelab.se> --------- Co-authored-by: Maxime U Garcia <maxime.garcia@scilifelab.se>
JPejovicApis · Oct 10, 2023 · c32c0e6 · c32c0e6
1 parent 2d04695
commit c32c0e6
Show file tree

Hide file tree

Showing 11 changed files with 333 additions and 6 deletions.
diff --git a/modules/nf-core/varlociraptor/callvariants/main.nf b/modules/nf-core/varlociraptor/callvariants/main.nf
@@ -0,0 +1,48 @@
+// Runs `varlociraptor call variants` either in generic scenario mode (when a scenario
+// file is supplied) or in tumor-normal mode (when `scenario` is empty).
+process VARLOCIRAPTOR_CALLVARIANTS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::varlociraptor=8.1.1"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/varlociraptor:8.1.1--hc349b7f_0':
+        'biocontainers/varlociraptor:8.1.1--hc349b7f_0' }"
+
+    input:
+    // normal_vcf: a single observation file, or a list of them in scenario mode
+    // tumor_vcf : only used in tumor-normal mode; pass [] in scenario mode
+    tuple val(meta), path(normal_vcf), path(tumor_vcf)
+    // scenario: scenario file; pass [] to fall back to tumor-normal calling
+    path (scenario)
+    // scenario_sample_name: one name, or a list with one name per normal_vcf entry (same order)
+    val (scenario_sample_name)
+
+    output:
+    tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true
+    tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true
+    tuple val(meta), path("*.bcf")   , emit: bcf   , optional: true
+    tuple val(meta), path("*.vcf")   , emit: vcf   , optional: true
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // NOTE: prefix is the complete output file name (including extension). The extension
+    // has to match one of the output globs above, otherwise no call file is emitted.
+    def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz"
+
+    // Normalise both scenario inputs to lists so that a plain value, a one-element list
+    // and a multi-element list are all rendered as name=file pairs in the same way.
+    def observation_files = normal_vcf instanceof List ? normal_vcf : [normal_vcf]
+    def sample_names      = scenario_sample_name instanceof List ? scenario_sample_name : [scenario_sample_name]
+
+    // In scenario mode every observation file needs exactly one sample name;
+    // transpose() would otherwise silently drop the unmatched entries.
+    if (scenario && sample_names.size() != observation_files.size()) {
+        error "VARLOCIRAPTOR_CALLVARIANTS: got ${sample_names.size()} scenario sample name(s) for ${observation_files.size()} observation file(s)"
+    }
+
+    // "name_0=file_0 name_1=file_1 ..." as passed to --obs (unused in tumor-normal mode)
+    def scenario_samples = [sample_names, observation_files].transpose().collect{"${it[0]}=${it[1]}"}.join(' ')
+
+    //If no scenario is provided, fall back to tumor-normal paired calling
+    def scenario_command =  scenario ? "generic --scenario $scenario --obs ${scenario_samples}" : "tumor-normal --tumor ${tumor_vcf} --normal ${normal_vcf}"
+
+    """
+    varlociraptor call variants \\
+        --output ${prefix} \\
+        ${scenario_command} \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        varlociraptor: \$(echo \$(varlociraptor --version 2>&1) | sed 's/^.*varlociraptor //; s/:.*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/varlociraptor/callvariants/meta.yml b/modules/nf-core/varlociraptor/callvariants/meta.yml
@@ -0,0 +1,66 @@
+name: "varlociraptor_callvariants"
+description: Call variants for a given scenario specified with the varlociraptor calling grammar, preprocessed by varlociraptor preprocessing
+keywords:
+  - observations
+  - variants
+  - calling
+tools:
+  - "varlociraptor":
+      description: "Flexible, uncertainty-aware variant calling with parameter free filtration via FDR control."
+      homepage: "https://varlociraptor.github.io/"
+      documentation: "https://varlociraptor.github.io/docs/calling/"
+      tool_dev_url: "https://github.com/varlociraptor/varlociraptor"
+      doi: "10.1186/s13059-020-01993-6"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - normal_vcf:
+      type: file
+      description: Sorted VCF/BCF file (single file, or a list of files when a scenario is used)
+      pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}"
+  - tumor_vcf:
+      type: file
+      description: Sorted VCF/BCF file (optional, only used when no scenario is provided)
+      pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}"
+  - scenario:
+      type: file
+      description: Yaml file containing scenario information (optional, tumor-normal calling is used when it is not provided)
+      pattern: "*.{yml,yaml}"
+  - scenario_sample_name:
+      type: string
+      description: (List of) sample name(s) to be processed as named in the scenario yml, one per file in normal_vcf and in the same order (optional)
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - vcf_gz:
+      type: file
+      description: Compressed VCF file containing called variants
+      pattern: "*.vcf.gz"
+  - bcf_gz:
+      type: file
+      description: Compressed BCF file containing called variants
+      pattern: "*.bcf.gz"
+  - vcf:
+      type: file
+      description: VCF file containing called variants
+      pattern: "*.vcf"
+  - bcf:
+      type: file
+      description: BCF file containing called variants
+      pattern: "*.bcf"
+
+authors:
+  - "@FriederikeHanssen"
diff --git a/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf b/modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf
@@ -8,8 +8,8 @@ process VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES {
 
     input:
     tuple val(meta), path(bam)
-    tuple val(meta), path(fasta)
-    tuple val(meta), path(fai)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
 
     output:
     tuple val(meta), path("*.alignment-properties.json"), emit: alignment_properties_json

diff --git a/modules/nf-core/varlociraptor/preprocess/main.nf b/modules/nf-core/varlociraptor/preprocess/main.nf
@@ -15,14 +15,16 @@ process VARLOCIRAPTOR_PREPROCESS {
     output:
     tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true
     tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true
+    tuple val(meta), path("*.bcf")   , emit: bcf   , optional: true
+    tuple val(meta), path("*.vcf")   , emit: vcf   , optional: true
     path "versions.yml"              , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz"
     def alignment_properties_json = alignment_json ? "--alignment-properties ${alignment_json}" : ""
     """
     varlociraptor preprocess variants \\
@@ -31,7 +33,7 @@ process VARLOCIRAPTOR_PREPROCESS {
         --bam $bam \\
         --candidates $candidates \\
         ${args} \\
-        > ${prefix}.vcf.gz
+        > ${prefix}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/nf-core/varlociraptor/preprocess/meta.yml b/modules/nf-core/varlociraptor/preprocess/meta.yml
@@ -64,6 +64,14 @@ output:
       type: file
       description: Compressed VCF file containing sample observations
       pattern: "*.vcf.gz"
+  - bcf_gz:
+      type: file
+      description: Compressed BCF file containing sample observations
+      pattern: "*.bcf.gz"
+  - vcf:
+      type: file
+      description: VCF file containing sample observations
+      pattern: "*.vcf"
   - bcf:
       type: file
       description: BCF file containing sample observations

diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
@@ -3929,6 +3929,10 @@ variantbam:
   - modules/nf-core/variantbam/**
   - tests/modules/nf-core/variantbam/**
 
+varlociraptor/callvariants:
+  - modules/nf-core/varlociraptor/callvariants/**
+  - tests/modules/nf-core/varlociraptor/callvariants/**
+
 varlociraptor/estimatealignmentproperties:
   - modules/nf-core/varlociraptor/estimatealignmentproperties/**
   - tests/modules/nf-core/varlociraptor/estimatealignmentproperties/**

diff --git a/tests/config/test_data.config b/tests/config/test_data.config
@@ -469,6 +469,8 @@ params {
                 test_flowcell                                           = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz"
                 test_flowcell_samplesheet                               = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv"
 
+                varlociraptor_scenario                                  = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml"
+
                 contig_ploidy_priors_table                              = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv"
 
                 purecn_ex1_bam                                          = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam"
@@ -633,7 +635,7 @@ params {
                 yeast_ups                       = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta"
             }
             'maxquant' {
-                mq_contrasts                    = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv"    
+                mq_contrasts                    = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv"
                 mq_proteingroups                = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt"
                 mq_samplesheet                  = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv"
 

diff --git a/tests/modules/nf-core/varlociraptor/callvariants/main.nf b/tests/modules/nf-core/varlociraptor/callvariants/main.nf
@@ -0,0 +1,136 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL } from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
+include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR  } from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
+include { VARLOCIRAPTOR_PREPROCESS                  as VARLOCIRAPTOR_PREPROCESS_NORMAL                  } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
+include { VARLOCIRAPTOR_PREPROCESS                  as VARLOCIRAPTOR_PREPROCESS_TUMOR                   } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
+include { VARLOCIRAPTOR_CALLVARIANTS                                                                    } from '../../../../../modules/nf-core/varlociraptor/callvariants/main.nf'
+
+// Scenario mode with a single sample:
+// estimate alignment properties -> preprocess -> call variants, all on the "normal" BAM.
+// The tumor slot of the CALLVARIANTS input tuple is left empty ([]).
+workflow test_varlociraptor_callvariants_scenario_singlesample {
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+
+    // Same meta map as bam_normal so that the join below can match on it
+    input_normal = Channel.of([
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    // Append the alignment-properties JSON to the preprocess input by joining on the meta map
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai)
+
+    scenario = Channel.of(file(params.test_data['homo_sapiens']['illumina']['varlociraptor_scenario'], checkIfExists: true))
+
+    // [meta, vcf] -> [meta, normal_vcf, tumor_vcf = []]; "normal" is the sample name handed to --obs
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.map{meta1, vcf -> [meta1, vcf, []]}, scenario, "normal" )
+}
+
+// Scenario mode with two observation files:
+// both BAMs are preprocessed separately and the two resulting files are passed to
+// CALLVARIANTS as a list in the normal_vcf slot, with a list of two sample names.
+workflow test_varlociraptor_callvariants_scenario_multisample {
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    bam_tumor = [
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( bam_tumor, fasta, fai)
+
+    // Meta maps match bam_normal / bam_tumor so that the joins below can match on them
+    input_normal = Channel.of([
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    input_tumor = Channel.of([
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_genome_vcf'], checkIfExists: true),
+    ]).collect()
+
+    // Append the matching alignment-properties JSON to each preprocess input
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai)
+    VARLOCIRAPTOR_PREPROCESS_TUMOR(input_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json), fasta, fai)
+
+    scenario = Channel.of(file(params.test_data['homo_sapiens']['illumina']['varlociraptor_scenario'], checkIfExists: true))
+
+    // concat + collect yields [meta1, vcf1, meta2, vcf2]; it is reshaped to
+    // [meta1, [vcf1, vcf2], []] so both files land in normal_vcf and tumor_vcf stays empty.
+    // NOTE(review): both files are bound to the same scenario sample name "normal" -
+    // confirm this is what the scenario.yml test file expects.
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.concat(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz).collect().map{meta1, vcf1, meta2, vcf2 -> [meta1, [vcf1, vcf2], []]}, scenario, ["normal","normal"] )
+}
+
+// Tumor-normal mode: no scenario file and no scenario sample names are passed ([] for both),
+// so CALLVARIANTS falls back to paired tumor-normal calling.
+// Uses the recalibrated BAMs together with the chromosome 21 reference.
+workflow test_varlociraptor_callvariants_tumor_normal {
+
+    bam_normal = [
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+    ]
+
+    bam_tumor = [
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+    ]
+
+    fasta = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
+    ]
+
+    fai= [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_21_fasta_fai'], checkIfExists: true)
+    ]
+
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( bam_normal, fasta, fai)
+    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( bam_tumor, fasta, fai)
+
+    // Normal and tumor are preprocessed against the same test_genome21_indels_vcf_gz file
+    input_normal = Channel.of([
+        [ id:'test_normal', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+    ]).collect()
+
+    input_tumor = Channel.of([
+        [ id:'test_tumor', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_genome21_indels_vcf_gz'], checkIfExists: true),
+    ]).collect()
+
+    // Append the matching alignment-properties JSON to each preprocess input
+    VARLOCIRAPTOR_PREPROCESS_NORMAL(input_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json), fasta, fai)
+    VARLOCIRAPTOR_PREPROCESS_TUMOR(input_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json), fasta, fai)
+
+
+    // Pair the normal and tumor outputs into [meta1, normal_vcf, tumor_vcf]; the tumor meta map is dropped
+    VARLOCIRAPTOR_CALLVARIANTS ( VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz.combine(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz).collect().map{meta1, vcf1, meta2, vcf2 -> [meta1, vcf1, vcf2]},[], [] )
+}
+
diff --git a/tests/modules/nf-core/varlociraptor/callvariants/nextflow.config b/tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
@@ -0,0 +1,8 @@
+// Test configuration for the varlociraptor/callvariants module tests.
+process {
+
+    // Publish into a directory named after the first '_'-separated token of the
+    // (unqualified) process name, lower-cased, e.g. "varlociraptor".
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+
+    withName:VARLOCIRAPTOR_CALLVARIANTS {
+        // The "_called" suffix gives the call output a different file name than "<meta.id>.vcf.gz"
+        ext.prefix = { "${meta.id}_called.vcf.gz" }
+    }
+}
diff --git a/tests/modules/nf-core/varlociraptor/callvariants/test.yml b/tests/modules/nf-core/varlociraptor/callvariants/test.yml
@@ -0,0 +1,52 @@
+# Scenario run with a single sample. The called VCF (test_normal_called.vcf.gz) and
+# versions.yml are only checked for existence; no md5sum is pinned for them.
+- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_singlesample
+  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_singlesample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
+  tags:
+    - varlociraptor/callvariants
+    - varlociraptor
+  files:
+    - path: output/varlociraptor/test_normal.alignment-properties.json
+      contains:
+        - '"mean": 124.89494470774092,'
+    - path: output/varlociraptor/test_normal.vcf.gz
+      md5sum: a091dfd1192e774490e68d72f9db3976
+    - path: output/varlociraptor/test_normal_called.vcf.gz
+    - path: output/varlociraptor/versions.yml
+
+# Scenario run with two observation files. The preprocess outputs of both samples are
+# pinned by md5sum; the called VCF is only checked for existence.
+- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_multisample
+  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_multisample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
+  tags:
+    - varlociraptor/callvariants
+    - varlociraptor
+  files:
+    - path: output/varlociraptor/test_normal.alignment-properties.json
+      contains:
+        - '"mean": 124.89494470774092,'
+    - path: output/varlociraptor/test_normal.vcf.gz
+      md5sum: a091dfd1192e774490e68d72f9db3976
+    - path: output/varlociraptor/test_normal_called.vcf.gz
+    - path: output/varlociraptor/test_tumor.alignment-properties.json
+      contains:
+        - '"mean": 126.49488752556238,'
+    - path: output/varlociraptor/test_tumor.vcf.gz
+      md5sum: e84afe39f48e9bd197ae9952e15adb92
+    - path: output/varlociraptor/versions.yml
+
+# Tumor-normal run (no scenario). This is the only entry that pins an md5sum for the
+# called VCF in addition to the preprocess outputs.
+- name: varlociraptor callvariants test_varlociraptor_callvariants_tumor_normal
+  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_tumor_normal -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
+  tags:
+    - varlociraptor/callvariants
+    - varlociraptor
+  files:
+    - path: output/varlociraptor/test_normal.alignment-properties.json
+      contains:
+        - '"mean": 351.71182796866435,'
+    - path: output/varlociraptor/test_normal.vcf.gz
+      md5sum: d5a543484daebc9b32b320ed45921e7e
+    - path: output/varlociraptor/test_normal_called.vcf.gz
+      md5sum: c1a586c5b28b32f06a3f2f19c2eefcd3
+    - path: output/varlociraptor/test_tumor.alignment-properties.json
+      contains:
+        - '"mean": 351.2137723747417,'
+    - path: output/varlociraptor/test_tumor.vcf.gz
+      md5sum: 338202061231d567ad323f7aac36a87b
+    - path: output/varlociraptor/versions.yml