Skip to content

Commit

Permalink
add varlociraptor call subcommand (nf-core#3529)
Browse files Browse the repository at this point in the history
* add call subcommand

* at least the call subcommand runs now [skip ci]

* add test files

* linting

* fix tests

* add contains check

* Update tests/modules/nf-core/varlociraptor/callvariants/main.nf

Co-authored-by: Maxime U Garcia <maxime.garcia@scilifelab.se>

---------

Co-authored-by: Maxime U Garcia <maxime.garcia@scilifelab.se>
  • Loading branch information
2 people authored and snesic committed Oct 10, 2023
1 parent 2d04695 commit c32c0e6
Show file tree
Hide file tree
Showing 11 changed files with 333 additions and 6 deletions.
48 changes: 48 additions & 0 deletions modules/nf-core/varlociraptor/callvariants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
//
// VARLOCIRAPTOR_CALLVARIANTS: run `varlociraptor call variants` on preprocessed observation files.
//
// Two modes are supported, selected by whether a scenario file is supplied:
//   * scenario given -> `generic --scenario <scenario> --obs <alias>=<file> ...`
//   * scenario empty -> `tumor-normal --tumor <tumor_vcf> --normal <normal_vcf>`
//
// Inputs:
//   meta                 - Groovy map with sample information (meta.id is used for tag and default prefix)
//   normal_vcf           - one observation file, or a list of them when a scenario is used
//   tumor_vcf            - tumor observation file, only used in tumor-normal mode
//   scenario             - scenario file; pass [] to fall back to tumor-normal calling
//   scenario_sample_name - alias (or list of aliases) naming each observation file in the scenario
//
// NOTE: `prefix` (task.ext.prefix) is the COMPLETE output file name including its extension,
//       e.g. "sample.vcf.gz"; the optional outputs below are matched by that extension.
//
process VARLOCIRAPTOR_CALLVARIANTS {
    tag "$meta.id"
    label 'process_single'

    conda "bioconda::varlociraptor=8.1.1"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/varlociraptor:8.1.1--hc349b7f_0':
        'biocontainers/varlociraptor:8.1.1--hc349b7f_0' }"

    input:
    tuple val(meta), path(normal_vcf), path(tumor_vcf)
    path (scenario)
    val (scenario_sample_name)

    output:
    tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true
    tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true
    tuple val(meta), path("*.bcf")   , emit: bcf   , optional: true
    tuple val(meta), path("*.vcf")   , emit: vcf   , optional: true
    path "versions.yml"              , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz"

    // Build the `--obs alias=file` pairs; they are only needed when a scenario file is used.
    //   - more than one observation file: pair aliases and files positionally
    //     (alias_0=file_0 alias_1=file_1 ...)
    //   - exactly one observation file: alias=file
    def scenario_samples = ''
    if (scenario) {
        if (normal_vcf instanceof List && normal_vcf.size() > 1) {
            // transpose() silently truncates to the shorter list, which would drop observations
            // from the command line without any warning, so fail loudly instead.
            if (!(scenario_sample_name instanceof List) || scenario_sample_name.size() != normal_vcf.size()) {
                error "VARLOCIRAPTOR_CALLVARIANTS: 'scenario_sample_name' must be a list with exactly one alias per observation file (${normal_vcf.size()} files provided)."
            }
            scenario_samples = [scenario_sample_name,normal_vcf].transpose().collect{"${it[0]}=${it[1]}"}.join(' ')
        } else {
            scenario_samples = "${scenario_sample_name}=${normal_vcf}"
        }
    }

    // If no scenario is provided, fall back to tumor-normal paired calling
    def scenario_command = scenario ? "generic --scenario $scenario --obs ${scenario_samples}" : "tumor-normal --tumor ${tumor_vcf} --normal ${normal_vcf}"

    """
    varlociraptor call variants \\
        --output ${prefix} \\
        ${scenario_command} \\
        $args

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        varlociraptor: \$(echo \$(varlociraptor --version 2>&1) | sed 's/^.*varlociraptor //; s/:.*\$//' )
    END_VERSIONS
    """
}
66 changes: 66 additions & 0 deletions modules/nf-core/varlociraptor/callvariants/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Module description for VARLOCIRAPTOR_CALLVARIANTS.
# Input/output names below mirror the channel names declared in the module's main.nf.
name: "varlociraptor_callvariants"
description: Call variants for a given scenario specified with the varlociraptor calling grammar, preprocessed by varlociraptor preprocessing
keywords:
  - observations
  - variants
  - calling
tools:
  - "varlociraptor":
      description: "Flexible, uncertainty-aware variant calling with parameter free filtration via FDR control."
      homepage: "https://varlociraptor.github.io/docs/estimating/"
      documentation: "https://varlociraptor.github.io/docs/calling/"
      tool_dev_url: "https://github.com/varlociraptor/varlociraptor"
      doi: "10.1186/s13059-020-01993-6"
      # A real YAML list, not a string that merely looks like one
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - normal_vcf:
      type: file
      description: Sorted VCF/BCF file (list)
      pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}"
  - tumor_vcf:
      type: file
      description: Sorted VCF/BCF file (optional)
      pattern: "*.{bcf,bcf.gz,vcf,vcf.gz}"
  - scenario:
      type: file
      description: Yaml file containing scenario information (optional)
      pattern: "*.{yml,yaml}"
  # Plain string value(s), so no file glob pattern applies here
  - scenario_sample_name:
      type: string
      description: (List of) sample name(s) to be processed as named in the scenario yml (optional)
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test', single_end:false ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - vcf_gz:
      type: file
      description: Compressed VCF file containing sample observations
      pattern: "*.vcf.gz"
  - bcf_gz:
      type: file
      description: Compressed BCF file containing sample observations
      pattern: "*.bcf.gz"
  - vcf:
      type: file
      description: VCF file containing sample observations
      pattern: "*.vcf"
  - bcf:
      type: file
      description: BCF file containing sample observations
      pattern: "*.bcf"

authors:
  - "@FriederikeHanssen"
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ process VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES {

input:
tuple val(meta), path(bam)
tuple val(meta), path(fasta)
tuple val(meta), path(fai)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)

output:
tuple val(meta), path("*.alignment-properties.json"), emit: alignment_properties_json
Expand Down
6 changes: 4 additions & 2 deletions modules/nf-core/varlociraptor/preprocess/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@ process VARLOCIRAPTOR_PREPROCESS {
output:
tuple val(meta), path("*.bcf.gz"), emit: bcf_gz, optional: true
tuple val(meta), path("*.vcf.gz"), emit: vcf_gz, optional: true
tuple val(meta), path("*.bcf") , emit: bcf , optional: true
tuple val(meta), path("*.vcf") , emit: vcf , optional: true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def prefix = task.ext.prefix ?: "${meta.id}.vcf.gz"
def alignment_properties_json = alignment_json ? "--alignment-properties ${alignment_json}" : ""
"""
varlociraptor preprocess variants \\
Expand All @@ -31,7 +33,7 @@ process VARLOCIRAPTOR_PREPROCESS {
--bam $bam \\
--candidates $candidates \\
${args} \\
> ${prefix}.vcf.gz
> ${prefix}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
8 changes: 8 additions & 0 deletions modules/nf-core/varlociraptor/preprocess/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ output:
type: file
description: Compressed VCF file containing sample observations
pattern: "*.vcf.gz"
- bcf_gz:
type: file
description: BCF file containing sample observations
pattern: "*.bcf.gz"
- vcf:
type: file
description: VCF file containing sample observations
pattern: "*.vcf"
- bcf:
type: file
description: BCF file containing sample observations
Expand Down
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3929,6 +3929,10 @@ variantbam:
- modules/nf-core/variantbam/**
- tests/modules/nf-core/variantbam/**

varlociraptor/callvariants:
- modules/nf-core/varlociraptor/callvariants/**
- tests/modules/nf-core/varlociraptor/callvariants/**

varlociraptor/estimatealignmentproperties:
- modules/nf-core/varlociraptor/estimatealignmentproperties/**
- tests/modules/nf-core/varlociraptor/estimatealignmentproperties/**
Expand Down
4 changes: 3 additions & 1 deletion tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,8 @@ params {
test_flowcell = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell.tar.gz"
test_flowcell_samplesheet = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/bcl/flowcell_samplesheet.csv"

varlociraptor_scenario = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/varlociraptor/scenario.yml"

contig_ploidy_priors_table = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/gatk/contig_ploidy_priors_table.tsv"

purecn_ex1_bam = "${params.test_data_base}/data/genomics/homo_sapiens/illumina/purecn/purecn_ex1.bam"
Expand Down Expand Up @@ -633,7 +635,7 @@ params {
yeast_ups = "${params.test_data_base}/data/proteomics/database/yeast_UPS.fasta"
}
'maxquant' {
mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv"
mq_contrasts = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_contrasts.csv"
mq_proteingroups = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_proteinGroups.txt"
mq_samplesheet = "${params.test_data_base}/data/proteomics/maxquant/MaxQuant_samplesheet.tsv"

Expand Down
136 changes: 136 additions & 0 deletions tests/modules/nf-core/varlociraptor/callvariants/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL } from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
include { VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES as VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR } from '../../../../../modules/nf-core/varlociraptor/estimatealignmentproperties/main.nf'
include { VARLOCIRAPTOR_PREPROCESS as VARLOCIRAPTOR_PREPROCESS_NORMAL } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
include { VARLOCIRAPTOR_PREPROCESS as VARLOCIRAPTOR_PREPROCESS_TUMOR } from '../../../../../modules/nf-core/varlociraptor/preprocess/main.nf'
include { VARLOCIRAPTOR_CALLVARIANTS } from '../../../../../modules/nf-core/varlociraptor/callvariants/main.nf'

// Scenario-based calling with a single observation file:
// estimate alignment properties -> preprocess -> call with alias "normal".
workflow test_varlociraptor_callvariants_scenario_singlesample {

    // Short-hands into the test-data catalogue
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    def normal_meta    = [ id:'test_normal', single_end:false ]
    def reference_meta = [ id:'test', single_end:false ]

    def normal_bam = file(illumina['test_paired_end_sorted_bam'], checkIfExists: true)

    reference_fasta = [ reference_meta, file(genome['genome_fasta'], checkIfExists: true) ]
    reference_fai   = [ reference_meta, file(genome['genome_fasta_fai'], checkIfExists: true) ]

    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( [ normal_meta, normal_bam ], reference_fasta, reference_fai )

    // [ meta, bam, bai, candidate vcf ] for the preprocessing step
    preprocess_input = Channel.of([
        normal_meta,
        normal_bam,
        file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test_genome_vcf'], checkIfExists: true),
    ]).collect()

    VARLOCIRAPTOR_PREPROCESS_NORMAL(
        preprocess_input.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json),
        reference_fasta,
        reference_fai
    )

    scenario_yml = Channel.of(file(illumina['varlociraptor_scenario'], checkIfExists: true))

    // No tumor file in scenario mode: the third tuple slot stays empty
    observations = VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz
        .map { sample_meta, obs_vcf -> [ sample_meta, obs_vcf, [] ] }

    VARLOCIRAPTOR_CALLVARIANTS ( observations, scenario_yml, "normal" )
}

// Scenario-based calling with two observation files:
// both samples are preprocessed separately and then handed to the caller as one list,
// each paired with the alias "normal".
workflow test_varlociraptor_callvariants_scenario_multisample {

    // Short-hands into the test-data catalogue
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    def normal_meta    = [ id:'test_normal', single_end:false ]
    def tumor_meta     = [ id:'test_tumor', single_end:false ]
    def reference_meta = [ id:'test', single_end:false ]

    def normal_bam = file(illumina['test_paired_end_sorted_bam'], checkIfExists: true)
    def tumor_bam  = file(illumina['test2_paired_end_sorted_bam'], checkIfExists: true)

    reference_fasta = [ reference_meta, file(genome['genome_fasta'], checkIfExists: true) ]
    reference_fai   = [ reference_meta, file(genome['genome_fasta_fai'], checkIfExists: true) ]

    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( [ normal_meta, normal_bam ], reference_fasta, reference_fai )
    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( [ tumor_meta, tumor_bam ], reference_fasta, reference_fai )

    // [ meta, bam, bai, candidate vcf ] per sample for the preprocessing step
    preprocess_normal = Channel.of([
        normal_meta,
        normal_bam,
        file(illumina['test_paired_end_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test_genome_vcf'], checkIfExists: true),
    ]).collect()

    preprocess_tumor = Channel.of([
        tumor_meta,
        tumor_bam,
        file(illumina['test2_paired_end_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test2_genome_vcf'], checkIfExists: true),
    ]).collect()

    VARLOCIRAPTOR_PREPROCESS_NORMAL(
        preprocess_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json),
        reference_fasta,
        reference_fai
    )
    VARLOCIRAPTOR_PREPROCESS_TUMOR(
        preprocess_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json),
        reference_fasta,
        reference_fai
    )

    scenario_yml = Channel.of(file(illumina['varlociraptor_scenario'], checkIfExists: true))

    // Merge both preprocessing results into a single [ meta, [ vcf, vcf ], [] ] tuple;
    // the first sample's meta is kept, the tumor slot stays empty in scenario mode
    observations = VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz
        .concat(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz)
        .collect()
        .map { first_meta, first_vcf, second_meta, second_vcf -> [ first_meta, [ first_vcf, second_vcf ], [] ] }

    VARLOCIRAPTOR_CALLVARIANTS ( observations, scenario_yml, ["normal","normal"] )
}

// Tumor-normal paired calling: no scenario file and no sample aliases are passed,
// so the module is exercised with empty scenario inputs.
workflow test_varlociraptor_callvariants_tumor_normal {

    // Short-hands into the test-data catalogue
    def illumina = params.test_data['homo_sapiens']['illumina']
    def genome   = params.test_data['homo_sapiens']['genome']

    def normal_meta    = [ id:'test_normal', single_end:false ]
    def tumor_meta     = [ id:'test_tumor', single_end:false ]
    def reference_meta = [ id:'test', single_end:false ]

    def normal_bam = file(illumina['test_paired_end_recalibrated_sorted_bam'], checkIfExists: true)
    def tumor_bam  = file(illumina['test2_paired_end_recalibrated_sorted_bam'], checkIfExists: true)

    reference_fasta = [ reference_meta, file(genome['genome_21_fasta'], checkIfExists: true) ]
    reference_fai   = [ reference_meta, file(genome['genome_21_fasta_fai'], checkIfExists: true) ]

    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL( [ normal_meta, normal_bam ], reference_fasta, reference_fai )
    VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR( [ tumor_meta, tumor_bam ], reference_fasta, reference_fai )

    // [ meta, bam, bai, candidate vcf ] per sample; both samples use the same candidate file
    preprocess_normal = Channel.of([
        normal_meta,
        normal_bam,
        file(illumina['test_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test_genome21_indels_vcf_gz'], checkIfExists: true),
    ]).collect()

    preprocess_tumor = Channel.of([
        tumor_meta,
        tumor_bam,
        file(illumina['test2_paired_end_recalibrated_sorted_bam_bai'], checkIfExists: true),
        file(illumina['test_genome21_indels_vcf_gz'], checkIfExists: true),
    ]).collect()

    VARLOCIRAPTOR_PREPROCESS_NORMAL(
        preprocess_normal.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_NORMAL.out.alignment_properties_json),
        reference_fasta,
        reference_fai
    )
    VARLOCIRAPTOR_PREPROCESS_TUMOR(
        preprocess_tumor.join(VARLOCIRAPTOR_ESTIMATEALIGNMENTPROPERTIES_TUMOR.out.alignment_properties_json),
        reference_fasta,
        reference_fai
    )

    // Pair the two preprocessing results into [ normal meta, normal vcf, tumor vcf ]
    paired_observations = VARLOCIRAPTOR_PREPROCESS_NORMAL.out.vcf_gz
        .combine(VARLOCIRAPTOR_PREPROCESS_TUMOR.out.vcf_gz)
        .collect()
        .map { n_meta, n_vcf, t_meta, t_vcf -> [ n_meta, n_vcf, t_vcf ] }

    VARLOCIRAPTOR_CALLVARIANTS ( paired_observations, [], [] )
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Test configuration for the varlociraptor/callvariants module tests.
process {

    // Publish under <outdir>/<tool>, where <tool> is the lower-cased first '_'-separated
    // token of the last ':'-separated component of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_dir    = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_dir}"
    }

    // The prefix is the full output file name of the calling step
    withName: 'VARLOCIRAPTOR_CALLVARIANTS' {
        ext.prefix = { "${meta.id}_called.vcf.gz" }
    }
}
52 changes: 52 additions & 0 deletions tests/modules/nf-core/varlociraptor/callvariants/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Scenario mode, one observation file
- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_singlesample
  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_singlesample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
  tags:
    - varlociraptor/callvariants
    - varlociraptor
  files:
    - path: output/varlociraptor/test_normal.alignment-properties.json
      contains:
        - '"mean": 124.89494470774092,'
    - path: output/varlociraptor/test_normal.vcf.gz
      md5sum: a091dfd1192e774490e68d72f9db3976
    # Existence check only: no checksum is pinned for the called file
    - path: output/varlociraptor/test_normal_called.vcf.gz
    - path: output/varlociraptor/versions.yml

# Scenario mode, two observation files
- name: varlociraptor callvariants test_varlociraptor_callvariants_scenario_multisample
  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_scenario_multisample -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
  tags:
    - varlociraptor/callvariants
    - varlociraptor
  files:
    - path: output/varlociraptor/test_normal.alignment-properties.json
      contains:
        - '"mean": 124.89494470774092,'
    - path: output/varlociraptor/test_normal.vcf.gz
      md5sum: a091dfd1192e774490e68d72f9db3976
    # Existence check only: no checksum is pinned for the called file
    - path: output/varlociraptor/test_normal_called.vcf.gz
    - path: output/varlociraptor/test_tumor.alignment-properties.json
      contains:
        - '"mean": 126.49488752556238,'
    - path: output/varlociraptor/test_tumor.vcf.gz
      md5sum: e84afe39f48e9bd197ae9952e15adb92
    - path: output/varlociraptor/versions.yml

# Tumor-normal mode, no scenario file
- name: varlociraptor callvariants test_varlociraptor_callvariants_tumor_normal
  command: nextflow run ./tests/modules/nf-core/varlociraptor/callvariants -entry test_varlociraptor_callvariants_tumor_normal -c ./tests/config/nextflow.config -c ./tests/modules/nf-core/varlociraptor/callvariants/nextflow.config
  tags:
    - varlociraptor/callvariants
    - varlociraptor
  files:
    - path: output/varlociraptor/test_normal.alignment-properties.json
      contains:
        - '"mean": 351.71182796866435,'
    - path: output/varlociraptor/test_normal.vcf.gz
      md5sum: d5a543484daebc9b32b320ed45921e7e
    # The called file is checksum-pinned in this test
    - path: output/varlociraptor/test_normal_called.vcf.gz
      md5sum: c1a586c5b28b32f06a3f2f19c2eefcd3
    - path: output/varlociraptor/test_tumor.alignment-properties.json
      contains:
        - '"mean": 351.2137723747417,'
    - path: output/varlociraptor/test_tumor.vcf.gz
      md5sum: 338202061231d567ad323f7aac36a87b
    - path: output/varlociraptor/versions.yml

0 comments on commit c32c0e6

Please sign in to comment.